vfs_trans.c revision 1.64
1/* $NetBSD: vfs_trans.c,v 1.64 2022/06/28 00:13:48 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.64 2022/06/28 00:13:48 riastradh Exp $"); 34 35/* 36 * File system transaction operations. 37 */ 38 39#ifdef _KERNEL_OPT 40#include "opt_ddb.h" 41#endif 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/atomic.h> 46#include <sys/buf.h> 47#include <sys/kmem.h> 48#include <sys/mount.h> 49#include <sys/pserialize.h> 50#include <sys/vnode.h> 51#include <sys/fstrans.h> 52#include <sys/proc.h> 53#include <sys/pool.h> 54 55#include <miscfs/specfs/specdev.h> 56 57enum fstrans_lock_type { 58 FSTRANS_LAZY, /* Granted while not suspended */ 59 FSTRANS_SHARED /* Granted while not suspending */ 60}; 61 62struct fscow_handler { 63 LIST_ENTRY(fscow_handler) ch_list; 64 int (*ch_func)(void *, struct buf *, bool); 65 void *ch_arg; 66}; 67struct fstrans_lwp_info { 68 struct fstrans_lwp_info *fli_succ; 69 struct lwp *fli_self; 70 struct mount *fli_mount; 71 struct fstrans_lwp_info *fli_alias; 72 struct fstrans_mount_info *fli_mountinfo; 73 int fli_trans_cnt; 74 int fli_alias_cnt; 75 int fli_cow_cnt; 76 enum fstrans_lock_type fli_lock_type; 77 LIST_ENTRY(fstrans_lwp_info) fli_list; 78}; 79struct fstrans_mount_info { 80 enum fstrans_state fmi_state; 81 unsigned int fmi_ref_cnt; 82 bool fmi_gone; 83 bool fmi_cow_change; 84 LIST_HEAD(, fscow_handler) fmi_cow_handler; 85 struct mount *fmi_mount; 86 struct lwp *fmi_owner; 87}; 88 89static kmutex_t vfs_suspend_lock /* Serialize suspensions. */ 90 __cacheline_aligned; 91static kmutex_t fstrans_lock /* Fstrans big lock. */ 92 __cacheline_aligned; 93static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 94static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 95static pserialize_t fstrans_psz; /* Pserialize state. */ 96static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 97 /* List of all fstrans_lwp_info. */ 98static pool_cache_t fstrans_lwp_cache; /* Cache of fstrans_lwp_info. */ 99 100static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ 101 102static void fstrans_mount_dtor(struct fstrans_mount_info *); 103static void fstrans_clear_lwp_info(void); 104static inline struct fstrans_lwp_info * 105 fstrans_get_lwp_info(struct mount *, bool); 106static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 107static int fstrans_lwp_pcc(void *, void *, int); 108static void fstrans_lwp_pcd(void *, void *); 109static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 110static bool grant_lock(const struct fstrans_mount_info *, 111 const enum fstrans_lock_type); 112static bool state_change_done(const struct fstrans_mount_info *); 113static bool cow_state_change_done(const struct fstrans_mount_info *); 114static void cow_change_enter(struct fstrans_mount_info *); 115static void cow_change_done(struct fstrans_mount_info *); 116 117extern struct mount *dead_rootmount; 118 119#if defined(DIAGNOSTIC) 120 121struct fstrans_debug_mount { 122 struct mount *fdm_mount; 123 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 124}; 125 126static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 127 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 128 129static void 130fstrans_debug_mount(struct mount *mp) 131{ 132 struct fstrans_debug_mount *fdm, *new; 133 134 KASSERT(mutex_owned(&fstrans_lock)); 135 136 mutex_exit(&fstrans_lock); 137 new = kmem_alloc(sizeof(*new), KM_SLEEP); 138 new->fdm_mount = mp; 139 mutex_enter(&fstrans_lock); 140 141 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 142 KASSERT(fdm->fdm_mount != mp); 143 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 144} 145 146static void 147fstrans_debug_unmount(struct mount *mp) 148{ 149 struct fstrans_debug_mount *fdm; 150 151 KASSERT(mutex_owned(&fstrans_lock)); 152 153 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 154 if (fdm->fdm_mount == mp) 155 break; 156 KASSERT(fdm != NULL); 157 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 158 fstrans_debug_mount, fdm_list); 159 kmem_free(fdm, sizeof(*fdm)); 160} 161 162static void 163fstrans_debug_validate_mount(struct mount *mp) 164{ 165 struct fstrans_debug_mount *fdm; 166 167 KASSERT(mutex_owned(&fstrans_lock)); 168 169 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 170 if (fdm->fdm_mount == mp) 171 break; 172 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 173} 174 175#else /* defined(DIAGNOSTIC) */ 176 177#define fstrans_debug_mount(mp) 178#define fstrans_debug_unmount(mp) 179#define fstrans_debug_validate_mount(mp) 180 181#endif /* defined(DIAGNOSTIC) */ 182 183/* 184 * Initialize. 185 */ 186void 187fstrans_init(void) 188{ 189 190 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 191 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 192 cv_init(&fstrans_state_cv, "fstchg"); 193 cv_init(&fstrans_count_cv, "fstcnt"); 194 fstrans_psz = pserialize_create(); 195 LIST_INIT(&fstrans_fli_head); 196 fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 197 coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE, 198 fstrans_lwp_pcc, fstrans_lwp_pcd, NULL); 199 KASSERT(fstrans_lwp_cache != NULL); 200} 201 202/* 203 * pool_cache constructor for fstrans_lwp_info. Updating the global list 204 * produces cache misses on MP. Minimise by keeping free entries on list. 205 */ 206int 207fstrans_lwp_pcc(void *arg, void *obj, int flags) 208{ 209 struct fstrans_lwp_info *fli = obj; 210 211 memset(fli, 0, sizeof(*fli)); 212 213 mutex_enter(&fstrans_lock); 214 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 215 mutex_exit(&fstrans_lock); 216 217 return 0; 218} 219 220/* 221 * pool_cache destructor 222 */ 223void 224fstrans_lwp_pcd(void *arg, void *obj) 225{ 226 struct fstrans_lwp_info *fli = obj; 227 228 mutex_enter(&fstrans_lock); 229 LIST_REMOVE(fli, fli_list); 230 mutex_exit(&fstrans_lock); 231} 232 233/* 234 * Deallocate lwp state. 235 */ 236void 237fstrans_lwp_dtor(lwp_t *l) 238{ 239 struct fstrans_lwp_info *fli, *fli_next; 240 241 if (l->l_fstrans == NULL) 242 return; 243 244 mutex_enter(&fstrans_lock); 245 for (fli = l->l_fstrans; fli; fli = fli_next) { 246 KASSERT(fli->fli_trans_cnt == 0); 247 KASSERT(fli->fli_cow_cnt == 0); 248 KASSERT(fli->fli_self == l); 249 if (fli->fli_mount != NULL) 250 fstrans_mount_dtor(fli->fli_mountinfo); 251 fli_next = fli->fli_succ; 252 fli->fli_alias_cnt = 0; 253 fli->fli_mount = NULL; 254 fli->fli_alias = NULL; 255 fli->fli_mountinfo = NULL; 256 fli->fli_self = NULL; 257 } 258 mutex_exit(&fstrans_lock); 259 260 for (fli = l->l_fstrans; fli; fli = fli_next) { 261 fli_next = fli->fli_succ; 262 pool_cache_put(fstrans_lwp_cache, fli); 263 } 264 l->l_fstrans = NULL; 265} 266 267/* 268 * Dereference mount state. 269 */ 270static void 271fstrans_mount_dtor(struct fstrans_mount_info *fmi) 272{ 273 274 KASSERT(mutex_owned(&fstrans_lock)); 275 276 KASSERT(fmi != NULL); 277 fmi->fmi_ref_cnt -= 1; 278 if (__predict_true(fmi->fmi_ref_cnt > 0)) { 279 return; 280 } 281 282 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 283 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 284 KASSERT(fmi->fmi_owner == NULL); 285 286 KASSERT(fstrans_gone_count > 0); 287 fstrans_gone_count -= 1; 288 289 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 290 kmem_free(fmi, sizeof(*fmi)); 291} 292 293/* 294 * Allocate mount state. 295 */ 296int 297fstrans_mount(struct mount *mp) 298{ 299 struct fstrans_mount_info *newfmi; 300 301 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 302 newfmi->fmi_state = FSTRANS_NORMAL; 303 newfmi->fmi_ref_cnt = 1; 304 newfmi->fmi_gone = false; 305 LIST_INIT(&newfmi->fmi_cow_handler); 306 newfmi->fmi_cow_change = false; 307 newfmi->fmi_mount = mp; 308 newfmi->fmi_owner = NULL; 309 310 mutex_enter(&fstrans_lock); 311 mp->mnt_transinfo = newfmi; 312 fstrans_debug_mount(mp); 313 mutex_exit(&fstrans_lock); 314 315 return 0; 316} 317 318/* 319 * Deallocate mount state. 320 */ 321void 322fstrans_unmount(struct mount *mp) 323{ 324 struct fstrans_mount_info *fmi = mp->mnt_transinfo; 325 326 KASSERT(fmi != NULL); 327 328 mutex_enter(&fstrans_lock); 329 fstrans_debug_unmount(mp); 330 fmi->fmi_gone = true; 331 mp->mnt_transinfo = NULL; 332 fstrans_gone_count += 1; 333 fstrans_mount_dtor(fmi); 334 mutex_exit(&fstrans_lock); 335} 336 337/* 338 * Clear mount entries whose mount is gone. 339 */ 340static void 341fstrans_clear_lwp_info(void) 342{ 343 struct fstrans_lwp_info **p, *fli, *tofree = NULL; 344 345 /* 346 * Scan our list clearing entries whose mount is gone. 347 */ 348 mutex_enter(&fstrans_lock); 349 for (p = &curlwp->l_fstrans; *p; ) { 350 fli = *p; 351 if (fli->fli_mount != NULL && 352 fli->fli_mountinfo->fmi_gone && 353 fli->fli_trans_cnt == 0 && 354 fli->fli_cow_cnt == 0 && 355 fli->fli_alias_cnt == 0) { 356 *p = (*p)->fli_succ; 357 fstrans_mount_dtor(fli->fli_mountinfo); 358 if (fli->fli_alias) { 359 KASSERT(fli->fli_alias->fli_alias_cnt > 0); 360 fli->fli_alias->fli_alias_cnt--; 361 } 362 fli->fli_mount = NULL; 363 fli->fli_alias = NULL; 364 fli->fli_mountinfo = NULL; 365 fli->fli_self = NULL; 366 p = &curlwp->l_fstrans; 367 fli->fli_succ = tofree; 368 tofree = fli; 369 } else { 370 p = &(*p)->fli_succ; 371 } 372 } 373#ifdef DIAGNOSTIC 374 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) 375 if (fli->fli_alias != NULL) 376 KASSERT(fli->fli_alias->fli_self == curlwp); 377#endif /* DIAGNOSTIC */ 378 mutex_exit(&fstrans_lock); 379 380 while (tofree != NULL) { 381 fli = tofree; 382 tofree = fli->fli_succ; 383 pool_cache_put(fstrans_lwp_cache, fli); 384 } 385} 386 387/* 388 * Allocate and return per lwp info for this mount. 389 */ 390static struct fstrans_lwp_info * 391fstrans_alloc_lwp_info(struct mount *mp) 392{ 393 struct fstrans_lwp_info *fli; 394 struct fstrans_mount_info *fmi; 395 396 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 397 if (fli->fli_mount == mp) 398 return fli; 399 } 400 401 /* 402 * Allocate a new entry. 403 */ 404 fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK); 405 KASSERT(fli->fli_trans_cnt == 0); 406 KASSERT(fli->fli_cow_cnt == 0); 407 KASSERT(fli->fli_alias_cnt == 0); 408 KASSERT(fli->fli_mount == NULL); 409 KASSERT(fli->fli_alias == NULL); 410 KASSERT(fli->fli_mountinfo == NULL); 411 KASSERT(fli->fli_self == NULL); 412 fli->fli_succ = curlwp->l_fstrans; 413 curlwp->l_fstrans = fli; 414 415 /* 416 * Attach the entry to the mount if its mnt_transinfo is valid. 417 */ 418 419 mutex_enter(&fstrans_lock); 420 fli->fli_self = curlwp; 421 fstrans_debug_validate_mount(mp); 422 fmi = mp->mnt_transinfo; 423 KASSERT(fmi != NULL); 424 fli->fli_mount = mp; 425 fli->fli_mountinfo = fmi; 426 fmi->fmi_ref_cnt += 1; 427 do { 428 mp = mp->mnt_lower; 429 } while (mp && mp->mnt_lower); 430 mutex_exit(&fstrans_lock); 431 432 if (mp) { 433 fli->fli_alias = fstrans_alloc_lwp_info(mp); 434 fli->fli_alias->fli_alias_cnt++; 435 fli = fli->fli_alias; 436 } 437 438 return fli; 439} 440 441/* 442 * Retrieve the per lwp info for this mount allocating if necessary. 443 */ 444static inline struct fstrans_lwp_info * 445fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 446{ 447 struct fstrans_lwp_info *fli; 448 449 /* 450 * Scan our list for a match. 451 */ 452 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 453 if (fli->fli_mount == mp) { 454 KASSERT((mp->mnt_lower == NULL) == 455 (fli->fli_alias == NULL)); 456 if (fli->fli_alias != NULL) 457 fli = fli->fli_alias; 458 break; 459 } 460 } 461 462 if (do_alloc) { 463 if (__predict_false(fli == NULL)) 464 fli = fstrans_alloc_lwp_info(mp); 465 KASSERT(fli != NULL); 466 KASSERT(!fli->fli_mountinfo->fmi_gone); 467 } else { 468 KASSERT(fli != NULL); 469 } 470 471 return fli; 472} 473 474/* 475 * Check if this lock type is granted at this state. 476 */ 477static bool 478grant_lock(const struct fstrans_mount_info *fmi, 479 const enum fstrans_lock_type type) 480{ 481 482 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) 483 return true; 484 if (fmi->fmi_owner == curlwp) 485 return true; 486 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 487 return true; 488 489 return false; 490} 491 492/* 493 * Start a transaction. If this thread already has a transaction on this 494 * file system increment the reference counter. 495 */ 496static inline int 497_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 498{ 499 int s; 500 struct fstrans_lwp_info *fli; 501 struct fstrans_mount_info *fmi; 502 503#ifndef FSTRANS_DEAD_ENABLED 504 if (mp == dead_rootmount) 505 return 0; 506#endif 507 508 ASSERT_SLEEPABLE(); 509 510 fli = fstrans_get_lwp_info(mp, true); 511 fmi = fli->fli_mountinfo; 512 513 if (fli->fli_trans_cnt > 0) { 514 fli->fli_trans_cnt += 1; 515 516 return 0; 517 } 518 519 s = pserialize_read_enter(); 520 if (__predict_true(grant_lock(fmi, lock_type))) { 521 fli->fli_trans_cnt = 1; 522 fli->fli_lock_type = lock_type; 523 pserialize_read_exit(s); 524 525 return 0; 526 } 527 pserialize_read_exit(s); 528 529 if (! wait) 530 return EBUSY; 531 532 mutex_enter(&fstrans_lock); 533 while (! grant_lock(fmi, lock_type)) 534 cv_wait(&fstrans_state_cv, &fstrans_lock); 535 fli->fli_trans_cnt = 1; 536 fli->fli_lock_type = lock_type; 537 mutex_exit(&fstrans_lock); 538 539 return 0; 540} 541 542void 543fstrans_start(struct mount *mp) 544{ 545 int error __diagused; 546 547 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 548 KASSERT(error == 0); 549} 550 551int 552fstrans_start_nowait(struct mount *mp) 553{ 554 555 return _fstrans_start(mp, FSTRANS_SHARED, 0); 556} 557 558void 559fstrans_start_lazy(struct mount *mp) 560{ 561 int error __diagused; 562 563 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 564 KASSERT(error == 0); 565} 566 567/* 568 * Finish a transaction. 569 */ 570void 571fstrans_done(struct mount *mp) 572{ 573 int s; 574 struct fstrans_lwp_info *fli; 575 struct fstrans_mount_info *fmi; 576 577#ifndef FSTRANS_DEAD_ENABLED 578 if (mp == dead_rootmount) 579 return; 580#endif 581 582 fli = fstrans_get_lwp_info(mp, false); 583 fmi = fli->fli_mountinfo; 584 KASSERT(fli->fli_trans_cnt > 0); 585 586 if (fli->fli_trans_cnt > 1) { 587 fli->fli_trans_cnt -= 1; 588 589 return; 590 } 591 592 if (__predict_false(fstrans_gone_count > 0)) 593 fstrans_clear_lwp_info(); 594 595 s = pserialize_read_enter(); 596 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 597 fli->fli_trans_cnt = 0; 598 pserialize_read_exit(s); 599 600 return; 601 } 602 pserialize_read_exit(s); 603 604 mutex_enter(&fstrans_lock); 605 fli->fli_trans_cnt = 0; 606 cv_signal(&fstrans_count_cv); 607 mutex_exit(&fstrans_lock); 608} 609 610/* 611 * Check if we hold an lock. 612 */ 613int 614fstrans_held(struct mount *mp) 615{ 616 struct fstrans_lwp_info *fli; 617 struct fstrans_mount_info *fmi; 618 619 KASSERT(mp != dead_rootmount); 620 621 fli = fstrans_get_lwp_info(mp, true); 622 fmi = fli->fli_mountinfo; 623 624 return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp); 625} 626 627/* 628 * Check if this thread has an exclusive lock. 629 */ 630int 631fstrans_is_owner(struct mount *mp) 632{ 633 struct fstrans_lwp_info *fli; 634 struct fstrans_mount_info *fmi; 635 636 KASSERT(mp != dead_rootmount); 637 638 fli = fstrans_get_lwp_info(mp, true); 639 fmi = fli->fli_mountinfo; 640 641 return (fmi->fmi_owner == curlwp); 642} 643 644/* 645 * True, if no thread is in a transaction not granted at the current state. 646 */ 647static bool 648state_change_done(const struct fstrans_mount_info *fmi) 649{ 650 struct fstrans_lwp_info *fli; 651 652 KASSERT(mutex_owned(&fstrans_lock)); 653 654 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 655 if (fli->fli_mountinfo != fmi) 656 continue; 657 if (fli->fli_trans_cnt == 0) 658 continue; 659 if (fli->fli_self == curlwp) 660 continue; 661 if (grant_lock(fmi, fli->fli_lock_type)) 662 continue; 663 664 return false; 665 } 666 667 return true; 668} 669 670/* 671 * Set new file system state. 672 */ 673int 674fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 675{ 676 int error; 677 enum fstrans_state old_state; 678 struct fstrans_lwp_info *fli; 679 struct fstrans_mount_info *fmi; 680 681 KASSERT(mp != dead_rootmount); 682 683 fli = fstrans_get_lwp_info(mp, true); 684 fmi = fli->fli_mountinfo; 685 old_state = fmi->fmi_state; 686 if (old_state == new_state) 687 return 0; 688 689 mutex_enter(&fstrans_lock); 690 fmi->fmi_state = new_state; 691 pserialize_perform(fstrans_psz); 692 693 /* 694 * All threads see the new state now. 695 * Wait for transactions invalid at this state to leave. 696 */ 697 error = 0; 698 while (! state_change_done(fmi)) { 699 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 700 if (error) { 701 new_state = fmi->fmi_state = FSTRANS_NORMAL; 702 break; 703 } 704 } 705 if (old_state != new_state) { 706 if (old_state == FSTRANS_NORMAL) { 707 KASSERT(fmi->fmi_owner == NULL); 708 fmi->fmi_owner = curlwp; 709 } 710 if (new_state == FSTRANS_NORMAL) { 711 KASSERT(fmi->fmi_owner == curlwp); 712 fmi->fmi_owner = NULL; 713 } 714 } 715 cv_broadcast(&fstrans_state_cv); 716 mutex_exit(&fstrans_lock); 717 718 return error; 719} 720 721/* 722 * Get current file system state. 723 */ 724enum fstrans_state 725fstrans_getstate(struct mount *mp) 726{ 727 struct fstrans_lwp_info *fli; 728 struct fstrans_mount_info *fmi; 729 730 KASSERT(mp != dead_rootmount); 731 732 fli = fstrans_get_lwp_info(mp, true); 733 fmi = fli->fli_mountinfo; 734 735 return fmi->fmi_state; 736} 737 738/* 739 * Request a filesystem to suspend all operations. 740 */ 741int 742vfs_suspend(struct mount *mp, int nowait) 743{ 744 struct fstrans_lwp_info *fli; 745 int error; 746 747 if (mp == dead_rootmount) 748 return EOPNOTSUPP; 749 750 fli = fstrans_get_lwp_info(mp, true); 751 mp = fli->fli_mount; 752 753 if (nowait) { 754 if (!mutex_tryenter(&vfs_suspend_lock)) 755 return EWOULDBLOCK; 756 } else 757 mutex_enter(&vfs_suspend_lock); 758 759 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) 760 mutex_exit(&vfs_suspend_lock); 761 762 return error; 763} 764 765/* 766 * Request a filesystem to resume all operations. 767 */ 768void 769vfs_resume(struct mount *mp) 770{ 771 struct fstrans_lwp_info *fli; 772 773 KASSERT(mp != dead_rootmount); 774 775 fli = fstrans_get_lwp_info(mp, false); 776 mp = fli->fli_mount; 777 778 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 779 mutex_exit(&vfs_suspend_lock); 780} 781 782 783/* 784 * True, if no thread is running a cow handler. 785 */ 786static bool 787cow_state_change_done(const struct fstrans_mount_info *fmi) 788{ 789 struct fstrans_lwp_info *fli; 790 791 KASSERT(mutex_owned(&fstrans_lock)); 792 KASSERT(fmi->fmi_cow_change); 793 794 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 795 if (fli->fli_mount != fmi->fmi_mount) 796 continue; 797 if (fli->fli_cow_cnt == 0) 798 continue; 799 800 return false; 801 } 802 803 return true; 804} 805 806/* 807 * Prepare for changing this mounts cow list. 808 * Returns with fstrans_lock locked. 809 */ 810static void 811cow_change_enter(struct fstrans_mount_info *fmi) 812{ 813 814 mutex_enter(&fstrans_lock); 815 816 /* 817 * Wait for other threads changing the list. 818 */ 819 while (fmi->fmi_cow_change) 820 cv_wait(&fstrans_state_cv, &fstrans_lock); 821 822 /* 823 * Wait until all threads are aware of a state change. 824 */ 825 fmi->fmi_cow_change = true; 826 pserialize_perform(fstrans_psz); 827 828 while (! cow_state_change_done(fmi)) 829 cv_wait(&fstrans_count_cv, &fstrans_lock); 830} 831 832/* 833 * Done changing this mounts cow list. 834 */ 835static void 836cow_change_done(struct fstrans_mount_info *fmi) 837{ 838 839 KASSERT(mutex_owned(&fstrans_lock)); 840 841 fmi->fmi_cow_change = false; 842 pserialize_perform(fstrans_psz); 843 844 cv_broadcast(&fstrans_state_cv); 845 846 mutex_exit(&fstrans_lock); 847} 848 849/* 850 * Add a handler to this mount. 851 */ 852int 853fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 854 void *arg) 855{ 856 struct fstrans_mount_info *fmi; 857 struct fscow_handler *newch; 858 859 KASSERT(mp != dead_rootmount); 860 861 mutex_enter(&fstrans_lock); 862 fmi = mp->mnt_transinfo; 863 KASSERT(fmi != NULL); 864 fmi->fmi_ref_cnt += 1; 865 mutex_exit(&fstrans_lock); 866 867 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 868 newch->ch_func = func; 869 newch->ch_arg = arg; 870 871 cow_change_enter(fmi); 872 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 873 cow_change_done(fmi); 874 875 return 0; 876} 877 878/* 879 * Remove a handler from this mount. 880 */ 881int 882fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 883 void *arg) 884{ 885 struct fstrans_mount_info *fmi; 886 struct fscow_handler *hp = NULL; 887 888 KASSERT(mp != dead_rootmount); 889 890 fmi = mp->mnt_transinfo; 891 KASSERT(fmi != NULL); 892 893 cow_change_enter(fmi); 894 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 895 if (hp->ch_func == func && hp->ch_arg == arg) 896 break; 897 if (hp != NULL) { 898 LIST_REMOVE(hp, ch_list); 899 kmem_free(hp, sizeof(*hp)); 900 } 901 fstrans_mount_dtor(fmi); 902 cow_change_done(fmi); 903 904 return hp ? 0 : EINVAL; 905} 906 907/* 908 * Check for need to copy block that is about to be written. 909 */ 910int 911fscow_run(struct buf *bp, bool data_valid) 912{ 913 int error, s; 914 struct mount *mp; 915 struct fstrans_lwp_info *fli; 916 struct fstrans_mount_info *fmi; 917 struct fscow_handler *hp; 918 919 /* 920 * First check if we need run the copy-on-write handler. 921 */ 922 if ((bp->b_flags & B_COWDONE)) 923 return 0; 924 if (bp->b_vp == NULL) { 925 bp->b_flags |= B_COWDONE; 926 return 0; 927 } 928 if (bp->b_vp->v_type == VBLK) 929 mp = spec_node_getmountedfs(bp->b_vp); 930 else 931 mp = bp->b_vp->v_mount; 932 if (mp == NULL || mp == dead_rootmount) { 933 bp->b_flags |= B_COWDONE; 934 return 0; 935 } 936 937 fli = fstrans_get_lwp_info(mp, true); 938 fmi = fli->fli_mountinfo; 939 940 /* 941 * On non-recursed run check if other threads 942 * want to change the list. 943 */ 944 if (fli->fli_cow_cnt == 0) { 945 s = pserialize_read_enter(); 946 if (__predict_false(fmi->fmi_cow_change)) { 947 pserialize_read_exit(s); 948 mutex_enter(&fstrans_lock); 949 while (fmi->fmi_cow_change) 950 cv_wait(&fstrans_state_cv, &fstrans_lock); 951 fli->fli_cow_cnt = 1; 952 mutex_exit(&fstrans_lock); 953 } else { 954 fli->fli_cow_cnt = 1; 955 pserialize_read_exit(s); 956 } 957 } else 958 fli->fli_cow_cnt += 1; 959 960 /* 961 * Run all copy-on-write handlers, stop on error. 962 */ 963 error = 0; 964 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 965 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 966 break; 967 if (error == 0) 968 bp->b_flags |= B_COWDONE; 969 970 /* 971 * Check if other threads want to change the list. 972 */ 973 if (fli->fli_cow_cnt > 1) { 974 fli->fli_cow_cnt -= 1; 975 } else { 976 s = pserialize_read_enter(); 977 if (__predict_false(fmi->fmi_cow_change)) { 978 pserialize_read_exit(s); 979 mutex_enter(&fstrans_lock); 980 fli->fli_cow_cnt = 0; 981 cv_signal(&fstrans_count_cv); 982 mutex_exit(&fstrans_lock); 983 } else { 984 fli->fli_cow_cnt = 0; 985 pserialize_read_exit(s); 986 } 987 } 988 989 return error; 990} 991 992#if defined(DDB) 993void fstrans_dump(int); 994 995static void 996fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 997{ 998 char prefix[9]; 999 struct fstrans_lwp_info *fli; 1000 1001 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 1002 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 1003 if (fli->fli_self != l) 1004 continue; 1005 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 1006 if (! verbose) 1007 continue; 1008 } 1009 printf("%-8s", prefix); 1010 if (verbose) 1011 printf(" @%p", fli); 1012 if (fli->fli_mount == dead_rootmount) 1013 printf(" <dead>"); 1014 else if (fli->fli_mount != NULL) 1015 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 1016 else 1017 printf(" NULL"); 1018 if (fli->fli_alias != NULL) { 1019 struct mount *amp = fli->fli_alias->fli_mount; 1020 1021 printf(" alias"); 1022 if (verbose) 1023 printf(" @%p", fli->fli_alias); 1024 if (amp == NULL) 1025 printf(" NULL"); 1026 else 1027 printf(" (%s)", amp->mnt_stat.f_mntonname); 1028 } 1029 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 1030 printf(" gone"); 1031 if (fli->fli_trans_cnt == 0) { 1032 printf(" -"); 1033 } else { 1034 switch (fli->fli_lock_type) { 1035 case FSTRANS_LAZY: 1036 printf(" lazy"); 1037 break; 1038 case FSTRANS_SHARED: 1039 printf(" shared"); 1040 break; 1041 default: 1042 printf(" %#x", fli->fli_lock_type); 1043 break; 1044 } 1045 } 1046 printf(" %d cow %d alias %d\n", 1047 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt); 1048 prefix[0] = '\0'; 1049 } 1050} 1051 1052static void 1053fstrans_print_mount(struct mount *mp, int verbose) 1054{ 1055 struct fstrans_mount_info *fmi; 1056 1057 fmi = mp->mnt_transinfo; 1058 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 1059 return; 1060 1061 printf("%-16s ", mp->mnt_stat.f_mntonname); 1062 if (fmi == NULL) { 1063 printf("(null)\n"); 1064 return; 1065 } 1066 printf("owner %p ", fmi->fmi_owner); 1067 switch (fmi->fmi_state) { 1068 case FSTRANS_NORMAL: 1069 printf("state normal\n"); 1070 break; 1071 case FSTRANS_SUSPENDING: 1072 printf("state suspending\n"); 1073 break; 1074 case FSTRANS_SUSPENDED: 1075 printf("state suspended\n"); 1076 break; 1077 default: 1078 printf("state %#x\n", fmi->fmi_state); 1079 break; 1080 } 1081} 1082 1083void 1084fstrans_dump(int full) 1085{ 1086 const struct proclist_desc *pd; 1087 struct proc *p; 1088 struct lwp *l; 1089 struct mount *mp; 1090 1091 printf("Fstrans locks by lwp:\n"); 1092 for (pd = proclists; pd->pd_list != NULL; pd++) 1093 PROCLIST_FOREACH(p, pd->pd_list) 1094 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1095 fstrans_print_lwp(p, l, full == 1); 1096 1097 printf("Fstrans state by mount:\n"); 1098 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1099 fstrans_print_mount(mp, full == 1); 1100} 1101#endif /* defined(DDB) */ 1102