vfs_trans.c revision 1.65
1/* $NetBSD: vfs_trans.c,v 1.65 2022/07/08 07:42:05 hannken Exp $ */ 2 3/*- 4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.65 2022/07/08 07:42:05 hannken Exp $"); 34 35/* 36 * File system transaction operations. 37 */ 38 39#ifdef _KERNEL_OPT 40#include "opt_ddb.h" 41#endif 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/atomic.h> 46#include <sys/buf.h> 47#include <sys/kmem.h> 48#include <sys/mount.h> 49#include <sys/pserialize.h> 50#include <sys/vnode.h> 51#include <sys/fstrans.h> 52#include <sys/proc.h> 53#include <sys/pool.h> 54 55#include <miscfs/specfs/specdev.h> 56 57enum fstrans_lock_type { 58 FSTRANS_LAZY, /* Granted while not suspended */ 59 FSTRANS_SHARED /* Granted while not suspending */ 60}; 61 62struct fscow_handler { 63 LIST_ENTRY(fscow_handler) ch_list; 64 int (*ch_func)(void *, struct buf *, bool); 65 void *ch_arg; 66}; 67struct fstrans_lwp_info { 68 struct fstrans_lwp_info *fli_succ; 69 struct lwp *fli_self; 70 struct mount *fli_mount; 71 struct fstrans_lwp_info *fli_alias; 72 struct fstrans_mount_info *fli_mountinfo; 73 int fli_trans_cnt; 74 int fli_alias_cnt; 75 int fli_cow_cnt; 76 enum fstrans_lock_type fli_lock_type; 77 LIST_ENTRY(fstrans_lwp_info) fli_list; 78}; 79struct fstrans_mount_info { 80 enum fstrans_state fmi_state; 81 unsigned int fmi_ref_cnt; 82 bool fmi_gone; 83 bool fmi_cow_change; 84 LIST_HEAD(, fscow_handler) fmi_cow_handler; 85 struct mount *fmi_mount; 86 struct lwp *fmi_owner; 87}; 88 89static kmutex_t vfs_suspend_lock /* Serialize suspensions. */ 90 __cacheline_aligned; 91static kmutex_t fstrans_lock /* Fstrans big lock. */ 92 __cacheline_aligned; 93static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 94static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 95static pserialize_t fstrans_psz; /* Pserialize state. */ 96static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 97 /* List of all fstrans_lwp_info. */ 98static pool_cache_t fstrans_lwp_cache; /* Cache of fstrans_lwp_info. */ 99 100static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ 101 102static void fstrans_mount_dtor(struct fstrans_mount_info *); 103static void fstrans_clear_lwp_info(void); 104static inline struct fstrans_lwp_info * 105 fstrans_get_lwp_info(struct mount *, bool); 106static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 107static int fstrans_lwp_pcc(void *, void *, int); 108static void fstrans_lwp_pcd(void *, void *); 109static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 110static bool grant_lock(const struct fstrans_mount_info *, 111 const enum fstrans_lock_type); 112static bool state_change_done(const struct fstrans_mount_info *); 113static bool cow_state_change_done(const struct fstrans_mount_info *); 114static void cow_change_enter(struct fstrans_mount_info *); 115static void cow_change_done(struct fstrans_mount_info *); 116 117extern struct mount *dead_rootmount; 118 119#if defined(DIAGNOSTIC) 120 121struct fstrans_debug_mount { 122 struct mount *fdm_mount; 123 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 124}; 125 126static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 127 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 128 129static void 130fstrans_debug_mount(struct mount *mp) 131{ 132 struct fstrans_debug_mount *fdm, *new; 133 134 KASSERT(mutex_owned(&fstrans_lock)); 135 136 mutex_exit(&fstrans_lock); 137 new = kmem_alloc(sizeof(*new), KM_SLEEP); 138 new->fdm_mount = mp; 139 mutex_enter(&fstrans_lock); 140 141 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 142 KASSERT(fdm->fdm_mount != mp); 143 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 144} 145 146static void 147fstrans_debug_unmount(struct mount *mp) 148{ 149 struct fstrans_debug_mount *fdm; 150 151 KASSERT(mutex_owned(&fstrans_lock)); 152 153 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 154 if (fdm->fdm_mount == mp) 155 break; 156 KASSERT(fdm != NULL); 157 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 158 fstrans_debug_mount, fdm_list); 159 kmem_free(fdm, sizeof(*fdm)); 160} 161 162static void 163fstrans_debug_validate_mount(struct mount *mp) 164{ 165 struct fstrans_debug_mount *fdm; 166 167 KASSERT(mutex_owned(&fstrans_lock)); 168 169 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 170 if (fdm->fdm_mount == mp) 171 break; 172 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 173} 174 175#else /* defined(DIAGNOSTIC) */ 176 177#define fstrans_debug_mount(mp) 178#define fstrans_debug_unmount(mp) 179#define fstrans_debug_validate_mount(mp) 180 181#endif /* defined(DIAGNOSTIC) */ 182 183/* 184 * Initialize. 185 */ 186void 187fstrans_init(void) 188{ 189 190 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 191 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 192 cv_init(&fstrans_state_cv, "fstchg"); 193 cv_init(&fstrans_count_cv, "fstcnt"); 194 fstrans_psz = pserialize_create(); 195 LIST_INIT(&fstrans_fli_head); 196 fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 197 coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE, 198 fstrans_lwp_pcc, fstrans_lwp_pcd, NULL); 199 KASSERT(fstrans_lwp_cache != NULL); 200} 201 202/* 203 * pool_cache constructor for fstrans_lwp_info. Updating the global list 204 * produces cache misses on MP. Minimise by keeping free entries on list. 205 */ 206int 207fstrans_lwp_pcc(void *arg, void *obj, int flags) 208{ 209 struct fstrans_lwp_info *fli = obj; 210 211 memset(fli, 0, sizeof(*fli)); 212 213 mutex_enter(&fstrans_lock); 214 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 215 mutex_exit(&fstrans_lock); 216 217 return 0; 218} 219 220/* 221 * pool_cache destructor 222 */ 223void 224fstrans_lwp_pcd(void *arg, void *obj) 225{ 226 struct fstrans_lwp_info *fli = obj; 227 228 mutex_enter(&fstrans_lock); 229 LIST_REMOVE(fli, fli_list); 230 mutex_exit(&fstrans_lock); 231} 232 233/* 234 * Deallocate lwp state. 235 */ 236void 237fstrans_lwp_dtor(lwp_t *l) 238{ 239 struct fstrans_lwp_info *fli, *fli_next; 240 241 if (l->l_fstrans == NULL) 242 return; 243 244 mutex_enter(&fstrans_lock); 245 for (fli = l->l_fstrans; fli; fli = fli_next) { 246 KASSERT(fli->fli_trans_cnt == 0); 247 KASSERT(fli->fli_cow_cnt == 0); 248 KASSERT(fli->fli_self == l); 249 if (fli->fli_mount != NULL) 250 fstrans_mount_dtor(fli->fli_mountinfo); 251 fli_next = fli->fli_succ; 252 fli->fli_alias_cnt = 0; 253 fli->fli_mount = NULL; 254 fli->fli_alias = NULL; 255 fli->fli_mountinfo = NULL; 256 fli->fli_self = NULL; 257 } 258 mutex_exit(&fstrans_lock); 259 260 for (fli = l->l_fstrans; fli; fli = fli_next) { 261 fli_next = fli->fli_succ; 262 pool_cache_put(fstrans_lwp_cache, fli); 263 } 264 l->l_fstrans = NULL; 265} 266 267/* 268 * Dereference mount state. 269 */ 270static void 271fstrans_mount_dtor(struct fstrans_mount_info *fmi) 272{ 273 274 KASSERT(mutex_owned(&fstrans_lock)); 275 276 KASSERT(fmi != NULL); 277 fmi->fmi_ref_cnt -= 1; 278 if (__predict_true(fmi->fmi_ref_cnt > 0)) { 279 return; 280 } 281 282 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 283 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 284 KASSERT(fmi->fmi_owner == NULL); 285 286 KASSERT(fstrans_gone_count > 0); 287 fstrans_gone_count -= 1; 288 289 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 290 kmem_free(fmi, sizeof(*fmi)); 291} 292 293/* 294 * Allocate mount state. 295 */ 296int 297fstrans_mount(struct mount *mp) 298{ 299 struct fstrans_mount_info *newfmi; 300 301 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 302 newfmi->fmi_state = FSTRANS_NORMAL; 303 newfmi->fmi_ref_cnt = 1; 304 newfmi->fmi_gone = false; 305 LIST_INIT(&newfmi->fmi_cow_handler); 306 newfmi->fmi_cow_change = false; 307 newfmi->fmi_mount = mp; 308 newfmi->fmi_owner = NULL; 309 310 mutex_enter(&fstrans_lock); 311 mp->mnt_transinfo = newfmi; 312 fstrans_debug_mount(mp); 313 mutex_exit(&fstrans_lock); 314 315 return 0; 316} 317 318/* 319 * Deallocate mount state. 320 */ 321void 322fstrans_unmount(struct mount *mp) 323{ 324 struct fstrans_mount_info *fmi = mp->mnt_transinfo; 325 326 KASSERT(fmi != NULL); 327 328 mutex_enter(&fstrans_lock); 329 fstrans_debug_unmount(mp); 330 fmi->fmi_gone = true; 331 mp->mnt_transinfo = NULL; 332 fstrans_gone_count += 1; 333 fstrans_mount_dtor(fmi); 334 mutex_exit(&fstrans_lock); 335} 336 337/* 338 * Clear mount entries whose mount is gone. 339 */ 340static void 341fstrans_clear_lwp_info(void) 342{ 343 struct fstrans_lwp_info **p, *fli, *tofree = NULL; 344 345 /* 346 * Scan our list clearing entries whose mount is gone. 347 */ 348 mutex_enter(&fstrans_lock); 349 for (p = &curlwp->l_fstrans; *p; ) { 350 fli = *p; 351 if (fli->fli_mount != NULL && 352 fli->fli_mountinfo->fmi_gone && 353 fli->fli_trans_cnt == 0 && 354 fli->fli_cow_cnt == 0 && 355 fli->fli_alias_cnt == 0) { 356 *p = (*p)->fli_succ; 357 fstrans_mount_dtor(fli->fli_mountinfo); 358 if (fli->fli_alias) { 359 KASSERT(fli->fli_alias->fli_alias_cnt > 0); 360 fli->fli_alias->fli_alias_cnt--; 361 } 362 fli->fli_mount = NULL; 363 fli->fli_alias = NULL; 364 fli->fli_mountinfo = NULL; 365 fli->fli_self = NULL; 366 p = &curlwp->l_fstrans; 367 fli->fli_succ = tofree; 368 tofree = fli; 369 } else { 370 p = &(*p)->fli_succ; 371 } 372 } 373#ifdef DIAGNOSTIC 374 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) 375 if (fli->fli_alias != NULL) 376 KASSERT(fli->fli_alias->fli_self == curlwp); 377#endif /* DIAGNOSTIC */ 378 mutex_exit(&fstrans_lock); 379 380 while (tofree != NULL) { 381 fli = tofree; 382 tofree = fli->fli_succ; 383 pool_cache_put(fstrans_lwp_cache, fli); 384 } 385} 386 387/* 388 * Allocate and return per lwp info for this mount. 389 */ 390static struct fstrans_lwp_info * 391fstrans_alloc_lwp_info(struct mount *mp) 392{ 393 struct fstrans_lwp_info *fli; 394 struct fstrans_mount_info *fmi; 395 396 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 397 if (fli->fli_mount == mp) 398 return fli; 399 } 400 401 /* 402 * Allocate a new entry. 403 */ 404 fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK); 405 KASSERT(fli->fli_trans_cnt == 0); 406 KASSERT(fli->fli_cow_cnt == 0); 407 KASSERT(fli->fli_alias_cnt == 0); 408 KASSERT(fli->fli_mount == NULL); 409 KASSERT(fli->fli_alias == NULL); 410 KASSERT(fli->fli_mountinfo == NULL); 411 KASSERT(fli->fli_self == NULL); 412 fli->fli_succ = curlwp->l_fstrans; 413 curlwp->l_fstrans = fli; 414 415 /* 416 * Attach the entry to the mount if its mnt_transinfo is valid. 417 */ 418 419 mutex_enter(&fstrans_lock); 420 fli->fli_self = curlwp; 421 fstrans_debug_validate_mount(mp); 422 fmi = mp->mnt_transinfo; 423 KASSERT(fmi != NULL); 424 fli->fli_mount = mp; 425 fli->fli_mountinfo = fmi; 426 fmi->fmi_ref_cnt += 1; 427 do { 428 mp = mp->mnt_lower; 429 } while (mp && mp->mnt_lower); 430 mutex_exit(&fstrans_lock); 431 432 if (mp) { 433 fli->fli_alias = fstrans_alloc_lwp_info(mp); 434 fli->fli_alias->fli_alias_cnt++; 435 fli = fli->fli_alias; 436 } 437 438 return fli; 439} 440 441/* 442 * Retrieve the per lwp info for this mount allocating if necessary. 443 */ 444static inline struct fstrans_lwp_info * 445fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 446{ 447 struct fstrans_lwp_info *fli; 448 449 /* 450 * Scan our list for a match. 451 */ 452 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 453 if (fli->fli_mount == mp) { 454 KASSERT((mp->mnt_lower == NULL) == 455 (fli->fli_alias == NULL)); 456 if (fli->fli_alias != NULL) 457 fli = fli->fli_alias; 458 break; 459 } 460 } 461 462 if (do_alloc) { 463 if (__predict_false(fli == NULL)) 464 fli = fstrans_alloc_lwp_info(mp); 465 KASSERT(fli != NULL); 466 KASSERT(!fli->fli_mountinfo->fmi_gone); 467 } else { 468 KASSERT(fli != NULL); 469 } 470 471 return fli; 472} 473 474/* 475 * Check if this lock type is granted at this state. 476 */ 477static bool 478grant_lock(const struct fstrans_mount_info *fmi, 479 const enum fstrans_lock_type type) 480{ 481 482 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) 483 return true; 484 if (fmi->fmi_owner == curlwp) 485 return true; 486 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 487 return true; 488 489 return false; 490} 491 492/* 493 * Start a transaction. If this thread already has a transaction on this 494 * file system increment the reference counter. 495 */ 496static inline int 497_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 498{ 499 int s; 500 struct fstrans_lwp_info *fli; 501 struct fstrans_mount_info *fmi; 502 503#ifndef FSTRANS_DEAD_ENABLED 504 if (mp == dead_rootmount) 505 return 0; 506#endif 507 508 ASSERT_SLEEPABLE(); 509 510 fli = fstrans_get_lwp_info(mp, true); 511 fmi = fli->fli_mountinfo; 512 513 if (fli->fli_trans_cnt > 0) { 514 fli->fli_trans_cnt += 1; 515 516 return 0; 517 } 518 519 s = pserialize_read_enter(); 520 if (__predict_true(grant_lock(fmi, lock_type))) { 521 fli->fli_trans_cnt = 1; 522 fli->fli_lock_type = lock_type; 523 pserialize_read_exit(s); 524 525 return 0; 526 } 527 pserialize_read_exit(s); 528 529 if (! wait) 530 return EBUSY; 531 532 mutex_enter(&fstrans_lock); 533 while (! grant_lock(fmi, lock_type)) 534 cv_wait(&fstrans_state_cv, &fstrans_lock); 535 fli->fli_trans_cnt = 1; 536 fli->fli_lock_type = lock_type; 537 mutex_exit(&fstrans_lock); 538 539 return 0; 540} 541 542void 543fstrans_start(struct mount *mp) 544{ 545 int error __diagused; 546 547 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 548 KASSERT(error == 0); 549} 550 551int 552fstrans_start_nowait(struct mount *mp) 553{ 554 555 return _fstrans_start(mp, FSTRANS_SHARED, 0); 556} 557 558void 559fstrans_start_lazy(struct mount *mp) 560{ 561 int error __diagused; 562 563 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 564 KASSERT(error == 0); 565} 566 567/* 568 * Finish a transaction. 569 */ 570void 571fstrans_done(struct mount *mp) 572{ 573 int s; 574 struct fstrans_lwp_info *fli; 575 struct fstrans_mount_info *fmi; 576 577#ifndef FSTRANS_DEAD_ENABLED 578 if (mp == dead_rootmount) 579 return; 580#endif 581 582 fli = fstrans_get_lwp_info(mp, false); 583 fmi = fli->fli_mountinfo; 584 KASSERT(fli->fli_trans_cnt > 0); 585 586 if (fli->fli_trans_cnt > 1) { 587 fli->fli_trans_cnt -= 1; 588 589 return; 590 } 591 592 if (__predict_false(fstrans_gone_count > 0)) 593 fstrans_clear_lwp_info(); 594 595 s = pserialize_read_enter(); 596 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 597 fli->fli_trans_cnt = 0; 598 pserialize_read_exit(s); 599 600 return; 601 } 602 pserialize_read_exit(s); 603 604 mutex_enter(&fstrans_lock); 605 fli->fli_trans_cnt = 0; 606 cv_signal(&fstrans_count_cv); 607 mutex_exit(&fstrans_lock); 608} 609 610/* 611 * Check if we hold an lock. 612 */ 613int 614fstrans_held(struct mount *mp) 615{ 616 struct fstrans_lwp_info *fli; 617 struct fstrans_mount_info *fmi; 618 619 KASSERT(mp != dead_rootmount); 620 621 fli = fstrans_get_lwp_info(mp, true); 622 fmi = fli->fli_mountinfo; 623 624 return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp); 625} 626 627/* 628 * Check if this thread has an exclusive lock. 629 */ 630int 631fstrans_is_owner(struct mount *mp) 632{ 633 struct fstrans_lwp_info *fli; 634 struct fstrans_mount_info *fmi; 635 636 KASSERT(mp != dead_rootmount); 637 638 fli = fstrans_get_lwp_info(mp, true); 639 fmi = fli->fli_mountinfo; 640 641 return (fmi->fmi_owner == curlwp); 642} 643 644/* 645 * True, if no thread is in a transaction not granted at the current state. 646 */ 647static bool 648state_change_done(const struct fstrans_mount_info *fmi) 649{ 650 struct fstrans_lwp_info *fli; 651 652 KASSERT(mutex_owned(&fstrans_lock)); 653 654 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 655 if (fli->fli_mountinfo != fmi) 656 continue; 657 if (fli->fli_trans_cnt == 0) 658 continue; 659 if (fli->fli_self == curlwp) 660 continue; 661 if (grant_lock(fmi, fli->fli_lock_type)) 662 continue; 663 664 return false; 665 } 666 667 return true; 668} 669 670/* 671 * Set new file system state. 672 */ 673int 674fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 675{ 676 int error; 677 enum fstrans_state old_state; 678 struct fstrans_lwp_info *fli; 679 struct fstrans_mount_info *fmi; 680 681 KASSERT(mp != dead_rootmount); 682 683 fli = fstrans_get_lwp_info(mp, true); 684 fmi = fli->fli_mountinfo; 685 old_state = fmi->fmi_state; 686 if (old_state == new_state) 687 return 0; 688 689 mutex_enter(&fstrans_lock); 690 fmi->fmi_state = new_state; 691 pserialize_perform(fstrans_psz); 692 693 /* 694 * All threads see the new state now. 695 * Wait for transactions invalid at this state to leave. 696 */ 697 error = 0; 698 while (! state_change_done(fmi)) { 699 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 700 if (error) { 701 new_state = fmi->fmi_state = FSTRANS_NORMAL; 702 break; 703 } 704 } 705 if (old_state != new_state) { 706 if (old_state == FSTRANS_NORMAL) { 707 KASSERT(fmi->fmi_owner == NULL); 708 fmi->fmi_owner = curlwp; 709 } 710 if (new_state == FSTRANS_NORMAL) { 711 KASSERT(fmi->fmi_owner == curlwp); 712 fmi->fmi_owner = NULL; 713 } 714 } 715 cv_broadcast(&fstrans_state_cv); 716 mutex_exit(&fstrans_lock); 717 718 return error; 719} 720 721/* 722 * Get current file system state. 723 */ 724enum fstrans_state 725fstrans_getstate(struct mount *mp) 726{ 727 struct fstrans_lwp_info *fli; 728 struct fstrans_mount_info *fmi; 729 730 KASSERT(mp != dead_rootmount); 731 732 fli = fstrans_get_lwp_info(mp, true); 733 fmi = fli->fli_mountinfo; 734 735 return fmi->fmi_state; 736} 737 738/* 739 * Request a filesystem to suspend all operations. 740 */ 741int 742vfs_suspend(struct mount *mp, int nowait) 743{ 744 struct fstrans_lwp_info *fli; 745 int error; 746 747 if (mp == dead_rootmount) 748 return EOPNOTSUPP; 749 750 fli = fstrans_get_lwp_info(mp, true); 751 752 if (nowait) { 753 if (!mutex_tryenter(&vfs_suspend_lock)) 754 return EWOULDBLOCK; 755 } else 756 mutex_enter(&vfs_suspend_lock); 757 758 if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) { 759 mutex_exit(&vfs_suspend_lock); 760 return error; 761 } 762 763 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 764 vfs_resume(mp); 765 return ENOENT; 766 } 767 768 return 0; 769} 770 771/* 772 * Request a filesystem to resume all operations. 773 */ 774void 775vfs_resume(struct mount *mp) 776{ 777 struct fstrans_lwp_info *fli; 778 779 KASSERT(mp != dead_rootmount); 780 781 fli = fstrans_get_lwp_info(mp, false); 782 mp = fli->fli_mount; 783 784 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 785 mutex_exit(&vfs_suspend_lock); 786} 787 788 789/* 790 * True, if no thread is running a cow handler. 791 */ 792static bool 793cow_state_change_done(const struct fstrans_mount_info *fmi) 794{ 795 struct fstrans_lwp_info *fli; 796 797 KASSERT(mutex_owned(&fstrans_lock)); 798 KASSERT(fmi->fmi_cow_change); 799 800 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 801 if (fli->fli_mount != fmi->fmi_mount) 802 continue; 803 if (fli->fli_cow_cnt == 0) 804 continue; 805 806 return false; 807 } 808 809 return true; 810} 811 812/* 813 * Prepare for changing this mounts cow list. 814 * Returns with fstrans_lock locked. 815 */ 816static void 817cow_change_enter(struct fstrans_mount_info *fmi) 818{ 819 820 mutex_enter(&fstrans_lock); 821 822 /* 823 * Wait for other threads changing the list. 824 */ 825 while (fmi->fmi_cow_change) 826 cv_wait(&fstrans_state_cv, &fstrans_lock); 827 828 /* 829 * Wait until all threads are aware of a state change. 830 */ 831 fmi->fmi_cow_change = true; 832 pserialize_perform(fstrans_psz); 833 834 while (! cow_state_change_done(fmi)) 835 cv_wait(&fstrans_count_cv, &fstrans_lock); 836} 837 838/* 839 * Done changing this mounts cow list. 840 */ 841static void 842cow_change_done(struct fstrans_mount_info *fmi) 843{ 844 845 KASSERT(mutex_owned(&fstrans_lock)); 846 847 fmi->fmi_cow_change = false; 848 pserialize_perform(fstrans_psz); 849 850 cv_broadcast(&fstrans_state_cv); 851 852 mutex_exit(&fstrans_lock); 853} 854 855/* 856 * Add a handler to this mount. 857 */ 858int 859fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 860 void *arg) 861{ 862 struct fstrans_mount_info *fmi; 863 struct fscow_handler *newch; 864 865 KASSERT(mp != dead_rootmount); 866 867 mutex_enter(&fstrans_lock); 868 fmi = mp->mnt_transinfo; 869 KASSERT(fmi != NULL); 870 fmi->fmi_ref_cnt += 1; 871 mutex_exit(&fstrans_lock); 872 873 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 874 newch->ch_func = func; 875 newch->ch_arg = arg; 876 877 cow_change_enter(fmi); 878 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 879 cow_change_done(fmi); 880 881 return 0; 882} 883 884/* 885 * Remove a handler from this mount. 886 */ 887int 888fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 889 void *arg) 890{ 891 struct fstrans_mount_info *fmi; 892 struct fscow_handler *hp = NULL; 893 894 KASSERT(mp != dead_rootmount); 895 896 fmi = mp->mnt_transinfo; 897 KASSERT(fmi != NULL); 898 899 cow_change_enter(fmi); 900 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 901 if (hp->ch_func == func && hp->ch_arg == arg) 902 break; 903 if (hp != NULL) { 904 LIST_REMOVE(hp, ch_list); 905 kmem_free(hp, sizeof(*hp)); 906 } 907 fstrans_mount_dtor(fmi); 908 cow_change_done(fmi); 909 910 return hp ? 0 : EINVAL; 911} 912 913/* 914 * Check for need to copy block that is about to be written. 915 */ 916int 917fscow_run(struct buf *bp, bool data_valid) 918{ 919 int error, s; 920 struct mount *mp; 921 struct fstrans_lwp_info *fli; 922 struct fstrans_mount_info *fmi; 923 struct fscow_handler *hp; 924 925 /* 926 * First check if we need run the copy-on-write handler. 927 */ 928 if ((bp->b_flags & B_COWDONE)) 929 return 0; 930 if (bp->b_vp == NULL) { 931 bp->b_flags |= B_COWDONE; 932 return 0; 933 } 934 if (bp->b_vp->v_type == VBLK) 935 mp = spec_node_getmountedfs(bp->b_vp); 936 else 937 mp = bp->b_vp->v_mount; 938 if (mp == NULL || mp == dead_rootmount) { 939 bp->b_flags |= B_COWDONE; 940 return 0; 941 } 942 943 fli = fstrans_get_lwp_info(mp, true); 944 fmi = fli->fli_mountinfo; 945 946 /* 947 * On non-recursed run check if other threads 948 * want to change the list. 949 */ 950 if (fli->fli_cow_cnt == 0) { 951 s = pserialize_read_enter(); 952 if (__predict_false(fmi->fmi_cow_change)) { 953 pserialize_read_exit(s); 954 mutex_enter(&fstrans_lock); 955 while (fmi->fmi_cow_change) 956 cv_wait(&fstrans_state_cv, &fstrans_lock); 957 fli->fli_cow_cnt = 1; 958 mutex_exit(&fstrans_lock); 959 } else { 960 fli->fli_cow_cnt = 1; 961 pserialize_read_exit(s); 962 } 963 } else 964 fli->fli_cow_cnt += 1; 965 966 /* 967 * Run all copy-on-write handlers, stop on error. 968 */ 969 error = 0; 970 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 971 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 972 break; 973 if (error == 0) 974 bp->b_flags |= B_COWDONE; 975 976 /* 977 * Check if other threads want to change the list. 978 */ 979 if (fli->fli_cow_cnt > 1) { 980 fli->fli_cow_cnt -= 1; 981 } else { 982 s = pserialize_read_enter(); 983 if (__predict_false(fmi->fmi_cow_change)) { 984 pserialize_read_exit(s); 985 mutex_enter(&fstrans_lock); 986 fli->fli_cow_cnt = 0; 987 cv_signal(&fstrans_count_cv); 988 mutex_exit(&fstrans_lock); 989 } else { 990 fli->fli_cow_cnt = 0; 991 pserialize_read_exit(s); 992 } 993 } 994 995 return error; 996} 997 998#if defined(DDB) 999void fstrans_dump(int); 1000 1001static void 1002fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 1003{ 1004 char prefix[9]; 1005 struct fstrans_lwp_info *fli; 1006 1007 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 1008 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 1009 if (fli->fli_self != l) 1010 continue; 1011 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 1012 if (! verbose) 1013 continue; 1014 } 1015 printf("%-8s", prefix); 1016 if (verbose) 1017 printf(" @%p", fli); 1018 if (fli->fli_mount == dead_rootmount) 1019 printf(" <dead>"); 1020 else if (fli->fli_mount != NULL) 1021 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 1022 else 1023 printf(" NULL"); 1024 if (fli->fli_alias != NULL) { 1025 struct mount *amp = fli->fli_alias->fli_mount; 1026 1027 printf(" alias"); 1028 if (verbose) 1029 printf(" @%p", fli->fli_alias); 1030 if (amp == NULL) 1031 printf(" NULL"); 1032 else 1033 printf(" (%s)", amp->mnt_stat.f_mntonname); 1034 } 1035 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 1036 printf(" gone"); 1037 if (fli->fli_trans_cnt == 0) { 1038 printf(" -"); 1039 } else { 1040 switch (fli->fli_lock_type) { 1041 case FSTRANS_LAZY: 1042 printf(" lazy"); 1043 break; 1044 case FSTRANS_SHARED: 1045 printf(" shared"); 1046 break; 1047 default: 1048 printf(" %#x", fli->fli_lock_type); 1049 break; 1050 } 1051 } 1052 printf(" %d cow %d alias %d\n", 1053 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt); 1054 prefix[0] = '\0'; 1055 } 1056} 1057 1058static void 1059fstrans_print_mount(struct mount *mp, int verbose) 1060{ 1061 struct fstrans_mount_info *fmi; 1062 1063 fmi = mp->mnt_transinfo; 1064 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 1065 return; 1066 1067 printf("%-16s ", mp->mnt_stat.f_mntonname); 1068 if (fmi == NULL) { 1069 printf("(null)\n"); 1070 return; 1071 } 1072 printf("owner %p ", fmi->fmi_owner); 1073 switch (fmi->fmi_state) { 1074 case FSTRANS_NORMAL: 1075 printf("state normal\n"); 1076 break; 1077 case FSTRANS_SUSPENDING: 1078 printf("state suspending\n"); 1079 break; 1080 case FSTRANS_SUSPENDED: 1081 printf("state suspended\n"); 1082 break; 1083 default: 1084 printf("state %#x\n", fmi->fmi_state); 1085 break; 1086 } 1087} 1088 1089void 1090fstrans_dump(int full) 1091{ 1092 const struct proclist_desc *pd; 1093 struct proc *p; 1094 struct lwp *l; 1095 struct mount *mp; 1096 1097 printf("Fstrans locks by lwp:\n"); 1098 for (pd = proclists; pd->pd_list != NULL; pd++) 1099 PROCLIST_FOREACH(p, pd->pd_list) 1100 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1101 fstrans_print_lwp(p, l, full == 1); 1102 1103 printf("Fstrans state by mount:\n"); 1104 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1105 fstrans_print_mount(mp, full == 1); 1106} 1107#endif /* defined(DDB) */ 1108