vfs_trans.c revision 1.70
1/* $NetBSD: vfs_trans.c,v 1.70 2022/11/04 11:20:39 hannken Exp $ */ 2 3/*- 4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.70 2022/11/04 11:20:39 hannken Exp $"); 34 35/* 36 * File system transaction operations. 
 */

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/pserialize.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/pool.h>

#include <miscfs/deadfs/deadfs.h>
#include <miscfs/specfs/specdev.h>

/* Number of buckets in the mount -> fstrans_mount_info hash table. */
#define FSTRANS_MOUNT_HASHSIZE	32

enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED			/* Granted while not suspending */
};

/* One registered copy-on-write handler on a mount (see fscow_establish()). */
struct fscow_handler {
	LIST_ENTRY(fscow_handler) ch_list;	/* Entry on fmi_cow_handler. */
	int (*ch_func)(void *, struct buf *, bool); /* Handler function. */
	void *ch_arg;				/* Opaque handler argument. */
};

/*
 * Per-LWP, per-mount transaction state.  Entries for one LWP are chained
 * through fli_succ starting at l->l_fstrans and are also kept on the
 * global fstrans_fli_head list (under fstrans_lock) so state changes can
 * scan all LWPs.
 */
struct fstrans_lwp_info {
	struct fstrans_lwp_info *fli_succ;	/* Next on this LWP's list. */
	struct lwp *fli_self;			/* Owning LWP or NULL. */
	struct mount *fli_mount;		/* Mount this entry is for. */
	struct fstrans_lwp_info *fli_alias;	/* Lower mount's entry for
						   layered mounts, or NULL. */
	struct fstrans_mount_info *fli_mountinfo; /* Mount state (refed). */
	int fli_trans_cnt;			/* Transaction recursion count. */
	int fli_alias_cnt;			/* Upper entries aliasing us. */
	int fli_cow_cnt;			/* Cow handler recursion count. */
	enum fstrans_lock_type fli_lock_type;	/* Type of current transaction. */
	LIST_ENTRY(fstrans_lwp_info) fli_list;	/* Entry on fstrans_fli_head. */
};

/*
 * Per-mount transaction state, reference counted; lives until the last
 * reference goes away, possibly after the mount itself is unmounted
 * (fmi_gone).
 */
struct fstrans_mount_info {
	enum fstrans_state fmi_state;		/* Current suspension state. */
	unsigned int fmi_ref_cnt;		/* Reference count. */
	bool fmi_gone;				/* Mount has been unmounted. */
	bool fmi_cow_change;			/* Cow handler list changing. */
	SLIST_ENTRY(fstrans_mount_info) fmi_hash; /* Hash chain entry. */
	LIST_HEAD(, fscow_handler) fmi_cow_handler; /* Cow handlers. */
	struct mount *fmi_mount;		/* Back pointer to mount. */
	struct fstrans_mount_info *fmi_lower_info; /* Lowest mount's info for
						   layered mounts, or NULL. */
	struct lwp *fmi_owner;			/* LWP owning the suspension. */
};
SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info);

static kmutex_t vfs_suspend_lock	/* Serialize suspensions. */
    __cacheline_aligned;
static kmutex_t fstrans_lock		/* Fstrans big lock. */
    __cacheline_aligned;
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static pool_cache_t fstrans_lwp_cache;	/* Cache of fstrans_lwp_info. */

static u_long fstrans_mount_hashmask;
static struct fstrans_mount_hashhead *fstrans_mount_hashtab;
static int fstrans_gone_count;		/* Number of fstrans_mount_info gone. */

static inline uint32_t fstrans_mount_hash(struct mount *);
static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *);
static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static int fstrans_lwp_pcc(void *, void *, int);
static void fstrans_lwp_pcd(void *, void *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const struct fstrans_mount_info *,
    const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

/*
 * Initialize.  Set up locks, condvars, the pserialize instance, the
 * per-LWP info pool cache and the mount info hash table.
 */
void
fstrans_init(void)
{

	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&fstrans_state_cv, "fstchg");
	cv_init(&fstrans_count_cv, "fstcnt");
	fstrans_psz = pserialize_create();
	LIST_INIT(&fstrans_fli_head);
	fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info),
	    coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE,
	    fstrans_lwp_pcc, fstrans_lwp_pcd, NULL);
	KASSERT(fstrans_lwp_cache != NULL);
	fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST,
	    true, &fstrans_mount_hashmask);
}

/*
 * pool_cache constructor for fstrans_lwp_info.
 * Updating the global list
 * produces cache misses on MP.  Minimise by keeping free entries on list.
 */
int
fstrans_lwp_pcc(void *arg, void *obj, int flags)
{
	struct fstrans_lwp_info *fli = obj;

	memset(fli, 0, sizeof(*fli));

	/* Keep even free objects on the global list (see comment above). */
	mutex_enter(&fstrans_lock);
	LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
	mutex_exit(&fstrans_lock);

	return 0;
}

/*
 * pool_cache destructor for fstrans_lwp_info: remove from the global list.
 */
void
fstrans_lwp_pcd(void *arg, void *obj)
{
	struct fstrans_lwp_info *fli = obj;

	mutex_enter(&fstrans_lock);
	LIST_REMOVE(fli, fli_list);
	mutex_exit(&fstrans_lock);
}

/*
 * Deallocate lwp state on LWP exit: drop mount info references and
 * return all of this LWP's entries to the pool cache.  All transactions
 * and cow handler runs must have finished (asserted below).
 */
void
fstrans_lwp_dtor(lwp_t *l)
{
	struct fstrans_lwp_info *fli, *fli_next;

	if (l->l_fstrans == NULL)
		return;

	mutex_enter(&fstrans_lock);
	for (fli = l->l_fstrans; fli; fli = fli_next) {
		KASSERT(fli->fli_trans_cnt == 0);
		KASSERT(fli->fli_cow_cnt == 0);
		KASSERT(fli->fli_self == l);
		if (fli->fli_mount != NULL)
			fstrans_mount_dtor(fli->fli_mountinfo);
		fli_next = fli->fli_succ;
		fli->fli_alias_cnt = 0;
		fli->fli_mount = NULL;
		fli->fli_alias = NULL;
		fli->fli_mountinfo = NULL;
		fli->fli_self = NULL;
	}
	mutex_exit(&fstrans_lock);

	/* Return entries to the cache outside fstrans_lock. */
	for (fli = l->l_fstrans; fli; fli = fli_next) {
		fli_next = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
	l->l_fstrans = NULL;
}

/*
 * mount pointer to hash bucket index
 */
static inline uint32_t
fstrans_mount_hash(struct mount *mp)
{

	return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) &
	    fstrans_mount_hashmask;
}

/*
 * retrieve fstrans_mount_info by mount or NULL
 */
static inline struct fstrans_mount_info *
fstrans_mount_get(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi, *fmi_lower;

	KASSERT(mutex_owned(&fstrans_lock));

	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) {
		if (fmi->fmi_mount == mp) {
			if (__predict_false(mp->mnt_lower != NULL &&
			    fmi->fmi_lower_info == NULL)) {
				/*
				 * Intern the lower/lowest mount into
				 * this mount info on first lookup.
				 */
				KASSERT(fmi->fmi_ref_cnt == 1);

				/* Collapse chains: always point at the
				   lowest mount's info. */
				fmi_lower = fstrans_mount_get(mp->mnt_lower);
				if (fmi_lower && fmi_lower->fmi_lower_info)
					fmi_lower = fmi_lower->fmi_lower_info;
				if (fmi_lower == NULL)
					return NULL;
				fmi->fmi_lower_info = fmi_lower;
				fmi->fmi_lower_info->fmi_ref_cnt += 1;
			}
			return fmi;
		}
	}

	return NULL;
}

/*
 * Dereference mount state: drop one reference and free everything
 * once the last reference is gone.  Called with fstrans_lock held.
 */
static void
fstrans_mount_dtor(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt -= 1;
	if (__predict_true(fmi->fmi_ref_cnt > 0)) {
		return;
	}

	/* Last reference: must be back to normal state, no handlers,
	   no suspension owner. */
	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
	KASSERT(fmi->fmi_owner == NULL);

	if (fmi->fmi_lower_info)
		fstrans_mount_dtor(fmi->fmi_lower_info);

	KASSERT(fstrans_gone_count > 0);
	fstrans_gone_count -= 1;

	KASSERT(fmi->fmi_mount->mnt_lower == NULL);

	/* The struct mount itself is freed here, after unmount. */
	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
	kmem_free(fmi, sizeof(*fmi));
}

/*
 * Allocate mount state.
 */
int
fstrans_mount(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *newfmi;

	indx = fstrans_mount_hash(mp);

	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
	newfmi->fmi_state = FSTRANS_NORMAL;
	newfmi->fmi_ref_cnt = 1;	/* Reference held by the mount. */
	newfmi->fmi_gone = false;
	LIST_INIT(&newfmi->fmi_cow_handler);
	newfmi->fmi_cow_change = false;
	newfmi->fmi_mount = mp;
	newfmi->fmi_lower_info = NULL;
	newfmi->fmi_owner = NULL;

	mutex_enter(&fstrans_lock);
	SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash);
	mutex_exit(&fstrans_lock);

	return 0;
}

/*
 * Deallocate mount state.  Marks the info gone, removes it from the
 * hash table and drops the mount's reference; the info itself stays
 * around until all other references (per-LWP entries, cow handlers)
 * are gone.
 */
void
fstrans_unmount(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi;

	indx = fstrans_mount_hash(mp);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	fmi->fmi_gone = true;
	SLIST_REMOVE(&fstrans_mount_hashtab[indx],
	    fmi, fstrans_mount_info, fmi_hash);
	fstrans_gone_count += 1;
	fstrans_mount_dtor(fmi);
	mutex_exit(&fstrans_lock);
}

/*
 * Clear mount entries whose mount is gone from the current LWP's list.
 */
static void
fstrans_clear_lwp_info(void)
{
	struct fstrans_lwp_info **p, *fli, *tofree = NULL;

	/*
	 * Scan our list clearing entries whose mount is gone.
	 */
	mutex_enter(&fstrans_lock);
	for (p = &curlwp->l_fstrans; *p; ) {
		fli = *p;
		/* Only reclaim quiescent entries of unmounted file systems. */
		if (fli->fli_mount != NULL &&
		    fli->fli_mountinfo->fmi_gone &&
		    fli->fli_trans_cnt == 0 &&
		    fli->fli_cow_cnt == 0 &&
		    fli->fli_alias_cnt == 0) {
			*p = (*p)->fli_succ;
			fstrans_mount_dtor(fli->fli_mountinfo);
			if (fli->fli_alias) {
				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
				fli->fli_alias->fli_alias_cnt--;
			}
			fli->fli_mount = NULL;
			fli->fli_alias = NULL;
			fli->fli_mountinfo = NULL;
			fli->fli_self = NULL;
			/* Removing may have unblocked an alias: rescan. */
			p = &curlwp->l_fstrans;
			fli->fli_succ = tofree;
			tofree = fli;
		} else {
			p = &(*p)->fli_succ;
		}
	}
#ifdef DIAGNOSTIC
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
		if (fli->fli_alias != NULL)
			KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
	mutex_exit(&fstrans_lock);

	/* Return reclaimed entries to the cache outside fstrans_lock. */
	while (tofree != NULL) {
		fli = tofree;
		tofree = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
}

/*
 * Allocate and return per lwp info for this mount.  For layered mounts
 * the lower mount's entry is returned instead and linked as alias.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli, *fli_lower;
	struct fstrans_mount_info *fmi;

	/* Already present? */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Lookup mount info and get lower mount per lwp info.
	 */
	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	if (fmi == NULL) {
		mutex_exit(&fstrans_lock);
		return NULL;
	}
	fmi->fmi_ref_cnt += 1;	/* Reference held by the new entry. */
	mutex_exit(&fstrans_lock);

	if (fmi->fmi_lower_info) {
		/* Layered mount: recursively set up the lowest
		   mount's per-LWP entry first. */
		fli_lower =
		    fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount);
		if (fli_lower == NULL) {
			mutex_enter(&fstrans_lock);
			fstrans_mount_dtor(fmi);
			mutex_exit(&fstrans_lock);

			return NULL;
		}
	} else {
		fli_lower = NULL;
	}

	/*
	 * Allocate a new entry.
	 */
	fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK);
	KASSERT(fli->fli_trans_cnt == 0);
	KASSERT(fli->fli_cow_cnt == 0);
	KASSERT(fli->fli_alias_cnt == 0);
	KASSERT(fli->fli_mount == NULL);
	KASSERT(fli->fli_alias == NULL);
	KASSERT(fli->fli_mountinfo == NULL);
	KASSERT(fli->fli_self == NULL);

	/*
	 * Attach the mount info and alias.
	 */

	fli->fli_self = curlwp;
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;

	fli->fli_succ = curlwp->l_fstrans;
	curlwp->l_fstrans = fli;

	if (fli_lower) {
		/* Transactions are taken on the lower mount's entry. */
		fli->fli_alias = fli_lower;
		fli->fli_alias->fli_alias_cnt++;
		fli = fli->fli_alias;
	}

	return fli;
}

/*
 * Retrieve the per lwp info for this mount allocating if necessary.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
	struct fstrans_lwp_info *fli;

	/*
	 * Scan our list for a match.
	 */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp) {
			KASSERT(mp->mnt_lower == NULL ||
			    fli->fli_alias != NULL);
			/* For layered mounts operate on the lower entry. */
			if (fli->fli_alias != NULL)
				fli = fli->fli_alias;
			break;
		}
	}

	if (do_alloc) {
		if (__predict_false(fli == NULL))
			fli = fstrans_alloc_lwp_info(mp);
	}

	return fli;
}

/*
 * Check if this lock type is granted at this state.
 */
static bool
grant_lock(const struct fstrans_mount_info *fmi,
    const enum fstrans_lock_type type)
{

	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
		return true;
	/* The suspending LWP may always enter. */
	if (fmi->fmi_owner == curlwp)
		return true;
	/* Lazy transactions are still granted while suspending. */
	if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
		return true;

	return false;
}

/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system increment the reference counter.
 *
 * The fast path takes the transaction inside a pserialize read section;
 * fstrans_setstate() publishes state changes with pserialize_perform(),
 * so after it returns all new transactions see the new state.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return 0;	/* Mount unknown to fstrans: no-op. */
	fmi = fli->fli_mountinfo;

	/* Recursive transaction: just count it. */
	if (fli->fli_trans_cnt > 0) {
		fli->fli_trans_cnt += 1;

		return 0;
	}

	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	/* Slow path: sleep until the state permits this lock type. */
	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}

void
fstrans_start(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
	KASSERT(error == 0);
}

int
fstrans_start_nowait(struct mount *mp)
{

	return _fstrans_start(mp, FSTRANS_SHARED, 0);
}

void
fstrans_start_lazy(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
	KASSERT(error == 0);
}

/*
 * Finish a transaction.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return;	/* Mount unknown to fstrans: no-op. */
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	/* Recursive transaction: just count down. */
	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	/* Opportunistically reclaim entries of unmounted file systems. */
	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	/* A suspension is in progress: wake the suspender. */
	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}

/*
 * Check if we hold a lock: either a transaction or the suspension.
 */
int
fstrans_held(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp);
}

/*
 * Check if this thread has an exclusive lock, i.e. owns the suspension.
 */
int
fstrans_is_owner(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	return (fmi->fmi_owner == curlwp);
}

/*
 * True, if no thread is in a transaction not granted at the current state.
 */
static bool
state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mountinfo != fmi)
			continue;
		if (fli->fli_trans_cnt == 0)
			continue;
		if (fli->fli_self == curlwp)
			continue;
		if (grant_lock(fmi, fli->fli_lock_type))
			continue;

		return false;
	}

	return true;
}

/*
 * Set new file system state.  Publishes the new state with
 * pserialize_perform() so the lock-free fast paths in _fstrans_start()
 * and fstrans_done() observe it, then waits for transactions that are
 * no longer granted to drain.  On signal, reverts to FSTRANS_NORMAL
 * and returns the error.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return ENOENT;
	fmi = fli->fli_mountinfo;
	old_state = fmi->fmi_state;
	if (old_state == new_state)
		return 0;

	mutex_enter(&fstrans_lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans_psz);

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			/* Interrupted: back out to normal state. */
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}
	/* Track the LWP owning the suspension across state changes. */
	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == NULL);
			fmi->fmi_owner = curlwp;
		}
		if (new_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == curlwp);
			fmi->fmi_owner = NULL;
		}
	}
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	return error;
}

/*
 * Get current file system state.
 */
enum fstrans_state
fstrans_getstate(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	KASSERT(fli != NULL);
	fmi = fli->fli_mountinfo;

	return fmi->fmi_state;
}

/*
 * Request a filesystem to suspend all operations.  Returns with
 * vfs_suspend_lock held on success; vfs_resume() releases it.
 */
int
vfs_suspend(struct mount *mp, int nowait)
{
	struct fstrans_lwp_info *fli;
	int error;

	if (mp == dead_rootmount)
		return EOPNOTSUPP;

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return ENOENT;

	if (nowait) {
		if (!mutex_tryenter(&vfs_suspend_lock))
			return EWOULDBLOCK;
	} else
		mutex_enter(&vfs_suspend_lock);

	/* For layered mounts suspend the lowest file system. */
	if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) {
		mutex_exit(&vfs_suspend_lock);
		return error;
	}

	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		vfs_resume(mp);
		return ENOENT;
	}

	return 0;
}

/*
 * Request a filesystem to resume all operations.  Drops the
 * vfs_suspend_lock taken by vfs_suspend().
 */
void
vfs_resume(struct mount *mp)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	mp = fli->fli_mount;

	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
	mutex_exit(&vfs_suspend_lock);
}


/*
 * True, if no thread is running a cow handler.
 */
static bool
cow_state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));
	KASSERT(fmi->fmi_cow_change);

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mount != fmi->fmi_mount)
			continue;
		if (fli->fli_cow_cnt == 0)
			continue;

		return false;
	}

	return true;
}

/*
 * Prepare for changing this mounts cow list.
 * Returns with fstrans_lock locked.
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_lock);

	/*
	 * Wait for other threads changing the list.
	 */
	while (fmi->fmi_cow_change)
		cv_wait(&fstrans_state_cv, &fstrans_lock);

	/*
	 * Wait until all threads are aware of a state change.
	 * fscow_run() checks fmi_cow_change inside a pserialize read
	 * section, so after pserialize_perform() no new handler run
	 * can start unnoticed.
	 */
	fmi->fmi_cow_change = true;
	pserialize_perform(fstrans_psz);

	/* Wait for running handlers to drain. */
	while (! cow_state_change_done(fmi))
		cv_wait(&fstrans_count_cv, &fstrans_lock);
}

/*
 * Done changing this mounts cow list.  Releases fstrans_lock taken
 * by cow_change_enter().
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}

/*
 * Add a handler to this mount.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *newch;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt += 1;	/* Reference held by the handler. */
	mutex_exit(&fstrans_lock);

	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
	newch->ch_func = func;
	newch->ch_arg = arg;

	cow_change_enter(fmi);
	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
	cow_change_done(fmi);

	return 0;
}

/*
 * Remove a handler from this mount.
 */
int
fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp = NULL;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	mutex_exit(&fstrans_lock);

	cow_change_enter(fmi);
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if (hp->ch_func == func && hp->ch_arg == arg)
			break;
	if (hp != NULL) {
		LIST_REMOVE(hp, ch_list);
		kmem_free(hp, sizeof(*hp));
	}
	/* Drop the reference taken by fscow_establish();
	   fstrans_lock is held here by cow_change_enter(). */
	fstrans_mount_dtor(fmi);
	cow_change_done(fmi);

	return hp ? 0 : EINVAL;
}

/*
 * Check for need to copy block that is about to be written.
 * Runs all handlers registered on the mount, stops on first error;
 * marks the buffer B_COWDONE on success.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	KASSERT(fli != NULL);
	fmi = fli->fli_mountinfo;

	/*
	 * On non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		/* Lock-free fast path paired with the pserialize_perform()
		   in cow_change_enter()/cow_change_done(). */
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* A list change is waiting: notify it. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}

#if defined(DDB)
void fstrans_dump(int);

/*
 * Print this LWP's fstrans entries from DDB; with verbose also print
 * idle entries and object addresses.
 */
static void
fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
{
	char prefix[9];
	struct fstrans_lwp_info *fli;

	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_self != l)
			continue;
		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
			if (! 
verbose)
				continue;
		}
		printf("%-8s", prefix);
		if (verbose)
			printf(" @%p", fli);
		if (fli->fli_mount == dead_rootmount)
			printf(" <dead>");
		else if (fli->fli_mount != NULL)
			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
		else
			printf(" NULL");
		if (fli->fli_alias != NULL) {
			struct mount *amp = fli->fli_alias->fli_mount;

			printf(" alias");
			if (verbose)
				printf(" @%p", fli->fli_alias);
			if (amp == NULL)
				printf(" NULL");
			else
				printf(" (%s)", amp->mnt_stat.f_mntonname);
		}
		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
			printf(" gone");
		if (fli->fli_trans_cnt == 0) {
			printf(" -");
		} else {
			switch (fli->fli_lock_type) {
			case FSTRANS_LAZY:
				printf(" lazy");
				break;
			case FSTRANS_SHARED:
				printf(" shared");
				break;
			default:
				printf(" %#x", fli->fli_lock_type);
				break;
			}
		}
		printf(" %d cow %d alias %d\n",
		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
		/* Print the pid.lid prefix only once per LWP. */
		prefix[0] = '\0';
	}
}

/*
 * Print one mount's fstrans state from DDB; without verbose only
 * mounts not in normal state are printed.
 */
static void
fstrans_print_mount(struct mount *mp, int verbose)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi;

	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash)
		if (fmi->fmi_mount == mp)
			break;

	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
		return;

	printf("%-16s ", mp->mnt_stat.f_mntonname);
	if (fmi == NULL) {
		printf("(null)\n");
		return;
	}
	printf("owner %p ", fmi->fmi_owner);
	switch (fmi->fmi_state) {
	case FSTRANS_NORMAL:
		printf("state normal\n");
		break;
	case FSTRANS_SUSPENDING:
		printf("state suspending\n");
		break;
	case FSTRANS_SUSPENDED:
		printf("state suspended\n");
		break;
	default:
		printf("state %#x\n", fmi->fmi_state);
		break;
	}
}

void
fstrans_dump(int full)
{
	const struct proclist_desc *pd;
	struct proc *p;
	struct lwp *l;
	struct mount *mp;

	/* DDB command: dump fstrans state of all LWPs and mounts. */
	printf("Fstrans locks by lwp:\n");
	for (pd = proclists; pd->pd_list != NULL; pd++)
		PROCLIST_FOREACH(p, pd->pd_list)
			LIST_FOREACH(l, &p->p_lwps, l_sibling)
				fstrans_print_lwp(p, l, full == 1);

	printf("Fstrans state by mount:\n");
	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
		fstrans_print_mount(mp, full == 1);
}
#endif /* defined(DDB) */