vfs_trans.c revision 1.69
1/* $NetBSD: vfs_trans.c,v 1.69 2022/10/26 23:39:43 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.69 2022/10/26 23:39:43 riastradh Exp $"); 34 35/* 36 * File system transaction operations. 
 */

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/pserialize.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/pool.h>

#include <miscfs/deadfs/deadfs.h>
#include <miscfs/specfs/specdev.h>

#define FSTRANS_MOUNT_HASHSIZE  32

/*
 * Types of shared locks a transaction may hold on a mount.
 */
enum fstrans_lock_type {
        FSTRANS_LAZY,                   /* Granted while not suspended */
        FSTRANS_SHARED                  /* Granted while not suspending */
};

/* One registered copy-on-write handler on a mount. */
struct fscow_handler {
        LIST_ENTRY(fscow_handler) ch_list;      /* On fmi_cow_handler. */
        int (*ch_func)(void *, struct buf *, bool); /* Handler callback. */
        void *ch_arg;                           /* Opaque callback argument. */
};
/*
 * Per-LWP, per-mount transaction state.  Chained from l_fstrans via
 * fli_succ and also kept on the global fstrans_fli_head list.
 */
struct fstrans_lwp_info {
        struct fstrans_lwp_info *fli_succ;      /* Next in this LWP's chain. */
        struct lwp *fli_self;                   /* Owning LWP or NULL. */
        struct mount *fli_mount;                /* Mount this entry is for. */
        struct fstrans_lwp_info *fli_alias;     /* Entry of lower mount. */
        struct fstrans_mount_info *fli_mountinfo; /* Mount info reference. */
        int fli_trans_cnt;                      /* Transaction recursion. */
        int fli_alias_cnt;                      /* # entries aliasing this. */
        int fli_cow_cnt;                        /* Cow handler recursion. */
        enum fstrans_lock_type fli_lock_type;   /* Type of held transaction. */
        LIST_ENTRY(fstrans_lwp_info) fli_list;  /* On fstrans_fli_head. */
};
/*
 * Per-mount suspension state, hashed by mount pointer on
 * fstrans_mount_hashtab and reference counted.
 */
struct fstrans_mount_info {
        enum fstrans_state fmi_state;           /* Current suspension state. */
        unsigned int fmi_ref_cnt;               /* Reference count. */
        bool fmi_gone;                          /* Mount is being unmounted. */
        bool fmi_cow_change;                    /* Cow list being changed. */
        SLIST_ENTRY(fstrans_mount_info) fmi_hash; /* Hash chain. */
        LIST_HEAD(, fscow_handler) fmi_cow_handler; /* Cow handlers. */
        struct mount *fmi_mount;                /* Back pointer to mount. */
        struct fstrans_mount_info *fmi_lower_info; /* Lowest layered mount. */
        struct lwp *fmi_owner;                  /* LWP suspending the mount. */
};
SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info);

static kmutex_t vfs_suspend_lock        /* Serialize suspensions. */
    __cacheline_aligned;
static kmutex_t fstrans_lock            /* Fstrans big lock. */
    __cacheline_aligned;
static kcondvar_t fstrans_state_cv;     /* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;     /* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;        /* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
                                        /* List of all fstrans_lwp_info. */
static pool_cache_t fstrans_lwp_cache;  /* Cache of fstrans_lwp_info. */

static u_long fstrans_mount_hashmask;
static struct fstrans_mount_hashhead *fstrans_mount_hashtab;
static int fstrans_gone_count;          /* Number of fstrans_mount_info gone. */

static inline uint32_t fstrans_mount_hash(struct mount *);
static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *);
static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static int fstrans_lwp_pcc(void *, void *, int);
static void fstrans_lwp_pcd(void *, void *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const struct fstrans_mount_info *,
    const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

/*
 * Initialize the fstrans subsystem: locks, condition variables, the
 * pserialize instance, the lwp info pool cache and the mount hash table.
 */
void
fstrans_init(void)
{

        mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&fstrans_state_cv, "fstchg");
        cv_init(&fstrans_count_cv, "fstcnt");
        fstrans_psz = pserialize_create();
        LIST_INIT(&fstrans_fli_head);
        fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info),
            coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE,
            fstrans_lwp_pcc, fstrans_lwp_pcd, NULL);
        KASSERT(fstrans_lwp_cache != NULL);
        fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST,
            true, &fstrans_mount_hashmask);
}

/*
 * pool_cache constructor for fstrans_lwp_info.  Updating the global list
 * produces cache misses on MP.  Minimise by keeping free entries on list.
 */
int
fstrans_lwp_pcc(void *arg, void *obj, int flags)
{
        struct fstrans_lwp_info *fli = obj;

        memset(fli, 0, sizeof(*fli));

        /* New entries stay on the global list for their whole lifetime. */
        mutex_enter(&fstrans_lock);
        LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
        mutex_exit(&fstrans_lock);

        return 0;
}

/*
 * pool_cache destructor: take the entry off the global list again.
 */
void
fstrans_lwp_pcd(void *arg, void *obj)
{
        struct fstrans_lwp_info *fli = obj;

        mutex_enter(&fstrans_lock);
        LIST_REMOVE(fli, fli_list);
        mutex_exit(&fstrans_lock);
}

/*
 * Deallocate lwp state on LWP exit: drop the mount info references and
 * return all entries of this LWP to the pool cache.
 */
void
fstrans_lwp_dtor(lwp_t *l)
{
        struct fstrans_lwp_info *fli, *fli_next;

        if (l->l_fstrans == NULL)
                return;

        /* First pass: detach entries from their mounts under the lock. */
        mutex_enter(&fstrans_lock);
        for (fli = l->l_fstrans; fli; fli = fli_next) {
                KASSERT(fli->fli_trans_cnt == 0);
                KASSERT(fli->fli_cow_cnt == 0);
                KASSERT(fli->fli_self == l);
                if (fli->fli_mount != NULL)
                        fstrans_mount_dtor(fli->fli_mountinfo);
                fli_next = fli->fli_succ;
                fli->fli_alias_cnt = 0;
                fli->fli_mount = NULL;
                fli->fli_alias = NULL;
                fli->fli_mountinfo = NULL;
                fli->fli_self = NULL;
        }
        mutex_exit(&fstrans_lock);

        /* Second pass: free without holding fstrans_lock. */
        for (fli = l->l_fstrans; fli; fli = fli_next) {
                fli_next = fli->fli_succ;
                pool_cache_put(fstrans_lwp_cache, fli);
        }
        l->l_fstrans = NULL;
}

/*
 * mount pointer to hash
 */
static inline uint32_t
fstrans_mount_hash(struct mount *mp)
{

        return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) &
            fstrans_mount_hashmask;
}

/*
 * Retrieve fstrans_mount_info by mount or NULL.  Must be called with
 * fstrans_lock held.  On first lookup of a layered mount the lowest
 * mount's info is interned into fmi_lower_info with a reference.
 */
static inline struct fstrans_mount_info *
fstrans_mount_get(struct mount *mp)
{
        uint32_t indx;
        struct fstrans_mount_info *fmi, *fmi_lower;

        KASSERT(mutex_owned(&fstrans_lock));

        indx = fstrans_mount_hash(mp);
        SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) {
                if (fmi->fmi_mount == mp) {
                        if (__predict_false(mp->mnt_lower != NULL &&
                            fmi->fmi_lower_info == NULL)) {
                                /*
                                 * Intern the lower/lowest mount into
                                 * this mount info on first lookup.
                                 */
                                KASSERT(fmi->fmi_ref_cnt == 1);

                                fmi_lower = fstrans_mount_get(mp->mnt_lower);
                                if (fmi_lower && fmi_lower->fmi_lower_info)
                                        fmi_lower = fmi_lower->fmi_lower_info;
                                if (fmi_lower == NULL)
                                        return NULL;
                                fmi->fmi_lower_info = fmi_lower;
                                fmi->fmi_lower_info->fmi_ref_cnt += 1;
                        }
                        return fmi;
                }
        }

        return NULL;
}

/*
 * Dereference mount state; free it when the last reference goes away.
 * Must be called with fstrans_lock held.
 */
static void
fstrans_mount_dtor(struct fstrans_mount_info *fmi)
{

        KASSERT(mutex_owned(&fstrans_lock));

        KASSERT(fmi != NULL);
        fmi->fmi_ref_cnt -= 1;
        if (__predict_true(fmi->fmi_ref_cnt > 0)) {
                return;
        }

        KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
        KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
        KASSERT(fmi->fmi_owner == NULL);

        if (fmi->fmi_lower_info)
                fstrans_mount_dtor(fmi->fmi_lower_info);

        KASSERT(fstrans_gone_count > 0);
        fstrans_gone_count -= 1;

        /* The struct mount was kept alive until now; free both. */
        kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
        kmem_free(fmi, sizeof(*fmi));
}

/*
 * Allocate mount state.
 */
int
fstrans_mount(struct mount *mp)
{
        uint32_t indx;
        struct fstrans_mount_info *newfmi;

        indx = fstrans_mount_hash(mp);

        newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
        newfmi->fmi_state = FSTRANS_NORMAL;
        newfmi->fmi_ref_cnt = 1;
        newfmi->fmi_gone = false;
        LIST_INIT(&newfmi->fmi_cow_handler);
        newfmi->fmi_cow_change = false;
        newfmi->fmi_mount = mp;
        newfmi->fmi_lower_info = NULL;
        newfmi->fmi_owner = NULL;

        mutex_enter(&fstrans_lock);
        SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash);
        mutex_exit(&fstrans_lock);

        return 0;
}

/*
 * Deallocate mount state: mark it gone, unhash it and drop the
 * unmounting reference.  The info may survive until the last
 * fstrans_lwp_info referencing it goes away.
 */
void
fstrans_unmount(struct mount *mp)
{
        uint32_t indx;
        struct fstrans_mount_info *fmi;

        indx = fstrans_mount_hash(mp);

        mutex_enter(&fstrans_lock);
        fmi = fstrans_mount_get(mp);
        KASSERT(fmi != NULL);
        fmi->fmi_gone = true;
        SLIST_REMOVE(&fstrans_mount_hashtab[indx],
            fmi, fstrans_mount_info, fmi_hash);
        fstrans_gone_count += 1;
        fstrans_mount_dtor(fmi);
        mutex_exit(&fstrans_lock);
}

/*
 * Clear mount entries whose mount is gone from the current LWP's list.
 */
static void
fstrans_clear_lwp_info(void)
{
        struct fstrans_lwp_info **p, *fli, *tofree = NULL;

        /*
         * Scan our list clearing entries whose mount is gone.
         */
        mutex_enter(&fstrans_lock);
        for (p = &curlwp->l_fstrans; *p; ) {
                fli = *p;
                if (fli->fli_mount != NULL &&
                    fli->fli_mountinfo->fmi_gone &&
                    fli->fli_trans_cnt == 0 &&
                    fli->fli_cow_cnt == 0 &&
                    fli->fli_alias_cnt == 0) {
                        *p = (*p)->fli_succ;
                        fstrans_mount_dtor(fli->fli_mountinfo);
                        if (fli->fli_alias) {
                                KASSERT(fli->fli_alias->fli_alias_cnt > 0);
                                fli->fli_alias->fli_alias_cnt--;
                        }
                        fli->fli_mount = NULL;
                        fli->fli_alias = NULL;
                        fli->fli_mountinfo = NULL;
                        fli->fli_self = NULL;
                        /* Restart the scan: the alias update above may
                         * have made an earlier entry removable. */
                        p = &curlwp->l_fstrans;
                        fli->fli_succ = tofree;
                        tofree = fli;
                } else {
                        p = &(*p)->fli_succ;
                }
        }
#ifdef DIAGNOSTIC
        for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
                if (fli->fli_alias != NULL)
                        KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
        mutex_exit(&fstrans_lock);

        /* Return the collected entries to the pool without the lock. */
        while (tofree != NULL) {
                fli = tofree;
                tofree = fli->fli_succ;
                pool_cache_put(fstrans_lwp_cache, fli);
        }
}

/*
 * Allocate and return per lwp info for this mount.  For layered mounts
 * the lower mount's info is allocated recursively and attached as an
 * alias; in that case the lower entry is returned.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
        struct fstrans_lwp_info *fli, *fli_lower;
        struct fstrans_mount_info *fmi;

        /* Return an existing entry for this mount, if any. */
        for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
                if (fli->fli_mount == mp)
                        return fli;
        }

        /*
         * Lookup mount info and get lower mount per lwp info.
         */
        mutex_enter(&fstrans_lock);
        fmi = fstrans_mount_get(mp);
        if (fmi == NULL) {
                mutex_exit(&fstrans_lock);
                return NULL;
        }
        fmi->fmi_ref_cnt += 1;
        mutex_exit(&fstrans_lock);

        if (fmi->fmi_lower_info) {
                fli_lower =
                    fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount);
                if (fli_lower == NULL) {
                        /* Undo the reference taken above. */
                        mutex_enter(&fstrans_lock);
                        fstrans_mount_dtor(fmi);
                        mutex_exit(&fstrans_lock);

                        return NULL;
                }
        } else {
                fli_lower = NULL;
        }

        /*
         * Allocate a new entry.
         */
        fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK);
        KASSERT(fli->fli_trans_cnt == 0);
        KASSERT(fli->fli_cow_cnt == 0);
        KASSERT(fli->fli_alias_cnt == 0);
        KASSERT(fli->fli_mount == NULL);
        KASSERT(fli->fli_alias == NULL);
        KASSERT(fli->fli_mountinfo == NULL);
        KASSERT(fli->fli_self == NULL);

        /*
         * Attach the mount info and alias.
         */

        fli->fli_self = curlwp;
        fli->fli_mount = mp;
        fli->fli_mountinfo = fmi;

        fli->fli_succ = curlwp->l_fstrans;
        curlwp->l_fstrans = fli;

        if (fli_lower) {
                fli->fli_alias = fli_lower;
                fli->fli_alias->fli_alias_cnt++;
                fli = fli->fli_alias;
        }

        return fli;
}

/*
 * Retrieve the per lwp info for this mount allocating if necessary.
 * For layered mounts the lower mount's entry (the alias) is returned.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
        struct fstrans_lwp_info *fli;

        /*
         * Scan our list for a match.
         */
        for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
                if (fli->fli_mount == mp) {
                        KASSERT((mp->mnt_lower == NULL) ==
                            (fli->fli_alias == NULL));
                        if (fli->fli_alias != NULL)
                                fli = fli->fli_alias;
                        break;
                }
        }

        if (do_alloc) {
                if (__predict_false(fli == NULL))
                        fli = fstrans_alloc_lwp_info(mp);
        }

        return fli;
}

/*
 * Check if this lock type is granted at this state.
 */
static bool
grant_lock(const struct fstrans_mount_info *fmi,
    const enum fstrans_lock_type type)
{

        if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
                return true;
        /* The suspending LWP itself is always granted. */
        if (fmi->fmi_owner == curlwp)
                return true;
        /* Lazy transactions remain granted while only suspending. */
        if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
                return true;

        return false;
}

/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system increment the reference counter.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
        int s;
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;

        ASSERT_SLEEPABLE();

        fli = fstrans_get_lwp_info(mp, true);
        if (fli == NULL)
                return 0;
        fmi = fli->fli_mountinfo;

        /* Recursive transaction: just count it. */
        if (fli->fli_trans_cnt > 0) {
                fli->fli_trans_cnt += 1;

                return 0;
        }

        /*
         * Lockless fast path: the pserialize read section pairs with
         * pserialize_perform() in fstrans_setstate().
         */
        s = pserialize_read_enter();
        if (__predict_true(grant_lock(fmi, lock_type))) {
                fli->fli_trans_cnt = 1;
                fli->fli_lock_type = lock_type;
                pserialize_read_exit(s);

                return 0;
        }
        pserialize_read_exit(s);

        if (! wait)
                return EBUSY;

        /* Slow path: wait under fstrans_lock until the lock is granted. */
        mutex_enter(&fstrans_lock);
        while (! grant_lock(fmi, lock_type))
                cv_wait(&fstrans_state_cv, &fstrans_lock);
        fli->fli_trans_cnt = 1;
        fli->fli_lock_type = lock_type;
        mutex_exit(&fstrans_lock);

        return 0;
}

/*
 * Start a shared transaction, waiting as needed.
 */
void
fstrans_start(struct mount *mp)
{
        int error __diagused;

        error = _fstrans_start(mp, FSTRANS_SHARED, 1);
        KASSERT(error == 0);
}

/*
 * Try to start a shared transaction; EBUSY if it cannot be granted now.
 */
int
fstrans_start_nowait(struct mount *mp)
{

        return _fstrans_start(mp, FSTRANS_SHARED, 0);
}

/*
 * Start a lazy transaction, waiting as needed.
 */
void
fstrans_start_lazy(struct mount *mp)
{
        int error __diagused;

        error = _fstrans_start(mp, FSTRANS_LAZY, 1);
        KASSERT(error == 0);
}

/*
 * Finish a transaction.
 */
void
fstrans_done(struct mount *mp)
{
        int s;
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;

        fli = fstrans_get_lwp_info(mp, false);
        if (fli == NULL)
                return;
        fmi = fli->fli_mountinfo;
        KASSERT(fli->fli_trans_cnt > 0);

        /* Recursive transaction: just count down. */
        if (fli->fli_trans_cnt > 1) {
                fli->fli_trans_cnt -= 1;

                return;
        }

        /* Opportunistically garbage collect entries for gone mounts. */
        if (__predict_false(fstrans_gone_count > 0))
                fstrans_clear_lwp_info();

        /* Lockless fast path while no state change is in progress. */
        s = pserialize_read_enter();
        if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
                fli->fli_trans_cnt = 0;
                pserialize_read_exit(s);

                return;
        }
        pserialize_read_exit(s);

        /* Slow path: wake a state changer waiting for the count. */
        mutex_enter(&fstrans_lock);
        fli->fli_trans_cnt = 0;
        cv_signal(&fstrans_count_cv);
        mutex_exit(&fstrans_lock);
}

/*
 * Check if we hold an lock: either a transaction or ownership of the
 * suspension.
 */
int
fstrans_held(struct mount *mp)
{
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;

        KASSERT(mp != dead_rootmount);

        fli = fstrans_get_lwp_info(mp, false);
        if (fli == NULL)
                return 0;
        fmi = fli->fli_mountinfo;

        return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp);
}

/*
 * Check if this thread has an exclusive lock (is the suspending owner).
 */
int
fstrans_is_owner(struct mount *mp)
{
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;

        KASSERT(mp != dead_rootmount);

        fli = fstrans_get_lwp_info(mp, false);
        if (fli == NULL)
                return 0;
        fmi = fli->fli_mountinfo;

        return (fmi->fmi_owner == curlwp);
}

/*
 * True, if no thread is in a transaction not granted at the current state.
 */
static bool
state_change_done(const struct fstrans_mount_info *fmi)
{
        struct fstrans_lwp_info *fli;

        KASSERT(mutex_owned(&fstrans_lock));

        LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
                if (fli->fli_mountinfo != fmi)
                        continue;
                if (fli->fli_trans_cnt == 0)
                        continue;
                if (fli->fli_self == curlwp)
                        continue;
                if (grant_lock(fmi, fli->fli_lock_type))
                        continue;

                return false;
        }

        return true;
}

/*
 * Set new file system state.  Waits for all transactions that would not
 * be granted in the new state to drain; interruptible, reverting to
 * FSTRANS_NORMAL on signal.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
        int error;
        enum fstrans_state old_state;
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;

        KASSERT(mp != dead_rootmount);

        fli = fstrans_get_lwp_info(mp, true);
        if (fli == NULL)
                return ENOENT;
        fmi = fli->fli_mountinfo;
        old_state = fmi->fmi_state;
        if (old_state == new_state)
                return 0;

        mutex_enter(&fstrans_lock);
        fmi->fmi_state = new_state;
        /* Make the new state visible to all lockless fast paths. */
        pserialize_perform(fstrans_psz);

        /*
         * All threads see the new state now.
         * Wait for transactions invalid at this state to leave.
         */
        error = 0;
        while (! state_change_done(fmi)) {
                error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
                if (error) {
                        /* Interrupted: revert to normal operation. */
                        new_state = fmi->fmi_state = FSTRANS_NORMAL;
                        break;
                }
        }
        /* Track ownership across NORMAL <-> suspended transitions. */
        if (old_state != new_state) {
                if (old_state == FSTRANS_NORMAL) {
                        KASSERT(fmi->fmi_owner == NULL);
                        fmi->fmi_owner = curlwp;
                }
                if (new_state == FSTRANS_NORMAL) {
                        KASSERT(fmi->fmi_owner == curlwp);
                        fmi->fmi_owner = NULL;
                }
        }
        cv_broadcast(&fstrans_state_cv);
        mutex_exit(&fstrans_lock);

        return error;
}

/*
 * Get current file system state.
 */
enum fstrans_state
fstrans_getstate(struct mount *mp)
{
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;

        KASSERT(mp != dead_rootmount);

        fli = fstrans_get_lwp_info(mp, true);
        KASSERT(fli != NULL);
        fmi = fli->fli_mountinfo;

        return fmi->fmi_state;
}

/*
 * Request a filesystem to suspend all operations.  On success returns
 * with vfs_suspend_lock held; vfs_resume() releases it.
 */
int
vfs_suspend(struct mount *mp, int nowait)
{
        struct fstrans_lwp_info *fli;
        int error;

        if (mp == dead_rootmount)
                return EOPNOTSUPP;

        fli = fstrans_get_lwp_info(mp, true);
        if (fli == NULL)
                return ENOENT;

        if (nowait) {
                if (!mutex_tryenter(&vfs_suspend_lock))
                        return EWOULDBLOCK;
        } else
                mutex_enter(&vfs_suspend_lock);

        /* Suspend the lowest mount of a layered stack. */
        if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) {
                mutex_exit(&vfs_suspend_lock);
                return error;
        }

        if ((mp->mnt_iflag & IMNT_GONE) != 0) {
                vfs_resume(mp);
                return ENOENT;
        }

        return 0;
}

/*
 * Request a filesystem to resume all operations and drop the
 * vfs_suspend_lock taken by vfs_suspend().
 */
void
vfs_resume(struct mount *mp)
{
        struct fstrans_lwp_info *fli;

        KASSERT(mp != dead_rootmount);

        fli = fstrans_get_lwp_info(mp, false);
        mp = fli->fli_mount;

        VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
        mutex_exit(&vfs_suspend_lock);
}


/*
 * True, if no thread is running a cow handler on this mount.
 */
static bool
cow_state_change_done(const struct fstrans_mount_info *fmi)
{
        struct fstrans_lwp_info *fli;

        KASSERT(mutex_owned(&fstrans_lock));
        KASSERT(fmi->fmi_cow_change);

        LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
                if (fli->fli_mount != fmi->fmi_mount)
                        continue;
                if (fli->fli_cow_cnt == 0)
                        continue;

                return false;
        }

        return true;
}

/*
 * Prepare for changing this mounts cow list.
 * Returns with fstrans_lock locked.
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

        mutex_enter(&fstrans_lock);

        /*
         * Wait for other threads changing the list.
         */
        while (fmi->fmi_cow_change)
                cv_wait(&fstrans_state_cv, &fstrans_lock);

        /*
         * Wait until all threads are aware of a state change.
         */
        fmi->fmi_cow_change = true;
        pserialize_perform(fstrans_psz);

        /* Wait for running cow handlers to drain. */
        while (! cow_state_change_done(fmi))
                cv_wait(&fstrans_count_cv, &fstrans_lock);
}

/*
 * Done changing this mounts cow list.  Expects fstrans_lock held and
 * releases it.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

        KASSERT(mutex_owned(&fstrans_lock));

        fmi->fmi_cow_change = false;
        /* Make the cleared flag visible to the fscow_run fast path. */
        pserialize_perform(fstrans_psz);

        cv_broadcast(&fstrans_state_cv);

        mutex_exit(&fstrans_lock);
}

/*
 * Add a copy-on-write handler to this mount.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
        struct fstrans_mount_info *fmi;
        struct fscow_handler *newch;

        KASSERT(mp != dead_rootmount);

        /* Hold a mount info reference for the registered handler. */
        mutex_enter(&fstrans_lock);
        fmi = fstrans_mount_get(mp);
        KASSERT(fmi != NULL);
        fmi->fmi_ref_cnt += 1;
        mutex_exit(&fstrans_lock);

        newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
        newch->ch_func = func;
        newch->ch_arg = arg;

        cow_change_enter(fmi);
        LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
        cow_change_done(fmi);

        return 0;
}

/*
 * Remove a handler from this mount.
 */
int
fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
        struct fstrans_mount_info *fmi;
        struct fscow_handler *hp = NULL;

        KASSERT(mp != dead_rootmount);

        mutex_enter(&fstrans_lock);
        fmi = fstrans_mount_get(mp);
        KASSERT(fmi != NULL);
        mutex_exit(&fstrans_lock);

        cow_change_enter(fmi);
        LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
                if (hp->ch_func == func && hp->ch_arg == arg)
                        break;
        if (hp != NULL) {
                LIST_REMOVE(hp, ch_list);
                kmem_free(hp, sizeof(*hp));
        }
        /* Drop the reference taken by fscow_establish(). */
        fstrans_mount_dtor(fmi);
        cow_change_done(fmi);

        return hp ? 0 : EINVAL;
}

/*
 * Check for need to copy block that is about to be written.
 * Runs all registered cow handlers for the buffer's mount; sets
 * B_COWDONE once they all succeed.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
        int error, s;
        struct mount *mp;
        struct fstrans_lwp_info *fli;
        struct fstrans_mount_info *fmi;
        struct fscow_handler *hp;

        /*
         * First check if we need run the copy-on-write handler.
         */
        if ((bp->b_flags & B_COWDONE))
                return 0;
        if (bp->b_vp == NULL) {
                bp->b_flags |= B_COWDONE;
                return 0;
        }
        if (bp->b_vp->v_type == VBLK)
                mp = spec_node_getmountedfs(bp->b_vp);
        else
                mp = bp->b_vp->v_mount;
        if (mp == NULL || mp == dead_rootmount) {
                bp->b_flags |= B_COWDONE;
                return 0;
        }

        fli = fstrans_get_lwp_info(mp, true);
        KASSERT(fli != NULL);
        fmi = fli->fli_mountinfo;

        /*
         * On non-recursed run check if other threads
         * want to change the list.  The pserialize read section pairs
         * with pserialize_perform() in cow_change_enter/done().
         */
        if (fli->fli_cow_cnt == 0) {
                s = pserialize_read_enter();
                if (__predict_false(fmi->fmi_cow_change)) {
                        pserialize_read_exit(s);
                        mutex_enter(&fstrans_lock);
                        while (fmi->fmi_cow_change)
                                cv_wait(&fstrans_state_cv, &fstrans_lock);
                        fli->fli_cow_cnt = 1;
                        mutex_exit(&fstrans_lock);
                } else {
                        fli->fli_cow_cnt = 1;
                        pserialize_read_exit(s);
                }
        } else
                fli->fli_cow_cnt += 1;

        /*
         * Run all copy-on-write handlers, stop on error.
         */
        error = 0;
        LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
                if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
                        break;
        if (error == 0)
                bp->b_flags |= B_COWDONE;

        /*
         * Check if other threads want to change the list.
         */
        if (fli->fli_cow_cnt > 1) {
                fli->fli_cow_cnt -= 1;
        } else {
                s = pserialize_read_enter();
                if (__predict_false(fmi->fmi_cow_change)) {
                        pserialize_read_exit(s);
                        mutex_enter(&fstrans_lock);
                        fli->fli_cow_cnt = 0;
                        /* Wake a waiter in cow_change_enter(). */
                        cv_signal(&fstrans_count_cv);
                        mutex_exit(&fstrans_lock);
                } else {
                        fli->fli_cow_cnt = 0;
                        pserialize_read_exit(s);
                }
        }

        return error;
}

#if defined(DDB)
void fstrans_dump(int);

/*
 * DDB helper: print this LWP's fstrans entries, one line each.
 */
static void
fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
{
        char prefix[9];
        struct fstrans_lwp_info *fli;

        snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
        LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
                if (fli->fli_self != l)
                        continue;
                if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
                        if (! verbose)
                                continue;
                }
                printf("%-8s", prefix);
                if (verbose)
                        printf(" @%p", fli);
                if (fli->fli_mount == dead_rootmount)
                        printf(" <dead>");
                else if (fli->fli_mount != NULL)
                        printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
                else
                        printf(" NULL");
                if (fli->fli_alias != NULL) {
                        struct mount *amp = fli->fli_alias->fli_mount;

                        printf(" alias");
                        if (verbose)
                                printf(" @%p", fli->fli_alias);
                        if (amp == NULL)
                                printf(" NULL");
                        else
                                printf(" (%s)", amp->mnt_stat.f_mntonname);
                }
                if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
                        printf(" gone");
                if (fli->fli_trans_cnt == 0) {
                        printf(" -");
                } else {
                        switch (fli->fli_lock_type) {
                        case FSTRANS_LAZY:
                                printf(" lazy");
                                break;
                        case FSTRANS_SHARED:
                                printf(" shared");
                                break;
                        default:
                                printf(" %#x", fli->fli_lock_type);
                                break;
                        }
                }
                printf(" %d cow %d alias %d\n",
                    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
                /* Print the pid.lid prefix only on the first line. */
                prefix[0] = '\0';
        }
}

/*
 * DDB helper: print the fstrans state of one mount.
 */
static void
fstrans_print_mount(struct mount *mp, int verbose)
{
        uint32_t indx;
        struct fstrans_mount_info *fmi;

        indx = fstrans_mount_hash(mp);
        SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash)
                if (fmi->fmi_mount == mp)
                        break;

        if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
                return;

        printf("%-16s ", mp->mnt_stat.f_mntonname);
        if (fmi == NULL) {
                printf("(null)\n");
                return;
        }
        printf("owner %p ", fmi->fmi_owner);
        switch (fmi->fmi_state) {
        case FSTRANS_NORMAL:
                printf("state normal\n");
                break;
        case FSTRANS_SUSPENDING:
                printf("state suspending\n");
                break;
        case FSTRANS_SUSPENDED:
                printf("state suspended\n");
                break;
        default:
                printf("state %#x\n", fmi->fmi_state);
                break;
        }
}

/*
 * DDB entry point: dump fstrans state by LWP and by mount.
 */
void
fstrans_dump(int full)
{
        const struct proclist_desc *pd;
        struct proc *p;
        struct lwp *l;
        struct mount *mp;

        printf("Fstrans locks by lwp:\n");
        for (pd = proclists; pd->pd_list != NULL; pd++)
                PROCLIST_FOREACH(p, pd->pd_list)
                        LIST_FOREACH(l, &p->p_lwps, l_sibling)
                                fstrans_print_lwp(p, l, full == 1);

        printf("Fstrans state by mount:\n");
        for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
                fstrans_print_mount(mp, full == 1);
}
#endif /* defined(DDB) */