vfs_trans.c revision 1.19
1/* $NetBSD: vfs_trans.c,v 1.19 2008/04/28 20:24:05 martin Exp $ */ 2 3/*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.19 2008/04/28 20:24:05 martin Exp $"); 34 35/* 36 * File system transaction operations. 37 */ 38 39#include "opt_ddb.h" 40 41#if defined(DDB) 42#define _LWP_API_PRIVATE /* Need _lwp_getspecific_by_lwp() */ 43#endif 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/kmem.h> 48#include <sys/mount.h> 49#include <sys/rwlock.h> 50#include <sys/vnode.h> 51#define _FSTRANS_API_PRIVATE 52#include <sys/fstrans.h> 53#include <sys/proc.h> 54 55#include <miscfs/specfs/specdev.h> 56#include <miscfs/syncfs/syncfs.h> 57 58struct fscow_handler { 59 SLIST_ENTRY(fscow_handler) ch_list; 60 int (*ch_func)(void *, struct buf *, bool); 61 void *ch_arg; 62}; 63struct fstrans_lwp_info { 64 struct fstrans_lwp_info *fli_succ; 65 struct mount *fli_mount; 66 int fli_count; 67 enum fstrans_lock_type fli_lock_type; 68}; 69struct fstrans_mount_info { 70 enum fstrans_state fmi_state; 71 krwlock_t fmi_shared_lock; 72 krwlock_t fmi_lazy_lock; 73 krwlock_t fmi_cow_lock; 74 SLIST_HEAD(, fscow_handler) fmi_cow_handler; 75}; 76 77static specificdata_key_t lwp_data_key; 78static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 79 80POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0, 81 "fstrans", NULL, IPL_NONE); 82 83static void fstrans_lwp_dtor(void *); 84 85/* 86 * Initialize 87 */ 88void 89fstrans_init(void) 90{ 91 int error; 92 93 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); 94 KASSERT(error == 0); 95 96 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 97} 98 99/* 100 * Deallocate lwp state 101 */ 102static void 103fstrans_lwp_dtor(void *arg) 104{ 105 struct fstrans_lwp_info *fli, *fli_next; 106 107 for (fli = arg; fli; fli = fli_next) { 108 KASSERT(fli->fli_mount == NULL); 109 KASSERT(fli->fli_count == 0); 110 fli_next = fli->fli_succ; 111 pool_put(&fstrans_pl, fli); 112 } 113} 114 115/* 116 * Allocate mount state 117 */ 118int 119fstrans_mount(struct mount *mp) 120{ 121 struct fstrans_mount_info *new; 122 123 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 124 return ENOMEM; 125 new->fmi_state = FSTRANS_NORMAL; 126 rw_init(&new->fmi_lazy_lock); 127 rw_init(&new->fmi_shared_lock); 128 SLIST_INIT(&new->fmi_cow_handler); 129 rw_init(&new->fmi_cow_lock); 130 131 mp->mnt_transinfo = new; 132 mp->mnt_iflag |= IMNT_HAS_TRANS; 133 134 return 0; 135} 136 137/* 138 * Deallocate mount state 139 */ 140void 141fstrans_unmount(struct mount *mp) 142{ 143 struct fstrans_mount_info *fmi; 144 145 if ((fmi = mp->mnt_transinfo) == NULL) 146 return; 147 148 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 149 rw_destroy(&fmi->fmi_lazy_lock); 150 rw_destroy(&fmi->fmi_shared_lock); 151 KASSERT(SLIST_EMPTY(&fmi->fmi_cow_handler)); 152 rw_destroy(&fmi->fmi_cow_lock); 153 kmem_free(fmi, sizeof(*fmi)); 154 mp->mnt_iflag &= ~IMNT_HAS_TRANS; 155 mp->mnt_transinfo = NULL; 156} 157 158/* 159 * Start a transaction. If this thread already has a transaction on this 160 * file system increment the reference counter. 161 * A thread with an exclusive transaction lock may get a shared or lazy one. 162 * A thread with a shared or lazy transaction lock cannot upgrade to an 163 * exclusive one yet. 164 */ 165int 166_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 167{ 168 krwlock_t *lock_p; 169 krw_t lock_op; 170 struct fstrans_lwp_info *fli, *new_fli; 171 struct fstrans_mount_info *fmi; 172 173 ASSERT_SLEEPABLE(); 174 175 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 176 return 0; 177 178 new_fli = NULL; 179 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 180 if (fli->fli_mount == NULL && new_fli == NULL) 181 new_fli = fli; 182 if (fli->fli_mount == mp) { 183 KASSERT(fli->fli_count > 0); 184 if (fli->fli_lock_type != FSTRANS_EXCL && 185 lock_type == FSTRANS_EXCL) 186 panic("fstrans_start: cannot upgrade lock"); 187 fli->fli_count += 1; 188 return 0; 189 } 190 } 191 192 if (new_fli == NULL) { 193 new_fli = pool_get(&fstrans_pl, PR_WAITOK); 194 new_fli->fli_mount = NULL; 195 new_fli->fli_count = 0; 196 new_fli->fli_succ = lwp_getspecific(lwp_data_key); 197 lwp_setspecific(lwp_data_key, new_fli); 198 } 199 200 KASSERT(new_fli->fli_mount == NULL); 201 KASSERT(new_fli->fli_count == 0); 202 203 fmi = mp->mnt_transinfo; 204 205 if (lock_type == FSTRANS_LAZY) 206 lock_p = &fmi->fmi_lazy_lock; 207 else 208 lock_p = &fmi->fmi_shared_lock; 209 lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER); 210 211 if (wait) 212 rw_enter(lock_p, lock_op); 213 else if (rw_tryenter(lock_p, lock_op) == 0) 214 return EBUSY; 215 216 new_fli->fli_mount = mp; 217 new_fli->fli_count = 1; 218 new_fli->fli_lock_type = lock_type; 219 220 return 0; 221} 222 223/* 224 * Finish a transaction. 225 */ 226void 227fstrans_done(struct mount *mp) 228{ 229 struct fstrans_lwp_info *fli; 230 struct fstrans_mount_info *fmi; 231 232 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 233 return; 234 235 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 236 if (fli->fli_mount == mp) { 237 fli->fli_count -= 1; 238 if (fli->fli_count > 0) 239 return; 240 break; 241 } 242 } 243 244 KASSERT(fli != NULL); 245 KASSERT(fli->fli_mount == mp); 246 KASSERT(fli->fli_count == 0); 247 fli->fli_mount = NULL; 248 fmi = mp->mnt_transinfo; 249 KASSERT(fmi != NULL); 250 if (fli->fli_lock_type == FSTRANS_LAZY) 251 rw_exit(&fmi->fmi_lazy_lock); 252 else 253 rw_exit(&fmi->fmi_shared_lock); 254} 255 256/* 257 * Check if this thread has an exclusive lock. 258 */ 259int 260fstrans_is_owner(struct mount *mp) 261{ 262 struct fstrans_lwp_info *fli; 263 264 if (mp == NULL) 265 return 0; 266 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 267 return 0; 268 269 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) 270 if (fli->fli_mount == mp) 271 break; 272 273 if (fli == NULL) 274 return 0; 275 276 KASSERT(fli->fli_mount == mp); 277 KASSERT(fli->fli_count > 0); 278 return (fli->fli_lock_type == FSTRANS_EXCL); 279} 280 281/* 282 * Set new file system state. 283 */ 284int 285fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 286{ 287 struct fstrans_mount_info *fmi; 288 289 fmi = mp->mnt_transinfo; 290 291 switch (new_state) { 292 case FSTRANS_SUSPENDING: 293 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 294 fstrans_start(mp, FSTRANS_EXCL); 295 fmi->fmi_state = FSTRANS_SUSPENDING; 296 break; 297 298 case FSTRANS_SUSPENDED: 299 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 300 fmi->fmi_state == FSTRANS_SUSPENDING); 301 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 302 fstrans_is_owner(mp)); 303 if (fmi->fmi_state == FSTRANS_NORMAL) 304 fstrans_start(mp, FSTRANS_EXCL); 305 rw_enter(&fmi->fmi_lazy_lock, RW_WRITER); 306 fmi->fmi_state = FSTRANS_SUSPENDED; 307 break; 308 309 case FSTRANS_NORMAL: 310 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 311 fstrans_is_owner(mp)); 312 if (fmi->fmi_state == FSTRANS_SUSPENDED) 313 rw_exit(&fmi->fmi_lazy_lock); 314 if (fmi->fmi_state == FSTRANS_SUSPENDING || 315 fmi->fmi_state == FSTRANS_SUSPENDED) { 316 fmi->fmi_state = FSTRANS_NORMAL; 317 fstrans_done(mp); 318 } 319 break; 320 321 default: 322 panic("%s: illegal state %d", __func__, new_state); 323 } 324 325 return 0; 326} 327 328/* 329 * Get current file system state 330 */ 331enum fstrans_state 332fstrans_getstate(struct mount *mp) 333{ 334 struct fstrans_mount_info *fmi; 335 336 fmi = mp->mnt_transinfo; 337 338 return fmi->fmi_state; 339} 340 341/* 342 * Request a filesystem to suspend all operations. 343 */ 344int 345vfs_suspend(struct mount *mp, int nowait) 346{ 347 int error; 348 349 if (nowait) { 350 if (!mutex_tryenter(&vfs_suspend_lock)) 351 return EWOULDBLOCK; 352 } else 353 mutex_enter(&vfs_suspend_lock); 354 355 mutex_enter(&syncer_mutex); 356 357 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) { 358 mutex_exit(&syncer_mutex); 359 mutex_exit(&vfs_suspend_lock); 360 } 361 362 return error; 363} 364 365/* 366 * Request a filesystem to resume all operations. 367 */ 368void 369vfs_resume(struct mount *mp) 370{ 371 372 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 373 mutex_exit(&syncer_mutex); 374 mutex_exit(&vfs_suspend_lock); 375} 376 377#if defined(DDB) 378void fstrans_dump(int); 379 380static void 381fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 382{ 383 char prefix[9]; 384 struct fstrans_lwp_info *fli; 385 386 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 387 for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key); 388 fli; 389 fli = fli->fli_succ) { 390 if (!verbose && fli->fli_count == 0) 391 continue; 392 printf("%-8s", prefix); 393 if (verbose) 394 printf(" @%p", fli); 395 if (fli->fli_mount != NULL) 396 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 397 else 398 printf(" NULL"); 399 switch (fli->fli_lock_type) { 400 case FSTRANS_LAZY: 401 printf(" lazy"); 402 break; 403 case FSTRANS_SHARED: 404 printf(" shared"); 405 break; 406 case FSTRANS_EXCL: 407 printf(" excl"); 408 break; 409 default: 410 printf(" %#x", fli->fli_lock_type); 411 break; 412 } 413 printf(" %d\n", fli->fli_count); 414 prefix[0] = '\0'; 415 } 416} 417 418static void 419fstrans_print_mount(struct mount *mp, int verbose) 420{ 421 struct fstrans_mount_info *fmi; 422 423 fmi = mp->mnt_transinfo; 424 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 425 return; 426 427 printf("%-16s ", mp->mnt_stat.f_mntonname); 428 if (fmi == NULL) { 429 printf("(null)\n"); 430 return; 431 } 432 switch (fmi->fmi_state) { 433 case FSTRANS_NORMAL: 434 printf("state normal\n"); 435 break; 436 case FSTRANS_SUSPENDING: 437 printf("state suspending\n"); 438 break; 439 case FSTRANS_SUSPENDED: 440 printf("state suspended\n"); 441 break; 442 default: 443 printf("state %#x\n", fmi->fmi_state); 444 break; 445 } 446 printf("%16s r=%d w=%d\n", "lock_lazy:", 447 rw_read_held(&fmi->fmi_lazy_lock), 448 rw_write_held(&fmi->fmi_lazy_lock)); 449 printf("%16s r=%d w=%d\n", "lock_shared:", 450 rw_read_held(&fmi->fmi_shared_lock), 451 rw_write_held(&fmi->fmi_shared_lock)); 452} 453 454void 455fstrans_dump(int full) 456{ 457 const struct proclist_desc *pd; 458 struct proc *p; 459 struct lwp *l; 460 struct mount *mp; 461 462 printf("Fstrans locks by lwp:\n"); 463 for (pd = proclists; pd->pd_list != NULL; pd++) 464 LIST_FOREACH(p, pd->pd_list, p_list) 465 LIST_FOREACH(l, &p->p_lwps, l_sibling) 466 fstrans_print_lwp(p, l, full == 1); 467 468 printf("Fstrans state by mount:\n"); 469 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) 470 fstrans_print_mount(mp, full == 1); 471} 472#endif /* defined(DDB) */ 473 474int 475fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 476 void *arg) 477{ 478 struct fstrans_mount_info *fmi; 479 struct fscow_handler *new; 480 481 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 482 return EINVAL; 483 484 fmi = mp->mnt_transinfo; 485 486 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 487 return ENOMEM; 488 new->ch_func = func; 489 new->ch_arg = arg; 490 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 491 SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list); 492 rw_exit(&fmi->fmi_cow_lock); 493 494 return 0; 495} 496 497int 498fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 499 void *arg) 500{ 501 struct fstrans_mount_info *fmi; 502 struct fscow_handler *hp = NULL; 503 504 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 505 return EINVAL; 506 507 fmi = mp->mnt_transinfo; 508 509 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 510 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 511 if (hp->ch_func == func && hp->ch_arg == arg) 512 break; 513 if (hp != NULL) { 514 SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list); 515 kmem_free(hp, sizeof(*hp)); 516 } 517 rw_exit(&fmi->fmi_cow_lock); 518 519 return hp ? 0 : EINVAL; 520} 521 522int 523fscow_run(struct buf *bp, bool data_valid) 524{ 525 int error = 0; 526 struct mount *mp; 527 struct fstrans_mount_info *fmi; 528 struct fscow_handler *hp; 529 530 if ((bp->b_flags & B_COWDONE)) 531 goto done; 532 if (bp->b_vp == NULL) 533 goto done; 534 if (bp->b_vp->v_type == VBLK) 535 mp = bp->b_vp->v_specmountpoint; 536 else 537 mp = bp->b_vp->v_mount; 538 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 539 goto done; 540 541 fmi = mp->mnt_transinfo; 542 543 rw_enter(&fmi->fmi_cow_lock, RW_READER); 544 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 545 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 546 break; 547 rw_exit(&fmi->fmi_cow_lock); 548 549 done: 550 if (error == 0) 551 bp->b_flags |= B_COWDONE; 552 553 return error; 554} 555