/* zfs_vfsops.c revision 170431 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/types.h> 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/kernel.h> 32#include <sys/sysmacros.h> 33#include <sys/kmem.h> 34#include <sys/acl.h> 35#include <sys/vnode.h> 36#include <sys/vfs.h> 37#include <sys/mntent.h> 38#include <sys/mount.h> 39#include <sys/cmn_err.h> 40#include <sys/zfs_znode.h> 41#include <sys/zfs_dir.h> 42#include <sys/zil.h> 43#include <sys/fs/zfs.h> 44#include <sys/dmu.h> 45#include <sys/dsl_prop.h> 46#include <sys/dsl_dataset.h> 47#include <sys/spa.h> 48#include <sys/zap.h> 49#include <sys/varargs.h> 50#include <sys/policy.h> 51#include <sys/atomic.h> 52#include <sys/zfs_ioctl.h> 53#include <sys/zfs_ctldir.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56 57struct mtx zfs_debug_mtx; 58MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 59SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 60int zfs_debug_level = 0; 61TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 62SYSCTL_INT(_vfs_zfs, 
OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 63 "Debug level"); 64 65static int zfs_mount(vfs_t *vfsp, kthread_t *td); 66static int zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td); 67static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td); 68static int zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td); 69static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 70static int zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td); 71static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 72static void zfs_objset_close(zfsvfs_t *zfsvfs); 73static void zfs_freevfs(vfs_t *vfsp); 74 75static struct vfsops zfs_vfsops = { 76 .vfs_mount = zfs_mount, 77 .vfs_unmount = zfs_umount, 78 .vfs_root = zfs_root, 79 .vfs_statfs = zfs_statfs, 80 .vfs_vget = zfs_vget, 81 .vfs_sync = zfs_sync, 82 .vfs_fhtovp = zfs_fhtovp, 83}; 84 85VFS_SET(zfs_vfsops, zfs, VFCF_JAIL); 86 87/* 88 * We need to keep a count of active fs's. 89 * This is necessary to prevent our module 90 * from being unloaded after a umount -f 91 */ 92static uint32_t zfs_active_fs_count = 0; 93 94/*ARGSUSED*/ 95static int 96zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td) 97{ 98 99 /* 100 * Data integrity is job one. We don't want a compromised kernel 101 * writing to the storage pool, so we never sync during panic. 102 */ 103 if (panicstr) 104 return (0); 105 106 if (vfsp != NULL) { 107 /* 108 * Sync a specific filesystem. 109 */ 110 zfsvfs_t *zfsvfs = vfsp->vfs_data; 111 int error; 112 113 error = vfs_stdsync(vfsp, waitfor, td); 114 if (error != 0) 115 return (error); 116 117 ZFS_ENTER(zfsvfs); 118 if (zfsvfs->z_log != NULL) 119 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 120 else 121 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 122 ZFS_EXIT(zfsvfs); 123 } else { 124 /* 125 * Sync all ZFS filesystems. This is what happens when you 126 * run sync(1M). Unlike other filesystems, ZFS honors the 127 * request by waiting for all pools to commit all dirty data. 
128 */ 129 spa_sync_allpools(); 130 } 131 132 return (0); 133} 134 135static void 136atime_changed_cb(void *arg, uint64_t newval) 137{ 138 zfsvfs_t *zfsvfs = arg; 139 140 if (newval == TRUE) { 141 zfsvfs->z_atime = TRUE; 142 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 143 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 144 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 145 } else { 146 zfsvfs->z_atime = FALSE; 147 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 148 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 149 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 150 } 151} 152 153static void 154xattr_changed_cb(void *arg, uint64_t newval) 155{ 156 zfsvfs_t *zfsvfs = arg; 157 158 if (newval == TRUE) { 159 /* XXX locking on vfs_flag? */ 160#ifdef TODO 161 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 162#endif 163 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 164 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 165 } else { 166 /* XXX locking on vfs_flag? */ 167#ifdef TODO 168 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 169#endif 170 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 171 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 172 } 173} 174 175static void 176blksz_changed_cb(void *arg, uint64_t newval) 177{ 178 zfsvfs_t *zfsvfs = arg; 179 180 if (newval < SPA_MINBLOCKSIZE || 181 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 182 newval = SPA_MAXBLOCKSIZE; 183 184 zfsvfs->z_max_blksz = newval; 185 zfsvfs->z_vfs->vfs_bsize = newval; 186} 187 188static void 189readonly_changed_cb(void *arg, uint64_t newval) 190{ 191 zfsvfs_t *zfsvfs = arg; 192 193 if (newval) { 194 /* XXX locking on vfs_flag? */ 195 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 196 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 197 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 198 } else { 199 /* XXX locking on vfs_flag? 
*/ 200 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 201 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 202 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 203 } 204} 205 206static void 207setuid_changed_cb(void *arg, uint64_t newval) 208{ 209 zfsvfs_t *zfsvfs = arg; 210 211 if (newval == FALSE) { 212 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 213 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 214 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 215 } else { 216 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 217 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 218 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 219 } 220} 221 222static void 223exec_changed_cb(void *arg, uint64_t newval) 224{ 225 zfsvfs_t *zfsvfs = arg; 226 227 if (newval == FALSE) { 228 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 229 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 230 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 231 } else { 232 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 233 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 234 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 235 } 236} 237 238static void 239snapdir_changed_cb(void *arg, uint64_t newval) 240{ 241 zfsvfs_t *zfsvfs = arg; 242 243 zfsvfs->z_show_ctldir = newval; 244} 245 246static void 247acl_mode_changed_cb(void *arg, uint64_t newval) 248{ 249 zfsvfs_t *zfsvfs = arg; 250 251 zfsvfs->z_acl_mode = newval; 252} 253 254static void 255acl_inherit_changed_cb(void *arg, uint64_t newval) 256{ 257 zfsvfs_t *zfsvfs = arg; 258 259 zfsvfs->z_acl_inherit = newval; 260} 261 262static int 263zfs_refresh_properties(vfs_t *vfsp) 264{ 265 zfsvfs_t *zfsvfs = vfsp->vfs_data; 266 267 /* 268 * Remount operations default to "rw" unless "ro" is explicitly 269 * specified. 
270 */ 271 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 272 readonly_changed_cb(zfsvfs, B_TRUE); 273 } else { 274 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 275 readonly_changed_cb(zfsvfs, B_FALSE); 276 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 277 return (EROFS); 278 } 279 280 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 281 setuid_changed_cb(zfsvfs, B_FALSE); 282 } else { 283 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 284 setuid_changed_cb(zfsvfs, B_FALSE); 285 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 286 setuid_changed_cb(zfsvfs, B_TRUE); 287 } 288 289 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 290 exec_changed_cb(zfsvfs, B_FALSE); 291 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 292 exec_changed_cb(zfsvfs, B_TRUE); 293 294 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 295 atime_changed_cb(zfsvfs, B_TRUE); 296 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 297 atime_changed_cb(zfsvfs, B_FALSE); 298 299 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 300 xattr_changed_cb(zfsvfs, B_TRUE); 301 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 302 xattr_changed_cb(zfsvfs, B_FALSE); 303 304 return (0); 305} 306 307static int 308zfs_register_callbacks(vfs_t *vfsp) 309{ 310 struct dsl_dataset *ds = NULL; 311 objset_t *os = NULL; 312 zfsvfs_t *zfsvfs = NULL; 313 int readonly, do_readonly = FALSE; 314 int setuid, do_setuid = FALSE; 315 int exec, do_exec = FALSE; 316 int xattr, do_xattr = FALSE; 317 int error = 0; 318 319 ASSERT(vfsp); 320 zfsvfs = vfsp->vfs_data; 321 ASSERT(zfsvfs); 322 os = zfsvfs->z_os; 323 324 /* 325 * The act of registering our callbacks will destroy any mount 326 * options we may have. In order to enable temporary overrides 327 * of mount options, we stash away the current values and 328 * restore them after we register the callbacks. 
329 */ 330 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 331 readonly = B_TRUE; 332 do_readonly = B_TRUE; 333 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 334 readonly = B_FALSE; 335 do_readonly = B_TRUE; 336 } 337 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 338 setuid = B_FALSE; 339 do_setuid = B_TRUE; 340 } else { 341 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 342 setuid = B_FALSE; 343 do_setuid = B_TRUE; 344 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 345 setuid = B_TRUE; 346 do_setuid = B_TRUE; 347 } 348 } 349 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 350 exec = B_FALSE; 351 do_exec = B_TRUE; 352 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 353 exec = B_TRUE; 354 do_exec = B_TRUE; 355 } 356 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 357 xattr = B_FALSE; 358 do_xattr = B_TRUE; 359 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 360 xattr = B_TRUE; 361 do_xattr = B_TRUE; 362 } 363 364 /* 365 * Register property callbacks. 366 * 367 * It would probably be fine to just check for i/o error from 368 * the first prop_register(), but I guess I like to go 369 * overboard... 370 */ 371 ds = dmu_objset_ds(os); 372 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 373 error = error ? error : dsl_prop_register(ds, 374 "xattr", xattr_changed_cb, zfsvfs); 375 error = error ? error : dsl_prop_register(ds, 376 "recordsize", blksz_changed_cb, zfsvfs); 377 error = error ? error : dsl_prop_register(ds, 378 "readonly", readonly_changed_cb, zfsvfs); 379 error = error ? error : dsl_prop_register(ds, 380 "setuid", setuid_changed_cb, zfsvfs); 381 error = error ? error : dsl_prop_register(ds, 382 "exec", exec_changed_cb, zfsvfs); 383 error = error ? error : dsl_prop_register(ds, 384 "snapdir", snapdir_changed_cb, zfsvfs); 385 error = error ? error : dsl_prop_register(ds, 386 "aclmode", acl_mode_changed_cb, zfsvfs); 387 error = error ? 
error : dsl_prop_register(ds, 388 "aclinherit", acl_inherit_changed_cb, zfsvfs); 389 if (error) 390 goto unregister; 391 392 /* 393 * Invoke our callbacks to restore temporary mount options. 394 */ 395 if (do_readonly) 396 readonly_changed_cb(zfsvfs, readonly); 397 if (do_setuid) 398 setuid_changed_cb(zfsvfs, setuid); 399 if (do_exec) 400 exec_changed_cb(zfsvfs, exec); 401 if (do_xattr) 402 xattr_changed_cb(zfsvfs, xattr); 403 404 return (0); 405 406unregister: 407 /* 408 * We may attempt to unregister some callbacks that are not 409 * registered, but this is OK; it will simply return ENOMSG, 410 * which we will ignore. 411 */ 412 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 413 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 414 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 415 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 416 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 417 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 418 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 419 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 420 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 421 zfsvfs); 422 return (error); 423 424} 425 426static int 427zfs_domount(vfs_t *vfsp, char *osname, kthread_t *td) 428{ 429 cred_t *cr = td->td_ucred; 430 uint64_t recordsize, readonly; 431 int error = 0; 432 int mode; 433 zfsvfs_t *zfsvfs; 434 znode_t *zp = NULL; 435 436 ASSERT(vfsp); 437 ASSERT(osname); 438 439 /* 440 * Initialize the zfs-specific filesystem structure. 441 * Should probably make this a kmem cache, shuffle fields, 442 * and just bzero up to z_hold_mtx[]. 
443 */ 444 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 445 zfsvfs->z_vfs = vfsp; 446 zfsvfs->z_parent = zfsvfs; 447 zfsvfs->z_assign = TXG_NOWAIT; 448 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 449 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 450 451 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 452 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 453 offsetof(znode_t, z_link_node)); 454 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 455 456 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 457 NULL)) 458 goto out; 459 zfsvfs->z_vfs->vfs_bsize = recordsize; 460 461 vfsp->vfs_data = zfsvfs; 462 vfsp->mnt_flag |= MNT_LOCAL; 463 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 464 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 465 466 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 467 goto out; 468 469 if (readonly) 470 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 471 else 472 mode = DS_MODE_PRIMARY; 473 474 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 475 if (error == EROFS) { 476 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 477 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 478 &zfsvfs->z_os); 479 } 480 481 if (error) 482 goto out; 483 484 if (error = zfs_init_fs(zfsvfs, &zp, cr)) 485 goto out; 486 487 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 488 uint64_t xattr; 489 490 ASSERT(mode & DS_MODE_READONLY); 491 atime_changed_cb(zfsvfs, B_FALSE); 492 readonly_changed_cb(zfsvfs, B_TRUE); 493 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 494 goto out; 495 xattr_changed_cb(zfsvfs, xattr); 496 zfsvfs->z_issnap = B_TRUE; 497 } else { 498 error = zfs_register_callbacks(vfsp); 499 if (error) 500 goto out; 501 502 zfs_unlinked_drain(zfsvfs); 503 504 /* 505 * Parse and replay the intent log. 
506 */ 507 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 508 zfs_replay_vector); 509 510 if (!zil_disable) 511 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 512 } 513 514 vfs_mountedfrom(vfsp, osname); 515 516 if (!zfsvfs->z_issnap) 517 zfsctl_create(zfsvfs); 518out: 519 if (error) { 520 if (zfsvfs->z_os) 521 dmu_objset_close(zfsvfs->z_os); 522 rw_destroy(&zfsvfs->z_um_lock); 523 mutex_destroy(&zfsvfs->z_znodes_lock); 524 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 525 } else { 526 atomic_add_32(&zfs_active_fs_count, 1); 527 } 528 529 return (error); 530 531} 532 533void 534zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 535{ 536 objset_t *os = zfsvfs->z_os; 537 struct dsl_dataset *ds; 538 539 /* 540 * Unregister properties. 541 */ 542 if (!dmu_objset_is_snapshot(os)) { 543 ds = dmu_objset_ds(os); 544 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 545 zfsvfs) == 0); 546 547 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 548 zfsvfs) == 0); 549 550 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 551 zfsvfs) == 0); 552 553 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 554 zfsvfs) == 0); 555 556 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 557 zfsvfs) == 0); 558 559 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 560 zfsvfs) == 0); 561 562 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 563 zfsvfs) == 0); 564 565 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 566 zfsvfs) == 0); 567 568 VERIFY(dsl_prop_unregister(ds, "aclinherit", 569 acl_inherit_changed_cb, zfsvfs) == 0); 570 } 571} 572 573/*ARGSUSED*/ 574static int 575zfs_mount(vfs_t *vfsp, kthread_t *td) 576{ 577 char *from; 578 int error; 579 580 /* 581 * When doing a remount, we simply refresh our temporary properties 582 * according to those options set in the current VFS options. 
583 */ 584 if (vfsp->vfs_flag & MS_REMOUNT) 585 return (zfs_refresh_properties(vfsp)); 586 587 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&from, NULL)) 588 return (EINVAL); 589 590 DROP_GIANT(); 591 error = zfs_domount(vfsp, from, td); 592 PICKUP_GIANT(); 593 return (error); 594} 595 596static int 597zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td) 598{ 599 zfsvfs_t *zfsvfs = vfsp->vfs_data; 600 uint64_t refdbytes, availbytes, usedobjs, availobjs; 601 602 statp->f_version = STATFS_VERSION; 603 604 ZFS_ENTER(zfsvfs); 605 606 dmu_objset_space(zfsvfs->z_os, 607 &refdbytes, &availbytes, &usedobjs, &availobjs); 608 609 /* 610 * The underlying storage pool actually uses multiple block sizes. 611 * We report the fragsize as the smallest block size we support, 612 * and we report our blocksize as the filesystem's maximum blocksize. 613 */ 614 statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 615 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 616 617 /* 618 * The following report "total" blocks of various kinds in the 619 * file system, but reported in terms of f_frsize - the 620 * "fragment" size. 621 */ 622 623 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 624 statp->f_bfree = availbytes / statp->f_bsize; 625 statp->f_bavail = statp->f_bfree; /* no root reservation */ 626 627 /* 628 * statvfs() should really be called statufs(), because it assumes 629 * static metadata. ZFS doesn't preallocate files, so the best 630 * we can do is report the max that could possibly fit in f_files, 631 * and that minus the number actually used in f_ffree. 632 * For f_ffree, report the smaller of the number of object available 633 * and the number of blocks (each object will take at least a block). 634 */ 635 statp->f_ffree = MIN(availobjs, statp->f_bfree); 636 statp->f_files = statp->f_ffree + usedobjs; 637 638 /* 639 * We're a zfs filesystem. 
640 */ 641 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 642 643 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 644 sizeof(statp->f_mntfromname)); 645 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 646 sizeof(statp->f_mntonname)); 647 648 statp->f_namemax = ZFS_MAXNAMELEN; 649 650 ZFS_EXIT(zfsvfs); 651 return (0); 652} 653 654static int 655zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td) 656{ 657 zfsvfs_t *zfsvfs = vfsp->vfs_data; 658 znode_t *rootzp; 659 int error; 660 661 ZFS_ENTER(zfsvfs); 662 663 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 664 if (error == 0) { 665 *vpp = ZTOV(rootzp); 666 error = vn_lock(*vpp, flags, td); 667 (*vpp)->v_vflag |= VV_ROOT; 668 } 669 670 ZFS_EXIT(zfsvfs); 671 return (error); 672} 673 674/*ARGSUSED*/ 675static int 676zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td) 677{ 678 zfsvfs_t *zfsvfs = vfsp->vfs_data; 679 cred_t *cr = td->td_ucred; 680 int ret; 681 682 if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 683 return (ret); 684 685 (void) dnlc_purge_vfsp(vfsp, 0); 686 687 /* 688 * Unmount any snapshots mounted under .zfs before unmounting the 689 * dataset itself. 690 */ 691 if (zfsvfs->z_ctldir != NULL) { 692 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 693 return (ret); 694 ret = vflush(vfsp, 0, 0, td); 695 ASSERT(ret == EBUSY); 696 if (!(fflag & MS_FORCE)) { 697 if (zfsvfs->z_ctldir->v_count > 1) 698 return (EBUSY); 699 ASSERT(zfsvfs->z_ctldir->v_count == 1); 700 } 701 zfsctl_destroy(zfsvfs); 702 ASSERT(zfsvfs->z_ctldir == NULL); 703 } 704 705 /* 706 * Flush all the files. 707 */ 708 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? 
FORCECLOSE : 0, td); 709 if (ret != 0) { 710 if (!zfsvfs->z_issnap) { 711 zfsctl_create(zfsvfs); 712 ASSERT(zfsvfs->z_ctldir != NULL); 713 } 714 return (ret); 715 } 716 717 if (fflag & MS_FORCE) { 718 MNT_ILOCK(vfsp); 719 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 720 MNT_IUNLOCK(vfsp); 721 zfsvfs->z_unmounted1 = B_TRUE; 722 723 /* 724 * Wait for all zfs threads to leave zfs. 725 * Grabbing a rwlock as reader in all vops and 726 * as writer here doesn't work because it too easy to get 727 * multiple reader enters as zfs can re-enter itself. 728 * This can lead to deadlock if there is an intervening 729 * rw_enter as writer. 730 * So a file system threads ref count (z_op_cnt) is used. 731 * A polling loop on z_op_cnt may seem inefficient, but 732 * - this saves all threads on exit from having to grab a 733 * mutex in order to cv_signal 734 * - only occurs on forced unmount in the rare case when 735 * there are outstanding threads within the file system. 736 */ 737 while (zfsvfs->z_op_cnt) { 738 delay(1); 739 } 740 } 741 742 zfs_objset_close(zfsvfs); 743 VFS_RELE(vfsp); 744 zfs_freevfs(vfsp); 745 746 return (0); 747} 748 749static int 750zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 751{ 752 zfsvfs_t *zfsvfs = vfsp->vfs_data; 753 znode_t *zp; 754 int err; 755 756 ZFS_ENTER(zfsvfs); 757 err = zfs_zget(zfsvfs, ino, &zp); 758 if (err == 0 && zp->z_unlinked) { 759 VN_RELE(ZTOV(zp)); 760 err = EINVAL; 761 } 762 if (err != 0) 763 *vpp = NULL; 764 else { 765 *vpp = ZTOV(zp); 766 vn_lock(*vpp, flags, curthread); 767 } 768 ZFS_EXIT(zfsvfs); 769 return (0); 770} 771 772static int 773zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 774{ 775 kthread_t *td = curthread; 776 zfsvfs_t *zfsvfs = vfsp->vfs_data; 777 znode_t *zp; 778 uint64_t object = 0; 779 uint64_t fid_gen = 0; 780 uint64_t gen_mask; 781 uint64_t zp_gen; 782 int i, err; 783 784 *vpp = NULL; 785 786 ZFS_ENTER(zfsvfs); 787 788 if (fidp->fid_len == LONG_FID_LEN) { 789 zfid_long_t *zlfid = (zfid_long_t *)fidp; 
790 uint64_t objsetid = 0; 791 uint64_t setgen = 0; 792 793 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 794 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 795 796 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 797 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 798 799 ZFS_EXIT(zfsvfs); 800 801 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 802 if (err) 803 return (EINVAL); 804 ZFS_ENTER(zfsvfs); 805 } 806 807 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 808 zfid_short_t *zfid = (zfid_short_t *)fidp; 809 810 for (i = 0; i < sizeof (zfid->zf_object); i++) 811 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 812 813 for (i = 0; i < sizeof (zfid->zf_gen); i++) 814 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 815 } else { 816 ZFS_EXIT(zfsvfs); 817 return (EINVAL); 818 } 819 820 /* A zero fid_gen means we are in the .zfs control directories */ 821 if (fid_gen == 0 && 822 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 823 *vpp = zfsvfs->z_ctldir; 824 ASSERT(*vpp != NULL); 825 if (object == ZFSCTL_INO_SNAPDIR) { 826 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 827 0, NULL, NULL) == 0); 828 } else { 829 VN_HOLD(*vpp); 830 } 831 ZFS_EXIT(zfsvfs); 832 /* XXX: LK_RETRY? */ 833 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 834 return (0); 835 } 836 837 gen_mask = -1ULL >> (64 - 8 * i); 838 839 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 840 if (err = zfs_zget(zfsvfs, object, &zp)) { 841 ZFS_EXIT(zfsvfs); 842 return (err); 843 } 844 zp_gen = zp->z_phys->zp_gen & gen_mask; 845 if (zp_gen == 0) 846 zp_gen = 1; 847 if (zp->z_unlinked || zp_gen != fid_gen) { 848 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 849 VN_RELE(ZTOV(zp)); 850 ZFS_EXIT(zfsvfs); 851 return (EINVAL); 852 } 853 854 *vpp = ZTOV(zp); 855 /* XXX: LK_RETRY? 
*/ 856 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 857 vnode_create_vobject(*vpp, zp->z_phys->zp_size, td); 858 ZFS_EXIT(zfsvfs); 859 return (0); 860} 861 862static void 863zfs_objset_close(zfsvfs_t *zfsvfs) 864{ 865 znode_t *zp, *nextzp; 866 objset_t *os = zfsvfs->z_os; 867 868 /* 869 * For forced unmount, at this point all vops except zfs_inactive 870 * are erroring EIO. We need to now suspend zfs_inactive threads 871 * while we are freeing dbufs before switching zfs_inactive 872 * to use behaviour without a objset. 873 */ 874 rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 875 876 /* 877 * Release all holds on dbufs 878 * Note, although we have stopped all other vop threads and 879 * zfs_inactive(), the dmu can callback via znode_pageout_func() 880 * which can zfs_znode_free() the znode. 881 * So we lock z_all_znodes; search the list for a held 882 * dbuf; drop the lock (we know zp can't disappear if we hold 883 * a dbuf lock; then regrab the lock and restart. 884 */ 885 mutex_enter(&zfsvfs->z_znodes_lock); 886 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 887 nextzp = list_next(&zfsvfs->z_all_znodes, zp); 888 if (zp->z_dbuf_held) { 889 /* dbufs should only be held when force unmounting */ 890 zp->z_dbuf_held = 0; 891 mutex_exit(&zfsvfs->z_znodes_lock); 892 dmu_buf_rele(zp->z_dbuf, NULL); 893 /* Start again */ 894 mutex_enter(&zfsvfs->z_znodes_lock); 895 nextzp = list_head(&zfsvfs->z_all_znodes); 896 } 897 } 898 mutex_exit(&zfsvfs->z_znodes_lock); 899 900 /* 901 * Unregister properties. 902 */ 903 if (!dmu_objset_is_snapshot(os)) 904 zfs_unregister_callbacks(zfsvfs); 905 906 /* 907 * Switch zfs_inactive to behaviour without an objset. 908 * It just tosses cached pages and frees the znode & vnode. 909 * Then re-enable zfs_inactive threads in that new behaviour. 910 */ 911 zfsvfs->z_unmounted2 = B_TRUE; 912 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 913 914 /* 915 * Close the zil. 
Can't close the zil while zfs_inactive 916 * threads are blocked as zil_close can call zfs_inactive. 917 */ 918 if (zfsvfs->z_log) { 919 zil_close(zfsvfs->z_log); 920 zfsvfs->z_log = NULL; 921 } 922 923 /* 924 * Evict all dbufs so that cached znodes will be freed 925 */ 926 if (dmu_objset_evict_dbufs(os, 1)) { 927 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 928 (void) dmu_objset_evict_dbufs(os, 0); 929 } 930 931 /* 932 * Finally close the objset 933 */ 934 dmu_objset_close(os); 935} 936 937static void 938zfs_freevfs(vfs_t *vfsp) 939{ 940 zfsvfs_t *zfsvfs = vfsp->vfs_data; 941 int i; 942 943 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 944 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 945 rw_destroy(&zfsvfs->z_um_lock); 946 mutex_destroy(&zfsvfs->z_znodes_lock); 947 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 948 949 atomic_add_32(&zfs_active_fs_count, -1); 950} 951 952void 953zfs_init(void) 954{ 955 956 printf("ZFS filesystem version " ZFS_VERSION_STRING "\n"); 957 958 /* 959 * Initialize .zfs directory structures 960 */ 961 zfsctl_init(); 962 963 /* 964 * Initialize znode cache, vnode ops, etc... 965 */ 966 zfs_znode_init(); 967} 968 969void 970zfs_fini(void) 971{ 972 zfsctl_fini(); 973 zfs_znode_fini(); 974} 975 976int 977zfs_busy(void) 978{ 979 return (zfs_active_fs_count != 0); 980} 981