/* zfs_vfsops.c revision 168962 */
190075Sobrien/* 2169689Skan * CDDL HEADER START 390075Sobrien * 490075Sobrien * The contents of this file are subject to the terms of the 5169689Skan * Common Development and Distribution License (the "License"). 690075Sobrien * You may not use this file except in compliance with the License. 790075Sobrien * 890075Sobrien * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 990075Sobrien * or http://www.opensolaris.org/os/licensing. 1090075Sobrien * See the License for the specific language governing permissions 1190075Sobrien * and limitations under the License. 1290075Sobrien * 1390075Sobrien * When distributing Covered Code, include this CDDL HEADER in each 1490075Sobrien * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1590075Sobrien * If applicable, add the following below this CDDL HEADER, with the 1690075Sobrien * fields enclosed by brackets "[]" replaced with your own identifying 1790075Sobrien * information: Portions Copyright [yyyy] [name of copyright owner] 1890075Sobrien * 1990075Sobrien * CDDL HEADER END 2090075Sobrien */ 2190075Sobrien/* 22169689Skan * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23169689Skan * Use is subject to license terms. 
2490075Sobrien */ 25169689Skan 26169689Skan#pragma ident "%Z%%M% %I% %E% SMI" 2790075Sobrien 2890075Sobrien#include <sys/types.h> 2990075Sobrien#include <sys/param.h> 3090075Sobrien#include <sys/systm.h> 3190075Sobrien#include <sys/kernel.h> 3290075Sobrien#include <sys/sysmacros.h> 33169689Skan#include <sys/kmem.h> 3490075Sobrien#include <sys/acl.h> 3590075Sobrien#include <sys/vnode.h> 3690075Sobrien#include <sys/vfs.h> 3790075Sobrien#include <sys/mntent.h> 3890075Sobrien#include <sys/mount.h> 3990075Sobrien#include <sys/cmn_err.h> 40#include <sys/zfs_znode.h> 41#include <sys/zfs_dir.h> 42#include <sys/zil.h> 43#include <sys/fs/zfs.h> 44#include <sys/dmu.h> 45#include <sys/dsl_prop.h> 46#include <sys/dsl_dataset.h> 47#include <sys/spa.h> 48#include <sys/zap.h> 49#include <sys/varargs.h> 50#include <sys/policy.h> 51#include <sys/atomic.h> 52#include <sys/zfs_ioctl.h> 53#include <sys/zfs_ctldir.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56 57struct mtx atomic_mtx; 58MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF); 59 60struct mtx zfs_debug_mtx; 61MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 62SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 63int zfs_debug_level = 0; 64TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 65SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 66 "Debug level"); 67 68static int zfs_mount(vfs_t *vfsp, kthread_t *td); 69static int zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td); 70static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td); 71static int zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td); 72static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 73static int zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td); 74static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 75static void zfs_objset_close(zfsvfs_t *zfsvfs); 76static void zfs_freevfs(vfs_t *vfsp); 77 78static struct vfsops zfs_vfsops = { 79 .vfs_mount 
= zfs_mount, 80 .vfs_unmount = zfs_umount, 81 .vfs_root = zfs_root, 82 .vfs_statfs = zfs_statfs, 83 .vfs_vget = zfs_vget, 84 .vfs_sync = zfs_sync, 85 .vfs_fhtovp = zfs_fhtovp, 86}; 87 88VFS_SET(zfs_vfsops, zfs, VFCF_JAIL); 89 90/* 91 * We need to keep a count of active fs's. 92 * This is necessary to prevent our module 93 * from being unloaded after a umount -f 94 */ 95static uint32_t zfs_active_fs_count = 0; 96 97/*ARGSUSED*/ 98static int 99zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td) 100{ 101 102 /* 103 * Data integrity is job one. We don't want a compromised kernel 104 * writing to the storage pool, so we never sync during panic. 105 */ 106 if (panicstr) 107 return (0); 108 109 if (vfsp != NULL) { 110 /* 111 * Sync a specific filesystem. 112 */ 113 zfsvfs_t *zfsvfs = vfsp->vfs_data; 114 int error; 115 116 error = vfs_stdsync(vfsp, waitfor, td); 117 if (error != 0) 118 return (error); 119 120 ZFS_ENTER(zfsvfs); 121 if (zfsvfs->z_log != NULL) 122 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 123 else 124 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 125 ZFS_EXIT(zfsvfs); 126 } else { 127 /* 128 * Sync all ZFS filesystems. This is what happens when you 129 * run sync(1M). Unlike other filesystems, ZFS honors the 130 * request by waiting for all pools to commit all dirty data. 
131 */ 132 spa_sync_allpools(); 133 } 134 135 return (0); 136} 137 138static void 139atime_changed_cb(void *arg, uint64_t newval) 140{ 141 zfsvfs_t *zfsvfs = arg; 142 143 if (newval == TRUE) { 144 zfsvfs->z_atime = TRUE; 145 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 146 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 147 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 148 } else { 149 zfsvfs->z_atime = FALSE; 150 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 151 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 152 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 153 } 154} 155 156static void 157xattr_changed_cb(void *arg, uint64_t newval) 158{ 159 zfsvfs_t *zfsvfs = arg; 160 161 if (newval == TRUE) { 162 /* XXX locking on vfs_flag? */ 163#ifdef TODO 164 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 165#endif 166 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 167 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 168 } else { 169 /* XXX locking on vfs_flag? */ 170#ifdef TODO 171 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 172#endif 173 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 174 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 175 } 176} 177 178static void 179blksz_changed_cb(void *arg, uint64_t newval) 180{ 181 zfsvfs_t *zfsvfs = arg; 182 183 if (newval < SPA_MINBLOCKSIZE || 184 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 185 newval = SPA_MAXBLOCKSIZE; 186 187 zfsvfs->z_max_blksz = newval; 188 zfsvfs->z_vfs->vfs_bsize = newval; 189} 190 191static void 192readonly_changed_cb(void *arg, uint64_t newval) 193{ 194 zfsvfs_t *zfsvfs = arg; 195 196 if (newval) { 197 /* XXX locking on vfs_flag? */ 198 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 199 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 200 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 201 } else { 202 /* XXX locking on vfs_flag? 
*/ 203 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 204 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 205 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 206 } 207} 208 209static void 210setuid_changed_cb(void *arg, uint64_t newval) 211{ 212 zfsvfs_t *zfsvfs = arg; 213 214 if (newval == FALSE) { 215 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 216 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 217 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 218 } else { 219 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 220 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 221 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 222 } 223} 224 225static void 226exec_changed_cb(void *arg, uint64_t newval) 227{ 228 zfsvfs_t *zfsvfs = arg; 229 230 if (newval == FALSE) { 231 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 232 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 233 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 234 } else { 235 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 236 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 237 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 238 } 239} 240 241static void 242snapdir_changed_cb(void *arg, uint64_t newval) 243{ 244 zfsvfs_t *zfsvfs = arg; 245 246 zfsvfs->z_show_ctldir = newval; 247} 248 249static void 250acl_mode_changed_cb(void *arg, uint64_t newval) 251{ 252 zfsvfs_t *zfsvfs = arg; 253 254 zfsvfs->z_acl_mode = newval; 255} 256 257static void 258acl_inherit_changed_cb(void *arg, uint64_t newval) 259{ 260 zfsvfs_t *zfsvfs = arg; 261 262 zfsvfs->z_acl_inherit = newval; 263} 264 265static int 266zfs_refresh_properties(vfs_t *vfsp) 267{ 268 zfsvfs_t *zfsvfs = vfsp->vfs_data; 269 270 /* 271 * Remount operations default to "rw" unless "ro" is explicitly 272 * specified. 
273 */ 274 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 275 readonly_changed_cb(zfsvfs, B_TRUE); 276 } else { 277 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 278 readonly_changed_cb(zfsvfs, B_FALSE); 279 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 280 return (EROFS); 281 } 282 283 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 284 setuid_changed_cb(zfsvfs, B_FALSE); 285 } else { 286 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 287 setuid_changed_cb(zfsvfs, B_FALSE); 288 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 289 setuid_changed_cb(zfsvfs, B_TRUE); 290 } 291 292 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 293 exec_changed_cb(zfsvfs, B_FALSE); 294 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 295 exec_changed_cb(zfsvfs, B_TRUE); 296 297 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 298 atime_changed_cb(zfsvfs, B_TRUE); 299 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 300 atime_changed_cb(zfsvfs, B_FALSE); 301 302 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 303 xattr_changed_cb(zfsvfs, B_TRUE); 304 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 305 xattr_changed_cb(zfsvfs, B_FALSE); 306 307 return (0); 308} 309 310static int 311zfs_register_callbacks(vfs_t *vfsp) 312{ 313 struct dsl_dataset *ds = NULL; 314 objset_t *os = NULL; 315 zfsvfs_t *zfsvfs = NULL; 316 int readonly, do_readonly = FALSE; 317 int setuid, do_setuid = FALSE; 318 int exec, do_exec = FALSE; 319 int xattr, do_xattr = FALSE; 320 int error = 0; 321 322 ASSERT(vfsp); 323 zfsvfs = vfsp->vfs_data; 324 ASSERT(zfsvfs); 325 os = zfsvfs->z_os; 326 327 /* 328 * The act of registering our callbacks will destroy any mount 329 * options we may have. In order to enable temporary overrides 330 * of mount options, we stash away the current values and 331 * restore them after we register the callbacks. 
332 */ 333 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 334 readonly = B_TRUE; 335 do_readonly = B_TRUE; 336 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 337 readonly = B_FALSE; 338 do_readonly = B_TRUE; 339 } 340 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 341 setuid = B_FALSE; 342 do_setuid = B_TRUE; 343 } else { 344 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 345 setuid = B_FALSE; 346 do_setuid = B_TRUE; 347 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 348 setuid = B_TRUE; 349 do_setuid = B_TRUE; 350 } 351 } 352 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 353 exec = B_FALSE; 354 do_exec = B_TRUE; 355 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 356 exec = B_TRUE; 357 do_exec = B_TRUE; 358 } 359 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 360 xattr = B_FALSE; 361 do_xattr = B_TRUE; 362 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 363 xattr = B_TRUE; 364 do_xattr = B_TRUE; 365 } 366 367 /* 368 * Register property callbacks. 369 * 370 * It would probably be fine to just check for i/o error from 371 * the first prop_register(), but I guess I like to go 372 * overboard... 373 */ 374 ds = dmu_objset_ds(os); 375 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 376 error = error ? error : dsl_prop_register(ds, 377 "xattr", xattr_changed_cb, zfsvfs); 378 error = error ? error : dsl_prop_register(ds, 379 "recordsize", blksz_changed_cb, zfsvfs); 380 error = error ? error : dsl_prop_register(ds, 381 "readonly", readonly_changed_cb, zfsvfs); 382 error = error ? error : dsl_prop_register(ds, 383 "setuid", setuid_changed_cb, zfsvfs); 384 error = error ? error : dsl_prop_register(ds, 385 "exec", exec_changed_cb, zfsvfs); 386 error = error ? error : dsl_prop_register(ds, 387 "snapdir", snapdir_changed_cb, zfsvfs); 388 error = error ? error : dsl_prop_register(ds, 389 "aclmode", acl_mode_changed_cb, zfsvfs); 390 error = error ? 
error : dsl_prop_register(ds, 391 "aclinherit", acl_inherit_changed_cb, zfsvfs); 392 if (error) 393 goto unregister; 394 395 /* 396 * Invoke our callbacks to restore temporary mount options. 397 */ 398 if (do_readonly) 399 readonly_changed_cb(zfsvfs, readonly); 400 if (do_setuid) 401 setuid_changed_cb(zfsvfs, setuid); 402 if (do_exec) 403 exec_changed_cb(zfsvfs, exec); 404 if (do_xattr) 405 xattr_changed_cb(zfsvfs, xattr); 406 407 return (0); 408 409unregister: 410 /* 411 * We may attempt to unregister some callbacks that are not 412 * registered, but this is OK; it will simply return ENOMSG, 413 * which we will ignore. 414 */ 415 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 416 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 417 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 418 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 419 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 420 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 421 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 422 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 423 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 424 zfsvfs); 425 return (error); 426 427} 428 429static int 430zfs_domount(vfs_t *vfsp, char *osname, kthread_t *td) 431{ 432 cred_t *cr = td->td_ucred; 433 uint64_t recordsize, readonly; 434 int error = 0; 435 int mode; 436 zfsvfs_t *zfsvfs; 437 znode_t *zp = NULL; 438 439 ASSERT(vfsp); 440 ASSERT(osname); 441 442 /* 443 * Initialize the zfs-specific filesystem structure. 444 * Should probably make this a kmem cache, shuffle fields, 445 * and just bzero up to z_hold_mtx[]. 
446 */ 447 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 448 zfsvfs->z_vfs = vfsp; 449 zfsvfs->z_parent = zfsvfs; 450 zfsvfs->z_assign = TXG_NOWAIT; 451 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 452 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 453 454 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 455 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 456 offsetof(znode_t, z_link_node)); 457 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 458 459 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 460 NULL)) 461 goto out; 462 zfsvfs->z_vfs->vfs_bsize = recordsize; 463 464 vfsp->vfs_data = zfsvfs; 465 vfsp->mnt_flag |= MNT_LOCAL; 466 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 467 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 468 469 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 470 goto out; 471 472 if (readonly) 473 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 474 else 475 mode = DS_MODE_PRIMARY; 476 477 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 478 if (error == EROFS) { 479 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 480 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 481 &zfsvfs->z_os); 482 } 483 484 if (error) 485 goto out; 486 487 if (error = zfs_init_fs(zfsvfs, &zp, cr)) 488 goto out; 489 490 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 491 uint64_t xattr; 492 493 ASSERT(mode & DS_MODE_READONLY); 494 atime_changed_cb(zfsvfs, B_FALSE); 495 readonly_changed_cb(zfsvfs, B_TRUE); 496 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 497 goto out; 498 xattr_changed_cb(zfsvfs, xattr); 499 zfsvfs->z_issnap = B_TRUE; 500 } else { 501 error = zfs_register_callbacks(vfsp); 502 if (error) 503 goto out; 504 505 zfs_unlinked_drain(zfsvfs); 506 507 /* 508 * Parse and replay the intent log. 
509 */ 510 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 511 zfs_replay_vector); 512 513 if (!zil_disable) 514 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 515 } 516 517 vfs_mountedfrom(vfsp, osname); 518 519 if (!zfsvfs->z_issnap) 520 zfsctl_create(zfsvfs); 521out: 522 if (error) { 523 if (zfsvfs->z_os) 524 dmu_objset_close(zfsvfs->z_os); 525 rw_destroy(&zfsvfs->z_um_lock); 526 mutex_destroy(&zfsvfs->z_znodes_lock); 527 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 528 } else { 529 atomic_add_32(&zfs_active_fs_count, 1); 530 } 531 532 return (error); 533 534} 535 536void 537zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 538{ 539 objset_t *os = zfsvfs->z_os; 540 struct dsl_dataset *ds; 541 542 /* 543 * Unregister properties. 544 */ 545 if (!dmu_objset_is_snapshot(os)) { 546 ds = dmu_objset_ds(os); 547 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 548 zfsvfs) == 0); 549 550 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 551 zfsvfs) == 0); 552 553 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 554 zfsvfs) == 0); 555 556 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 557 zfsvfs) == 0); 558 559 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 560 zfsvfs) == 0); 561 562 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 563 zfsvfs) == 0); 564 565 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 566 zfsvfs) == 0); 567 568 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 569 zfsvfs) == 0); 570 571 VERIFY(dsl_prop_unregister(ds, "aclinherit", 572 acl_inherit_changed_cb, zfsvfs) == 0); 573 } 574} 575 576/*ARGSUSED*/ 577static int 578zfs_mount(vfs_t *vfsp, kthread_t *td) 579{ 580 char *from; 581 int error; 582 583 /* 584 * When doing a remount, we simply refresh our temporary properties 585 * according to those options set in the current VFS options. 
586 */ 587 if (vfsp->vfs_flag & MS_REMOUNT) 588 return (zfs_refresh_properties(vfsp)); 589 590 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&from, NULL)) 591 return (EINVAL); 592 593 DROP_GIANT(); 594 error = zfs_domount(vfsp, from, td); 595 PICKUP_GIANT(); 596 return (error); 597} 598 599static int 600zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td) 601{ 602 zfsvfs_t *zfsvfs = vfsp->vfs_data; 603 uint64_t refdbytes, availbytes, usedobjs, availobjs; 604 605 statp->f_version = STATFS_VERSION; 606 607 ZFS_ENTER(zfsvfs); 608 609 dmu_objset_space(zfsvfs->z_os, 610 &refdbytes, &availbytes, &usedobjs, &availobjs); 611 612 /* 613 * The underlying storage pool actually uses multiple block sizes. 614 * We report the fragsize as the smallest block size we support, 615 * and we report our blocksize as the filesystem's maximum blocksize. 616 */ 617 statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 618 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 619 620 /* 621 * The following report "total" blocks of various kinds in the 622 * file system, but reported in terms of f_frsize - the 623 * "fragment" size. 624 */ 625 626 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 627 statp->f_bfree = availbytes / statp->f_bsize; 628 statp->f_bavail = statp->f_bfree; /* no root reservation */ 629 630 /* 631 * statvfs() should really be called statufs(), because it assumes 632 * static metadata. ZFS doesn't preallocate files, so the best 633 * we can do is report the max that could possibly fit in f_files, 634 * and that minus the number actually used in f_ffree. 635 * For f_ffree, report the smaller of the number of object available 636 * and the number of blocks (each object will take at least a block). 637 */ 638 statp->f_ffree = MIN(availobjs, statp->f_bfree); 639 statp->f_files = statp->f_ffree + usedobjs; 640 641 /* 642 * We're a zfs filesystem. 
643 */ 644 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 645 646 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 647 sizeof(statp->f_mntfromname)); 648 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 649 sizeof(statp->f_mntonname)); 650 651 statp->f_namemax = ZFS_MAXNAMELEN; 652 653 ZFS_EXIT(zfsvfs); 654 return (0); 655} 656 657static int 658zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td) 659{ 660 zfsvfs_t *zfsvfs = vfsp->vfs_data; 661 znode_t *rootzp; 662 int error; 663 664 ZFS_ENTER(zfsvfs); 665 666 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 667 if (error == 0) { 668 *vpp = ZTOV(rootzp); 669 error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 670 (*vpp)->v_vflag |= VV_ROOT; 671 } 672 673 ZFS_EXIT(zfsvfs); 674 return (error); 675} 676 677/*ARGSUSED*/ 678static int 679zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td) 680{ 681 zfsvfs_t *zfsvfs = vfsp->vfs_data; 682 cred_t *cr = td->td_ucred; 683 int ret; 684 685 if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 686 return (ret); 687 688 (void) dnlc_purge_vfsp(vfsp, 0); 689 690 /* 691 * Unmount any snapshots mounted under .zfs before unmounting the 692 * dataset itself. 693 */ 694 if (zfsvfs->z_ctldir != NULL) { 695 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 696 return (ret); 697 ret = vflush(vfsp, 0, 0, td); 698 ASSERT(ret == EBUSY); 699 if (!(fflag & MS_FORCE)) { 700 if (zfsvfs->z_ctldir->v_count > 1) 701 return (EBUSY); 702 ASSERT(zfsvfs->z_ctldir->v_count == 1); 703 } 704 zfsctl_destroy(zfsvfs); 705 ASSERT(zfsvfs->z_ctldir == NULL); 706 } 707 708 /* 709 * Flush all the files. 710 */ 711 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? 
FORCECLOSE : 0, td); 712 if (ret != 0) { 713 if (!zfsvfs->z_issnap) { 714 zfsctl_create(zfsvfs); 715 ASSERT(zfsvfs->z_ctldir != NULL); 716 } 717 return (ret); 718 } 719 720 if (fflag & MS_FORCE) { 721 MNT_ILOCK(vfsp); 722 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 723 MNT_IUNLOCK(vfsp); 724 zfsvfs->z_unmounted1 = B_TRUE; 725 726 /* 727 * Wait for all zfs threads to leave zfs. 728 * Grabbing a rwlock as reader in all vops and 729 * as writer here doesn't work because it too easy to get 730 * multiple reader enters as zfs can re-enter itself. 731 * This can lead to deadlock if there is an intervening 732 * rw_enter as writer. 733 * So a file system threads ref count (z_op_cnt) is used. 734 * A polling loop on z_op_cnt may seem inefficient, but 735 * - this saves all threads on exit from having to grab a 736 * mutex in order to cv_signal 737 * - only occurs on forced unmount in the rare case when 738 * there are outstanding threads within the file system. 739 */ 740 while (zfsvfs->z_op_cnt) { 741 delay(1); 742 } 743 } 744 745 zfs_objset_close(zfsvfs); 746 VFS_RELE(vfsp); 747 zfs_freevfs(vfsp); 748 749 return (0); 750} 751 752static int 753zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 754{ 755 zfsvfs_t *zfsvfs = vfsp->vfs_data; 756 znode_t *zp; 757 int err; 758 759 ZFS_ENTER(zfsvfs); 760 err = zfs_zget(zfsvfs, ino, &zp); 761 if (err == 0 && zp->z_unlinked) { 762 VN_RELE(ZTOV(zp)); 763 err = EINVAL; 764 } 765 if (err != 0) 766 *vpp = NULL; 767 else { 768 *vpp = ZTOV(zp); 769 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread); 770 } 771 ZFS_EXIT(zfsvfs); 772 return (0); 773} 774 775static int 776zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 777{ 778 kthread_t *td = curthread; 779 zfsvfs_t *zfsvfs = vfsp->vfs_data; 780 znode_t *zp; 781 uint64_t object = 0; 782 uint64_t fid_gen = 0; 783 uint64_t gen_mask; 784 uint64_t zp_gen; 785 int i, err; 786 787 *vpp = NULL; 788 789 ZFS_ENTER(zfsvfs); 790 791 if (fidp->fid_len == LONG_FID_LEN) { 792 zfid_long_t *zlfid = 
(zfid_long_t *)fidp; 793 uint64_t objsetid = 0; 794 uint64_t setgen = 0; 795 796 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 797 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 798 799 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 800 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 801 802 ZFS_EXIT(zfsvfs); 803 804 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 805 if (err) 806 return (EINVAL); 807 ZFS_ENTER(zfsvfs); 808 } 809 810 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 811 zfid_short_t *zfid = (zfid_short_t *)fidp; 812 813 for (i = 0; i < sizeof (zfid->zf_object); i++) 814 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 815 816 for (i = 0; i < sizeof (zfid->zf_gen); i++) 817 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 818 } else { 819 ZFS_EXIT(zfsvfs); 820 return (EINVAL); 821 } 822 823 /* A zero fid_gen means we are in the .zfs control directories */ 824 if (fid_gen == 0 && 825 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 826 *vpp = zfsvfs->z_ctldir; 827 ASSERT(*vpp != NULL); 828 if (object == ZFSCTL_INO_SNAPDIR) { 829 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 830 0, NULL, NULL) == 0); 831 } else { 832 VN_HOLD(*vpp); 833 } 834 ZFS_EXIT(zfsvfs); 835 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 836 return (0); 837 } 838 839 gen_mask = -1ULL >> (64 - 8 * i); 840 841 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 842 if (err = zfs_zget(zfsvfs, object, &zp)) { 843 ZFS_EXIT(zfsvfs); 844 return (err); 845 } 846 zp_gen = zp->z_phys->zp_gen & gen_mask; 847 if (zp_gen == 0) 848 zp_gen = 1; 849 if (zp->z_unlinked || zp_gen != fid_gen) { 850 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 851 VN_RELE(ZTOV(zp)); 852 ZFS_EXIT(zfsvfs); 853 return (EINVAL); 854 } 855 856 *vpp = ZTOV(zp); 857 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 858 vnode_create_vobject(*vpp, zp->z_phys->zp_size, td); 859 ZFS_EXIT(zfsvfs); 860 return (0); 861} 862 863static 
void 864zfs_objset_close(zfsvfs_t *zfsvfs) 865{ 866 znode_t *zp, *nextzp; 867 objset_t *os = zfsvfs->z_os; 868 869 /* 870 * For forced unmount, at this point all vops except zfs_inactive 871 * are erroring EIO. We need to now suspend zfs_inactive threads 872 * while we are freeing dbufs before switching zfs_inactive 873 * to use behaviour without a objset. 874 */ 875 rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 876 877 /* 878 * Release all holds on dbufs 879 * Note, although we have stopped all other vop threads and 880 * zfs_inactive(), the dmu can callback via znode_pageout_func() 881 * which can zfs_znode_free() the znode. 882 * So we lock z_all_znodes; search the list for a held 883 * dbuf; drop the lock (we know zp can't disappear if we hold 884 * a dbuf lock; then regrab the lock and restart. 885 */ 886 mutex_enter(&zfsvfs->z_znodes_lock); 887 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 888 nextzp = list_next(&zfsvfs->z_all_znodes, zp); 889 if (zp->z_dbuf_held) { 890 /* dbufs should only be held when force unmounting */ 891 zp->z_dbuf_held = 0; 892 mutex_exit(&zfsvfs->z_znodes_lock); 893 dmu_buf_rele(zp->z_dbuf, NULL); 894 /* Start again */ 895 mutex_enter(&zfsvfs->z_znodes_lock); 896 nextzp = list_head(&zfsvfs->z_all_znodes); 897 } 898 } 899 mutex_exit(&zfsvfs->z_znodes_lock); 900 901 /* 902 * Unregister properties. 903 */ 904 if (!dmu_objset_is_snapshot(os)) 905 zfs_unregister_callbacks(zfsvfs); 906 907 /* 908 * Switch zfs_inactive to behaviour without an objset. 909 * It just tosses cached pages and frees the znode & vnode. 910 * Then re-enable zfs_inactive threads in that new behaviour. 911 */ 912 zfsvfs->z_unmounted2 = B_TRUE; 913 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 914 915 /* 916 * Close the zil. Can't close the zil while zfs_inactive 917 * threads are blocked as zil_close can call zfs_inactive. 
918 */ 919 if (zfsvfs->z_log) { 920 zil_close(zfsvfs->z_log); 921 zfsvfs->z_log = NULL; 922 } 923 924 /* 925 * Evict all dbufs so that cached znodes will be freed 926 */ 927 if (dmu_objset_evict_dbufs(os, 1)) { 928 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 929 (void) dmu_objset_evict_dbufs(os, 0); 930 } 931 932 /* 933 * Finally close the objset 934 */ 935 dmu_objset_close(os); 936} 937 938static void 939zfs_freevfs(vfs_t *vfsp) 940{ 941 zfsvfs_t *zfsvfs = vfsp->vfs_data; 942 int i; 943 944 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 945 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 946 rw_destroy(&zfsvfs->z_um_lock); 947 mutex_destroy(&zfsvfs->z_znodes_lock); 948 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 949 950 atomic_add_32(&zfs_active_fs_count, -1); 951} 952 953void 954zfs_init(void) 955{ 956 957 printf("ZFS filesystem version " ZFS_VERSION_STRING "\n"); 958 959 /* 960 * Initialize .zfs directory structures 961 */ 962 zfsctl_init(); 963 964 /* 965 * Initialize znode cache, vnode ops, etc... 966 */ 967 zfs_znode_init(); 968} 969 970void 971zfs_fini(void) 972{ 973 zfsctl_fini(); 974 zfs_znode_fini(); 975} 976 977int 978zfs_busy(void) 979{ 980 return (zfs_active_fs_count != 0); 981} 982