/* zfs_vfsops.c revision 168404 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/types.h> 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/kernel.h> 32#include <sys/sysmacros.h> 33#include <sys/kmem.h> 34#include <sys/acl.h> 35#include <sys/vnode.h> 36#include <sys/vfs.h> 37#include <sys/mntent.h> 38#include <sys/mount.h> 39#include <sys/cmn_err.h> 40#include <sys/zfs_znode.h> 41#include <sys/zfs_dir.h> 42#include <sys/zil.h> 43#include <sys/fs/zfs.h> 44#include <sys/dmu.h> 45#include <sys/dsl_prop.h> 46#include <sys/dsl_dataset.h> 47#include <sys/spa.h> 48#include <sys/zap.h> 49#include <sys/varargs.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/dnlc.h> 54 55struct mtx atomic_mtx; 56MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF); 57 58struct mtx zfs_debug_mtx; 59MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 60SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 61int zfs_debug_level = 0; 62SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, 
CTLFLAG_RW, &zfs_debug_level, 0, 63 "Debug level"); 64 65static int zfs_mount(vfs_t *vfsp, kthread_t *td); 66static int zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td); 67static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td); 68static int zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td); 69static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 70static int zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td); 71static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 72static void zfs_objset_close(zfsvfs_t *zfsvfs); 73static void zfs_freevfs(vfs_t *vfsp); 74 75static struct vfsops zfs_vfsops = { 76 .vfs_mount = zfs_mount, 77 .vfs_unmount = zfs_umount, 78 .vfs_root = zfs_root, 79 .vfs_statfs = zfs_statfs, 80 .vfs_vget = zfs_vget, 81 .vfs_sync = zfs_sync, 82 .vfs_fhtovp = zfs_fhtovp, 83}; 84 85VFS_SET(zfs_vfsops, zfs, VFCF_JAIL); 86 87/* 88 * We need to keep a count of active fs's. 89 * This is necessary to prevent our module 90 * from being unloaded after a umount -f 91 */ 92static uint32_t zfs_active_fs_count = 0; 93 94/*ARGSUSED*/ 95static int 96zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td) 97{ 98 99 /* 100 * Data integrity is job one. We don't want a compromised kernel 101 * writing to the storage pool, so we never sync during panic. 102 */ 103 if (panicstr) 104 return (0); 105 106 if (vfsp != NULL) { 107 /* 108 * Sync a specific filesystem. 109 */ 110 zfsvfs_t *zfsvfs = vfsp->vfs_data; 111 int error; 112 113 error = vfs_stdsync(vfsp, waitfor, td); 114 if (error != 0) 115 return (error); 116 117 ZFS_ENTER(zfsvfs); 118 if (zfsvfs->z_log != NULL) 119 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 120 else 121 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 122 ZFS_EXIT(zfsvfs); 123 } else { 124 /* 125 * Sync all ZFS filesystems. This is what happens when you 126 * run sync(1M). Unlike other filesystems, ZFS honors the 127 * request by waiting for all pools to commit all dirty data. 
128 */ 129 spa_sync_allpools(); 130 } 131 132 return (0); 133} 134 135static void 136atime_changed_cb(void *arg, uint64_t newval) 137{ 138 zfsvfs_t *zfsvfs = arg; 139 140 if (newval == TRUE) { 141 zfsvfs->z_atime = TRUE; 142 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 143 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 144 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 145 } else { 146 zfsvfs->z_atime = FALSE; 147 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 148 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 149 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 150 } 151} 152 153static void 154xattr_changed_cb(void *arg, uint64_t newval) 155{ 156 zfsvfs_t *zfsvfs = arg; 157 158 if (newval == TRUE) { 159 /* XXX locking on vfs_flag? */ 160#ifdef TODO 161 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 162#endif 163 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 164 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 165 } else { 166 /* XXX locking on vfs_flag? */ 167#ifdef TODO 168 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 169#endif 170 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 171 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 172 } 173} 174 175static void 176blksz_changed_cb(void *arg, uint64_t newval) 177{ 178 zfsvfs_t *zfsvfs = arg; 179 180 if (newval < SPA_MINBLOCKSIZE || 181 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 182 newval = SPA_MAXBLOCKSIZE; 183 184 zfsvfs->z_max_blksz = newval; 185 zfsvfs->z_vfs->vfs_bsize = newval; 186} 187 188static void 189readonly_changed_cb(void *arg, uint64_t newval) 190{ 191 zfsvfs_t *zfsvfs = arg; 192 193 if (newval) { 194 /* XXX locking on vfs_flag? */ 195 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 196 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 197 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 198 } else { 199 /* XXX locking on vfs_flag? 
*/ 200 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 201 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 202 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 203 } 204} 205 206static void 207setuid_changed_cb(void *arg, uint64_t newval) 208{ 209 zfsvfs_t *zfsvfs = arg; 210 211 if (newval == FALSE) { 212 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 213 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 214 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 215 } else { 216 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 217 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 218 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 219 } 220} 221 222static void 223exec_changed_cb(void *arg, uint64_t newval) 224{ 225 zfsvfs_t *zfsvfs = arg; 226 227 if (newval == FALSE) { 228 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 229 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 230 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 231 } else { 232 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 233 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 234 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 235 } 236} 237 238static void 239snapdir_changed_cb(void *arg, uint64_t newval) 240{ 241 zfsvfs_t *zfsvfs = arg; 242 243 zfsvfs->z_show_ctldir = newval; 244} 245 246static void 247acl_mode_changed_cb(void *arg, uint64_t newval) 248{ 249 zfsvfs_t *zfsvfs = arg; 250 251 zfsvfs->z_acl_mode = newval; 252} 253 254static void 255acl_inherit_changed_cb(void *arg, uint64_t newval) 256{ 257 zfsvfs_t *zfsvfs = arg; 258 259 zfsvfs->z_acl_inherit = newval; 260} 261 262static int 263zfs_refresh_properties(vfs_t *vfsp) 264{ 265 zfsvfs_t *zfsvfs = vfsp->vfs_data; 266 267 /* 268 * Remount operations default to "rw" unless "ro" is explicitly 269 * specified. 
270 */ 271 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 272 readonly_changed_cb(zfsvfs, B_TRUE); 273 } else { 274 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 275 readonly_changed_cb(zfsvfs, B_FALSE); 276 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 277 return (EROFS); 278 } 279 280 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 281 setuid_changed_cb(zfsvfs, B_FALSE); 282 } else { 283 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 284 setuid_changed_cb(zfsvfs, B_FALSE); 285 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 286 setuid_changed_cb(zfsvfs, B_TRUE); 287 } 288 289 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 290 exec_changed_cb(zfsvfs, B_FALSE); 291 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 292 exec_changed_cb(zfsvfs, B_TRUE); 293 294 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 295 atime_changed_cb(zfsvfs, B_TRUE); 296 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 297 atime_changed_cb(zfsvfs, B_FALSE); 298 299 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 300 xattr_changed_cb(zfsvfs, B_TRUE); 301 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 302 xattr_changed_cb(zfsvfs, B_FALSE); 303 304 return (0); 305} 306 307static int 308zfs_register_callbacks(vfs_t *vfsp) 309{ 310 struct dsl_dataset *ds = NULL; 311 objset_t *os = NULL; 312 zfsvfs_t *zfsvfs = NULL; 313 int readonly, do_readonly = FALSE; 314 int setuid, do_setuid = FALSE; 315 int exec, do_exec = FALSE; 316 int xattr, do_xattr = FALSE; 317 int error = 0; 318 319 ASSERT(vfsp); 320 zfsvfs = vfsp->vfs_data; 321 ASSERT(zfsvfs); 322 os = zfsvfs->z_os; 323 324 /* 325 * The act of registering our callbacks will destroy any mount 326 * options we may have. In order to enable temporary overrides 327 * of mount options, we stash away the current values and 328 * restore them after we register the callbacks. 
329 */ 330 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 331 readonly = B_TRUE; 332 do_readonly = B_TRUE; 333 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 334 readonly = B_FALSE; 335 do_readonly = B_TRUE; 336 } 337 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 338 setuid = B_FALSE; 339 do_setuid = B_TRUE; 340 } else { 341 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 342 setuid = B_FALSE; 343 do_setuid = B_TRUE; 344 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 345 setuid = B_TRUE; 346 do_setuid = B_TRUE; 347 } 348 } 349 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 350 exec = B_FALSE; 351 do_exec = B_TRUE; 352 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 353 exec = B_TRUE; 354 do_exec = B_TRUE; 355 } 356 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 357 xattr = B_FALSE; 358 do_xattr = B_TRUE; 359 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 360 xattr = B_TRUE; 361 do_xattr = B_TRUE; 362 } 363 364 /* 365 * Register property callbacks. 366 * 367 * It would probably be fine to just check for i/o error from 368 * the first prop_register(), but I guess I like to go 369 * overboard... 370 */ 371 ds = dmu_objset_ds(os); 372 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 373 error = error ? error : dsl_prop_register(ds, 374 "xattr", xattr_changed_cb, zfsvfs); 375 error = error ? error : dsl_prop_register(ds, 376 "recordsize", blksz_changed_cb, zfsvfs); 377 error = error ? error : dsl_prop_register(ds, 378 "readonly", readonly_changed_cb, zfsvfs); 379 error = error ? error : dsl_prop_register(ds, 380 "setuid", setuid_changed_cb, zfsvfs); 381 error = error ? error : dsl_prop_register(ds, 382 "exec", exec_changed_cb, zfsvfs); 383 error = error ? error : dsl_prop_register(ds, 384 "snapdir", snapdir_changed_cb, zfsvfs); 385 error = error ? error : dsl_prop_register(ds, 386 "aclmode", acl_mode_changed_cb, zfsvfs); 387 error = error ? 
error : dsl_prop_register(ds, 388 "aclinherit", acl_inherit_changed_cb, zfsvfs); 389 if (error) 390 goto unregister; 391 392 /* 393 * Invoke our callbacks to restore temporary mount options. 394 */ 395 if (do_readonly) 396 readonly_changed_cb(zfsvfs, readonly); 397 if (do_setuid) 398 setuid_changed_cb(zfsvfs, setuid); 399 if (do_exec) 400 exec_changed_cb(zfsvfs, exec); 401 if (do_xattr) 402 xattr_changed_cb(zfsvfs, xattr); 403 404 return (0); 405 406unregister: 407 /* 408 * We may attempt to unregister some callbacks that are not 409 * registered, but this is OK; it will simply return ENOMSG, 410 * which we will ignore. 411 */ 412 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 413 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 414 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 415 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 416 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 417 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 418 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 419 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 420 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 421 zfsvfs); 422 return (error); 423 424} 425 426static int 427zfs_domount(vfs_t *vfsp, char *osname, kthread_t *td) 428{ 429 cred_t *cr = td->td_ucred; 430 uint64_t recordsize, readonly; 431 int error = 0; 432 int mode; 433 zfsvfs_t *zfsvfs; 434 znode_t *zp = NULL; 435 436 ASSERT(vfsp); 437 ASSERT(osname); 438 439 /* 440 * Initialize the zfs-specific filesystem structure. 441 * Should probably make this a kmem cache, shuffle fields, 442 * and just bzero up to z_hold_mtx[]. 
443 */ 444 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 445 zfsvfs->z_vfs = vfsp; 446 zfsvfs->z_parent = zfsvfs; 447 zfsvfs->z_assign = TXG_NOWAIT; 448 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 449 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 450 451 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 452 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 453 offsetof(znode_t, z_link_node)); 454 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 455 456 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 457 NULL)) 458 goto out; 459 zfsvfs->z_vfs->vfs_bsize = recordsize; 460 461 vfsp->vfs_data = zfsvfs; 462 vfsp->mnt_flag |= MNT_LOCAL; 463 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 464 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 465 466 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 467 goto out; 468 469 if (readonly) 470 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 471 else 472 mode = DS_MODE_PRIMARY; 473 474 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 475 if (error == EROFS) { 476 /* 477 * FreeBSD: In Solaris there is DS_MODE_PRIMARY instead of 478 * DS_MODE_STANDARD, but it doesn't work on FreeBSD and 479 * I don't know why. It looks like the dataset is opened 480 * on mount DS_MODE_PRIMARY mode and snapshot cannot open 481 * the same dataset in DS_MODE_PRIMARY mode again. 
482 */ 483 mode = DS_MODE_STANDARD | DS_MODE_READONLY; 484 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 485 &zfsvfs->z_os); 486 } 487 488 if (error) 489 goto out; 490 491 if (error = zfs_init_fs(zfsvfs, &zp, cr)) 492 goto out; 493 494 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 495 uint64_t xattr; 496 497 ASSERT(mode & DS_MODE_READONLY); 498 atime_changed_cb(zfsvfs, B_FALSE); 499 readonly_changed_cb(zfsvfs, B_TRUE); 500 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 501 goto out; 502 xattr_changed_cb(zfsvfs, xattr); 503 zfsvfs->z_issnap = B_TRUE; 504 } else { 505 error = zfs_register_callbacks(vfsp); 506 if (error) 507 goto out; 508 509 zfs_unlinked_drain(zfsvfs); 510 511 /* 512 * Parse and replay the intent log. 513 */ 514 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 515 zfs_replay_vector); 516 517 if (!zil_disable) 518 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 519 } 520 521 vfs_mountedfrom(vfsp, osname); 522 523 if (!zfsvfs->z_issnap) 524 zfsctl_create(zfsvfs); 525out: 526 if (error) { 527 if (zfsvfs->z_os) 528 dmu_objset_close(zfsvfs->z_os); 529 rw_destroy(&zfsvfs->z_um_lock); 530 mutex_destroy(&zfsvfs->z_znodes_lock); 531 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 532 } else { 533 atomic_add_32(&zfs_active_fs_count, 1); 534 } 535 536 return (error); 537 538} 539 540void 541zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 542{ 543 objset_t *os = zfsvfs->z_os; 544 struct dsl_dataset *ds; 545 546 /* 547 * Unregister properties. 
548 */ 549 if (!dmu_objset_is_snapshot(os)) { 550 ds = dmu_objset_ds(os); 551 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 552 zfsvfs) == 0); 553 554 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 555 zfsvfs) == 0); 556 557 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 558 zfsvfs) == 0); 559 560 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 561 zfsvfs) == 0); 562 563 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 564 zfsvfs) == 0); 565 566 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 567 zfsvfs) == 0); 568 569 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 570 zfsvfs) == 0); 571 572 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 573 zfsvfs) == 0); 574 575 VERIFY(dsl_prop_unregister(ds, "aclinherit", 576 acl_inherit_changed_cb, zfsvfs) == 0); 577 } 578} 579 580/*ARGSUSED*/ 581static int 582zfs_mount(vfs_t *vfsp, kthread_t *td) 583{ 584 char *from; 585 int error; 586 587 /* TODO: For now deny user mounts. */ 588 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) 589 return (error); 590 591 /* 592 * When doing a remount, we simply refresh our temporary properties 593 * according to those options set in the current VFS options. 594 */ 595 if (vfsp->vfs_flag & MS_REMOUNT) 596 return (zfs_refresh_properties(vfsp)); 597 598 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&from, NULL)) 599 return (EINVAL); 600 601 return (zfs_domount(vfsp, from, td)); 602} 603 604static int 605zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td) 606{ 607 zfsvfs_t *zfsvfs = vfsp->vfs_data; 608 uint64_t refdbytes, availbytes, usedobjs, availobjs; 609 610 statp->f_version = STATFS_VERSION; 611 612 ZFS_ENTER(zfsvfs); 613 614 dmu_objset_space(zfsvfs->z_os, 615 &refdbytes, &availbytes, &usedobjs, &availobjs); 616 617 /* 618 * The underlying storage pool actually uses multiple block sizes. 
619 * We report the fragsize as the smallest block size we support, 620 * and we report our blocksize as the filesystem's maximum blocksize. 621 */ 622 statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 623 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 624 625 /* 626 * The following report "total" blocks of various kinds in the 627 * file system, but reported in terms of f_frsize - the 628 * "fragment" size. 629 */ 630 631 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 632 statp->f_bfree = availbytes / statp->f_bsize; 633 statp->f_bavail = statp->f_bfree; /* no root reservation */ 634 635 /* 636 * statvfs() should really be called statufs(), because it assumes 637 * static metadata. ZFS doesn't preallocate files, so the best 638 * we can do is report the max that could possibly fit in f_files, 639 * and that minus the number actually used in f_ffree. 640 * For f_ffree, report the smaller of the number of object available 641 * and the number of blocks (each object will take at least a block). 642 */ 643 statp->f_ffree = MIN(availobjs, statp->f_bfree); 644 statp->f_files = statp->f_ffree + usedobjs; 645 646 /* 647 * We're a zfs filesystem. 
648 */ 649 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 650 651 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 652 sizeof(statp->f_mntfromname)); 653 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 654 sizeof(statp->f_mntonname)); 655 656 statp->f_namemax = ZFS_MAXNAMELEN; 657 658 ZFS_EXIT(zfsvfs); 659 return (0); 660} 661 662static int 663zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td) 664{ 665 zfsvfs_t *zfsvfs = vfsp->vfs_data; 666 znode_t *rootzp; 667 int error; 668 669 ZFS_ENTER(zfsvfs); 670 671 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 672 if (error == 0) { 673 *vpp = ZTOV(rootzp); 674 error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 675 (*vpp)->v_vflag |= VV_ROOT; 676 } 677 678 ZFS_EXIT(zfsvfs); 679 return (error); 680} 681 682/*ARGSUSED*/ 683static int 684zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td) 685{ 686 zfsvfs_t *zfsvfs = vfsp->vfs_data; 687 cred_t *cr = td->td_ucred; 688 int ret; 689 690 if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 691 return (ret); 692 693 (void) dnlc_purge_vfsp(vfsp, 0); 694 695 /* 696 * Unmount any snapshots mounted under .zfs before unmounting the 697 * dataset itself. 698 */ 699 if (zfsvfs->z_ctldir != NULL) { 700 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 701 return (ret); 702 ret = vflush(vfsp, 0, 0, td); 703 ASSERT(ret == EBUSY); 704 if (!(fflag & MS_FORCE)) { 705 if (zfsvfs->z_ctldir->v_count > 1) 706 return (EBUSY); 707 ASSERT(zfsvfs->z_ctldir->v_count == 1); 708 } 709 zfsctl_destroy(zfsvfs); 710 ASSERT(zfsvfs->z_ctldir == NULL); 711 } 712 713 /* 714 * Flush all the files. 715 */ 716 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? 
FORCECLOSE : 0, td); 717 if (ret != 0) { 718 if (!zfsvfs->z_issnap) { 719 zfsctl_create(zfsvfs); 720 ASSERT(zfsvfs->z_ctldir != NULL); 721 } 722 return (ret); 723 } 724 725 if (fflag & MS_FORCE) { 726 MNT_ILOCK(vfsp); 727 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 728 MNT_IUNLOCK(vfsp); 729 zfsvfs->z_unmounted1 = B_TRUE; 730 731 /* 732 * Wait for all zfs threads to leave zfs. 733 * Grabbing a rwlock as reader in all vops and 734 * as writer here doesn't work because it too easy to get 735 * multiple reader enters as zfs can re-enter itself. 736 * This can lead to deadlock if there is an intervening 737 * rw_enter as writer. 738 * So a file system threads ref count (z_op_cnt) is used. 739 * A polling loop on z_op_cnt may seem inefficient, but 740 * - this saves all threads on exit from having to grab a 741 * mutex in order to cv_signal 742 * - only occurs on forced unmount in the rare case when 743 * there are outstanding threads within the file system. 744 */ 745 while (zfsvfs->z_op_cnt) { 746 delay(1); 747 } 748 } 749 750 zfs_objset_close(zfsvfs); 751 VFS_RELE(vfsp); 752 zfs_freevfs(vfsp); 753 754 return (0); 755} 756 757static int 758zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 759{ 760 zfsvfs_t *zfsvfs = vfsp->vfs_data; 761 znode_t *zp; 762 int err; 763 764 ZFS_ENTER(zfsvfs); 765 err = zfs_zget(zfsvfs, ino, &zp); 766 if (err == 0 && zp->z_unlinked) { 767 VN_RELE(ZTOV(zp)); 768 err = EINVAL; 769 } 770 if (err != 0) 771 *vpp = NULL; 772 else { 773 *vpp = ZTOV(zp); 774 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread); 775 } 776 ZFS_EXIT(zfsvfs); 777 return (0); 778} 779 780static int 781zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 782{ 783 kthread_t *td = curthread; 784 zfsvfs_t *zfsvfs = vfsp->vfs_data; 785 znode_t *zp; 786 uint64_t object = 0; 787 uint64_t fid_gen = 0; 788 uint64_t gen_mask; 789 uint64_t zp_gen; 790 int i, err; 791 792 *vpp = NULL; 793 794 ZFS_ENTER(zfsvfs); 795 796 if (fidp->fid_len == LONG_FID_LEN) { 797 zfid_long_t *zlfid = 
(zfid_long_t *)fidp; 798 uint64_t objsetid = 0; 799 uint64_t setgen = 0; 800 801 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 802 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 803 804 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 805 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 806 807 ZFS_EXIT(zfsvfs); 808 809 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 810 if (err) 811 return (EINVAL); 812 ZFS_ENTER(zfsvfs); 813 } 814 815 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 816 zfid_short_t *zfid = (zfid_short_t *)fidp; 817 818 for (i = 0; i < sizeof (zfid->zf_object); i++) 819 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 820 821 for (i = 0; i < sizeof (zfid->zf_gen); i++) 822 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 823 } else { 824 ZFS_EXIT(zfsvfs); 825 return (EINVAL); 826 } 827 828 /* A zero fid_gen means we are in the .zfs control directories */ 829 if (fid_gen == 0 && 830 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 831 *vpp = zfsvfs->z_ctldir; 832 ASSERT(*vpp != NULL); 833 if (object == ZFSCTL_INO_SNAPDIR) { 834 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 835 0, NULL, NULL) == 0); 836 } else { 837 VN_HOLD(*vpp); 838 } 839 ZFS_EXIT(zfsvfs); 840 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 841 return (0); 842 } 843 844 gen_mask = -1ULL >> (64 - 8 * i); 845 846 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 847 if (err = zfs_zget(zfsvfs, object, &zp)) { 848 ZFS_EXIT(zfsvfs); 849 return (err); 850 } 851 zp_gen = zp->z_phys->zp_gen & gen_mask; 852 if (zp_gen == 0) 853 zp_gen = 1; 854 if (zp->z_unlinked || zp_gen != fid_gen) { 855 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 856 VN_RELE(ZTOV(zp)); 857 ZFS_EXIT(zfsvfs); 858 return (EINVAL); 859 } 860 861 *vpp = ZTOV(zp); 862 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 863 vnode_create_vobject(*vpp, zp->z_phys->zp_size, td); 864 ZFS_EXIT(zfsvfs); 865 return (0); 866} 867 868static 
void 869zfs_objset_close(zfsvfs_t *zfsvfs) 870{ 871 znode_t *zp, *nextzp; 872 objset_t *os = zfsvfs->z_os; 873 874 /* 875 * For forced unmount, at this point all vops except zfs_inactive 876 * are erroring EIO. We need to now suspend zfs_inactive threads 877 * while we are freeing dbufs before switching zfs_inactive 878 * to use behaviour without a objset. 879 */ 880 rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 881 882 /* 883 * Release all holds on dbufs 884 * Note, although we have stopped all other vop threads and 885 * zfs_inactive(), the dmu can callback via znode_pageout_func() 886 * which can zfs_znode_free() the znode. 887 * So we lock z_all_znodes; search the list for a held 888 * dbuf; drop the lock (we know zp can't disappear if we hold 889 * a dbuf lock; then regrab the lock and restart. 890 */ 891 mutex_enter(&zfsvfs->z_znodes_lock); 892 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 893 nextzp = list_next(&zfsvfs->z_all_znodes, zp); 894 if (zp->z_dbuf_held) { 895 /* dbufs should only be held when force unmounting */ 896 zp->z_dbuf_held = 0; 897 mutex_exit(&zfsvfs->z_znodes_lock); 898 dmu_buf_rele(zp->z_dbuf, NULL); 899 /* Start again */ 900 mutex_enter(&zfsvfs->z_znodes_lock); 901 nextzp = list_head(&zfsvfs->z_all_znodes); 902 } 903 } 904 mutex_exit(&zfsvfs->z_znodes_lock); 905 906 /* 907 * Unregister properties. 908 */ 909 if (!dmu_objset_is_snapshot(os)) 910 zfs_unregister_callbacks(zfsvfs); 911 912 /* 913 * Switch zfs_inactive to behaviour without an objset. 914 * It just tosses cached pages and frees the znode & vnode. 915 * Then re-enable zfs_inactive threads in that new behaviour. 916 */ 917 zfsvfs->z_unmounted2 = B_TRUE; 918 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 919 920 /* 921 * Close the zil. Can't close the zil while zfs_inactive 922 * threads are blocked as zil_close can call zfs_inactive. 
923 */ 924 if (zfsvfs->z_log) { 925 zil_close(zfsvfs->z_log); 926 zfsvfs->z_log = NULL; 927 } 928 929 /* 930 * Evict all dbufs so that cached znodes will be freed 931 */ 932 if (dmu_objset_evict_dbufs(os, 1)) { 933 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 934 (void) dmu_objset_evict_dbufs(os, 0); 935 } 936 937 /* 938 * Finally close the objset 939 */ 940 dmu_objset_close(os); 941} 942 943static void 944zfs_freevfs(vfs_t *vfsp) 945{ 946 zfsvfs_t *zfsvfs = vfsp->vfs_data; 947 int i; 948 949 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 950 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 951 rw_destroy(&zfsvfs->z_um_lock); 952 mutex_destroy(&zfsvfs->z_znodes_lock); 953 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 954 955 atomic_add_32(&zfs_active_fs_count, -1); 956} 957 958void 959zfs_init(void) 960{ 961 962 printf("ZFS filesystem version " ZFS_VERSION_STRING "\n"); 963 964 /* 965 * Initialize .zfs directory structures 966 */ 967 zfsctl_init(); 968 969 /* 970 * Initialize znode cache, vnode ops, etc... 971 */ 972 zfs_znode_init(); 973} 974 975void 976zfs_fini(void) 977{ 978 zfsctl_fini(); 979 zfs_znode_fini(); 980} 981 982int 983zfs_busy(void) 984{ 985 return (zfs_active_fs_count != 0); 986} 987