/* zfs_vfsops.c revision 211855 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#include <sys/types.h> 27#include <sys/param.h> 28#include <sys/systm.h> 29#include <sys/kernel.h> 30#include <sys/sysmacros.h> 31#include <sys/kmem.h> 32#include <sys/acl.h> 33#include <sys/vnode.h> 34#include <sys/vfs.h> 35#include <sys/mntent.h> 36#include <sys/mount.h> 37#include <sys/cmn_err.h> 38#include <sys/zfs_znode.h> 39#include <sys/zfs_dir.h> 40#include <sys/zil.h> 41#include <sys/fs/zfs.h> 42#include <sys/dmu.h> 43#include <sys/dsl_prop.h> 44#include <sys/dsl_dataset.h> 45#include <sys/dsl_deleg.h> 46#include <sys/spa.h> 47#include <sys/zap.h> 48#include <sys/varargs.h> 49#include <sys/policy.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/zfs_fuid.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56#include <sys/dmu_objset.h> 57#include <sys/spa_boot.h> 58 59struct mtx zfs_debug_mtx; 60MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 61 62SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 63 64int 
zfs_super_owner = 0; 65SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 66 "File system owner can perform privileged operation on his file systems"); 67 68int zfs_debug_level = 0; 69TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 70SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 71 "Debug level"); 72 73SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 74static int zfs_version_acl = ZFS_ACL_VERSION; 75SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 76 "ZFS_ACL_VERSION"); 77static int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 78SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 79 &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 80static int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 81SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 82 &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 83static int zfs_version_spa = SPA_VERSION; 84SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 85 "SPA_VERSION"); 86static int zfs_version_zpl = ZPL_VERSION; 87SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 88 "ZPL_VERSION"); 89 90static int zfs_mount(vfs_t *vfsp); 91static int zfs_umount(vfs_t *vfsp, int fflag); 92static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 93static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 94static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 95static int zfs_sync(vfs_t *vfsp, int waitfor); 96static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 97 struct ucred **credanonp, int *numsecflavors, int **secflavors); 98static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 99static void zfs_objset_close(zfsvfs_t *zfsvfs); 100static void zfs_freevfs(vfs_t *vfsp); 101 102static struct vfsops zfs_vfsops = { 103 .vfs_mount = zfs_mount, 104 
.vfs_unmount = zfs_umount, 105 .vfs_root = zfs_root, 106 .vfs_statfs = zfs_statfs, 107 .vfs_vget = zfs_vget, 108 .vfs_sync = zfs_sync, 109 .vfs_checkexp = zfs_checkexp, 110 .vfs_fhtovp = zfs_fhtovp, 111}; 112 113VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 114 115/* 116 * We need to keep a count of active fs's. 117 * This is necessary to prevent our module 118 * from being unloaded after a umount -f 119 */ 120static uint32_t zfs_active_fs_count = 0; 121 122/*ARGSUSED*/ 123static int 124zfs_sync(vfs_t *vfsp, int waitfor) 125{ 126 127 /* 128 * Data integrity is job one. We don't want a compromised kernel 129 * writing to the storage pool, so we never sync during panic. 130 */ 131 if (panicstr) 132 return (0); 133 134 if (vfsp != NULL) { 135 /* 136 * Sync a specific filesystem. 137 */ 138 zfsvfs_t *zfsvfs = vfsp->vfs_data; 139 dsl_pool_t *dp; 140 int error; 141 142 error = vfs_stdsync(vfsp, waitfor); 143 if (error != 0) 144 return (error); 145 146 ZFS_ENTER(zfsvfs); 147 dp = dmu_objset_pool(zfsvfs->z_os); 148 149 /* 150 * If the system is shutting down, then skip any 151 * filesystems which may exist on a suspended pool. 152 */ 153 if (sys_shutdown && spa_suspended(dp->dp_spa)) { 154 ZFS_EXIT(zfsvfs); 155 return (0); 156 } 157 158 if (zfsvfs->z_log != NULL) 159 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 160 else 161 txg_wait_synced(dp, 0); 162 ZFS_EXIT(zfsvfs); 163 } else { 164 /* 165 * Sync all ZFS filesystems. This is what happens when you 166 * run sync(1M). Unlike other filesystems, ZFS honors the 167 * request by waiting for all pools to commit all dirty data. 
168 */ 169 spa_sync_allpools(); 170 } 171 172 return (0); 173} 174 175static void 176atime_changed_cb(void *arg, uint64_t newval) 177{ 178 zfsvfs_t *zfsvfs = arg; 179 180 if (newval == TRUE) { 181 zfsvfs->z_atime = TRUE; 182 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 183 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 184 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 185 } else { 186 zfsvfs->z_atime = FALSE; 187 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 188 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 189 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 190 } 191} 192 193static void 194xattr_changed_cb(void *arg, uint64_t newval) 195{ 196 zfsvfs_t *zfsvfs = arg; 197 198 if (newval == TRUE) { 199 /* XXX locking on vfs_flag? */ 200#ifdef TODO 201 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 202#endif 203 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 204 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 205 } else { 206 /* XXX locking on vfs_flag? */ 207#ifdef TODO 208 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 209#endif 210 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 211 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 212 } 213} 214 215static void 216blksz_changed_cb(void *arg, uint64_t newval) 217{ 218 zfsvfs_t *zfsvfs = arg; 219 220 if (newval < SPA_MINBLOCKSIZE || 221 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 222 newval = SPA_MAXBLOCKSIZE; 223 224 zfsvfs->z_max_blksz = newval; 225 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 226} 227 228static void 229readonly_changed_cb(void *arg, uint64_t newval) 230{ 231 zfsvfs_t *zfsvfs = arg; 232 233 if (newval) { 234 /* XXX locking on vfs_flag? */ 235 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 236 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 237 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 238 } else { 239 /* XXX locking on vfs_flag? 
*/ 240 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 241 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 242 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 243 } 244} 245 246static void 247setuid_changed_cb(void *arg, uint64_t newval) 248{ 249 zfsvfs_t *zfsvfs = arg; 250 251 if (newval == FALSE) { 252 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 253 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 254 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 255 } else { 256 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 257 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 258 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 259 } 260} 261 262static void 263exec_changed_cb(void *arg, uint64_t newval) 264{ 265 zfsvfs_t *zfsvfs = arg; 266 267 if (newval == FALSE) { 268 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 269 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 270 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 271 } else { 272 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 273 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 274 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 275 } 276} 277 278/* 279 * The nbmand mount option can be changed at mount time. 
280 * We can't allow it to be toggled on live file systems or incorrect 281 * behavior may be seen from cifs clients 282 * 283 * This property isn't registered via dsl_prop_register(), but this callback 284 * will be called when a file system is first mounted 285 */ 286static void 287nbmand_changed_cb(void *arg, uint64_t newval) 288{ 289 zfsvfs_t *zfsvfs = arg; 290 if (newval == FALSE) { 291 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 292 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 293 } else { 294 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 295 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 296 } 297} 298 299static void 300snapdir_changed_cb(void *arg, uint64_t newval) 301{ 302 zfsvfs_t *zfsvfs = arg; 303 304 zfsvfs->z_show_ctldir = newval; 305} 306 307static void 308vscan_changed_cb(void *arg, uint64_t newval) 309{ 310 zfsvfs_t *zfsvfs = arg; 311 312 zfsvfs->z_vscan = newval; 313} 314 315static void 316acl_mode_changed_cb(void *arg, uint64_t newval) 317{ 318 zfsvfs_t *zfsvfs = arg; 319 320 zfsvfs->z_acl_mode = newval; 321} 322 323static void 324acl_inherit_changed_cb(void *arg, uint64_t newval) 325{ 326 zfsvfs_t *zfsvfs = arg; 327 328 zfsvfs->z_acl_inherit = newval; 329} 330 331static int 332zfs_register_callbacks(vfs_t *vfsp) 333{ 334 struct dsl_dataset *ds = NULL; 335 objset_t *os = NULL; 336 zfsvfs_t *zfsvfs = NULL; 337 uint64_t nbmand; 338 int readonly, do_readonly = FALSE; 339 int setuid, do_setuid = FALSE; 340 int exec, do_exec = FALSE; 341 int xattr, do_xattr = FALSE; 342 int atime, do_atime = FALSE; 343 int error = 0; 344 345 ASSERT(vfsp); 346 zfsvfs = vfsp->vfs_data; 347 ASSERT(zfsvfs); 348 os = zfsvfs->z_os; 349 350 /* 351 * This function can be called for a snapshot when we update snapshot's 352 * mount point, which isn't really supported. 353 */ 354 if (dmu_objset_is_snapshot(os)) 355 return (EOPNOTSUPP); 356 357 /* 358 * The act of registering our callbacks will destroy any mount 359 * options we may have. 
In order to enable temporary overrides 360 * of mount options, we stash away the current values and 361 * restore them after we register the callbacks. 362 */ 363 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 364 readonly = B_TRUE; 365 do_readonly = B_TRUE; 366 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 367 readonly = B_FALSE; 368 do_readonly = B_TRUE; 369 } 370 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 371 setuid = B_FALSE; 372 do_setuid = B_TRUE; 373 } else { 374 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 375 setuid = B_FALSE; 376 do_setuid = B_TRUE; 377 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 378 setuid = B_TRUE; 379 do_setuid = B_TRUE; 380 } 381 } 382 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 383 exec = B_FALSE; 384 do_exec = B_TRUE; 385 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 386 exec = B_TRUE; 387 do_exec = B_TRUE; 388 } 389 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 390 xattr = B_FALSE; 391 do_xattr = B_TRUE; 392 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 393 xattr = B_TRUE; 394 do_xattr = B_TRUE; 395 } 396 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 397 atime = B_FALSE; 398 do_atime = B_TRUE; 399 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 400 atime = B_TRUE; 401 do_atime = B_TRUE; 402 } 403 404 /* 405 * nbmand is a special property. It can only be changed at 406 * mount time. 407 * 408 * This is weird, but it is documented to only be changeable 409 * at mount time. 410 */ 411 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 412 nbmand = B_FALSE; 413 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 414 nbmand = B_TRUE; 415 } else { 416 char osname[MAXNAMELEN]; 417 418 dmu_objset_name(os, osname); 419 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 420 NULL)) { 421 return (error); 422 } 423 } 424 425 /* 426 * Register property callbacks. 
427 * 428 * It would probably be fine to just check for i/o error from 429 * the first prop_register(), but I guess I like to go 430 * overboard... 431 */ 432 ds = dmu_objset_ds(os); 433 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 434 error = error ? error : dsl_prop_register(ds, 435 "xattr", xattr_changed_cb, zfsvfs); 436 error = error ? error : dsl_prop_register(ds, 437 "recordsize", blksz_changed_cb, zfsvfs); 438 error = error ? error : dsl_prop_register(ds, 439 "readonly", readonly_changed_cb, zfsvfs); 440 error = error ? error : dsl_prop_register(ds, 441 "setuid", setuid_changed_cb, zfsvfs); 442 error = error ? error : dsl_prop_register(ds, 443 "exec", exec_changed_cb, zfsvfs); 444 error = error ? error : dsl_prop_register(ds, 445 "snapdir", snapdir_changed_cb, zfsvfs); 446 error = error ? error : dsl_prop_register(ds, 447 "aclmode", acl_mode_changed_cb, zfsvfs); 448 error = error ? error : dsl_prop_register(ds, 449 "aclinherit", acl_inherit_changed_cb, zfsvfs); 450 error = error ? error : dsl_prop_register(ds, 451 "vscan", vscan_changed_cb, zfsvfs); 452 if (error) 453 goto unregister; 454 455 /* 456 * Invoke our callbacks to restore temporary mount options. 457 */ 458 if (do_readonly) 459 readonly_changed_cb(zfsvfs, readonly); 460 if (do_setuid) 461 setuid_changed_cb(zfsvfs, setuid); 462 if (do_exec) 463 exec_changed_cb(zfsvfs, exec); 464 if (do_xattr) 465 xattr_changed_cb(zfsvfs, xattr); 466 if (do_atime) 467 atime_changed_cb(zfsvfs, atime); 468 469 nbmand_changed_cb(zfsvfs, nbmand); 470 471 return (0); 472 473unregister: 474 /* 475 * We may attempt to unregister some callbacks that are not 476 * registered, but this is OK; it will simply return ENOMSG, 477 * which we will ignore. 
478 */ 479 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 480 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 481 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 482 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 483 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 484 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 485 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 486 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 487 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 488 zfsvfs); 489 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 490 return (error); 491 492} 493 494static void 495uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid, 496 int64_t delta, dmu_tx_t *tx) 497{ 498 uint64_t used = 0; 499 char buf[32]; 500 int err; 501 uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 502 503 if (delta == 0) 504 return; 505 506 (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid); 507 err = zap_lookup(os, obj, buf, 8, 1, &used); 508 ASSERT(err == 0 || err == ENOENT); 509 /* no underflow/overflow */ 510 ASSERT(delta > 0 || used >= -delta); 511 ASSERT(delta < 0 || used + delta > used); 512 used += delta; 513 if (used == 0) 514 err = zap_remove(os, obj, buf, tx); 515 else 516 err = zap_update(os, obj, buf, 8, 1, &used, tx); 517 ASSERT(err == 0); 518} 519 520static void 521zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype, 522 void *oldbonus, void *newbonus, 523 uint64_t oldused, uint64_t newused, dmu_tx_t *tx) 524{ 525 znode_phys_t *oldznp = oldbonus; 526 znode_phys_t *newznp = newbonus; 527 528 if (bonustype != DMU_OT_ZNODE) 529 return; 530 531 /* We charge 512 for the dnode (if it's allocated). 
*/ 532 if (oldznp->zp_gen != 0) 533 oldused += DNODE_SIZE; 534 if (newznp->zp_gen != 0) 535 newused += DNODE_SIZE; 536 537 if (oldznp->zp_uid == newznp->zp_uid) { 538 uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx); 539 } else { 540 uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx); 541 uidacct(os, B_FALSE, newznp->zp_uid, newused, tx); 542 } 543 544 if (oldznp->zp_gid == newznp->zp_gid) { 545 uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx); 546 } else { 547 uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx); 548 uidacct(os, B_TRUE, newznp->zp_gid, newused, tx); 549 } 550} 551 552static void 553fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 554 char *domainbuf, int buflen, uid_t *ridp) 555{ 556 uint64_t fuid; 557 const char *domain; 558 559 fuid = strtonum(fuidstr, NULL); 560 561 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 562 if (domain) 563 (void) strlcpy(domainbuf, domain, buflen); 564 else 565 domainbuf[0] = '\0'; 566 *ridp = FUID_RID(fuid); 567} 568 569static uint64_t 570zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 571{ 572 switch (type) { 573 case ZFS_PROP_USERUSED: 574 return (DMU_USERUSED_OBJECT); 575 case ZFS_PROP_GROUPUSED: 576 return (DMU_GROUPUSED_OBJECT); 577 case ZFS_PROP_USERQUOTA: 578 return (zfsvfs->z_userquota_obj); 579 case ZFS_PROP_GROUPQUOTA: 580 return (zfsvfs->z_groupquota_obj); 581 } 582 return (0); 583} 584 585int 586zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 587 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 588{ 589 int error; 590 zap_cursor_t zc; 591 zap_attribute_t za; 592 zfs_useracct_t *buf = vbuf; 593 uint64_t obj; 594 595 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 596 return (ENOTSUP); 597 598 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 599 if (obj == 0) { 600 *bufsizep = 0; 601 return (0); 602 } 603 604 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 605 (error = zap_cursor_retrieve(&zc, &za)) == 0; 606 
zap_cursor_advance(&zc)) { 607 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 608 *bufsizep) 609 break; 610 611 fuidstr_to_sid(zfsvfs, za.za_name, 612 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 613 614 buf->zu_space = za.za_first_integer; 615 buf++; 616 } 617 if (error == ENOENT) 618 error = 0; 619 620 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 621 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 622 *cookiep = zap_cursor_serialize(&zc); 623 zap_cursor_fini(&zc); 624 return (error); 625} 626 627/* 628 * buf must be big enough (eg, 32 bytes) 629 */ 630static int 631id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 632 char *buf, boolean_t addok) 633{ 634 uint64_t fuid; 635 int domainid = 0; 636 637 if (domain && domain[0]) { 638 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 639 if (domainid == -1) 640 return (ENOENT); 641 } 642 fuid = FUID_ENCODE(domainid, rid); 643 (void) sprintf(buf, "%llx", (longlong_t)fuid); 644 return (0); 645} 646 647int 648zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 649 const char *domain, uint64_t rid, uint64_t *valp) 650{ 651 char buf[32]; 652 int err; 653 uint64_t obj; 654 655 *valp = 0; 656 657 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 658 return (ENOTSUP); 659 660 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 661 if (obj == 0) 662 return (0); 663 664 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 665 if (err) 666 return (err); 667 668 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 669 if (err == ENOENT) 670 err = 0; 671 return (err); 672} 673 674int 675zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 676 const char *domain, uint64_t rid, uint64_t quota) 677{ 678 char buf[32]; 679 int err; 680 dmu_tx_t *tx; 681 uint64_t *objp; 682 boolean_t fuid_dirtied; 683 684 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 685 return (EINVAL); 686 687 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 688 
return (ENOTSUP); 689 690 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 691 &zfsvfs->z_groupquota_obj; 692 693 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 694 if (err) 695 return (err); 696 fuid_dirtied = zfsvfs->z_fuid_dirty; 697 698 tx = dmu_tx_create(zfsvfs->z_os); 699 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 700 if (*objp == 0) { 701 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 702 zfs_userquota_prop_prefixes[type]); 703 } 704 if (fuid_dirtied) 705 zfs_fuid_txhold(zfsvfs, tx); 706 err = dmu_tx_assign(tx, TXG_WAIT); 707 if (err) { 708 dmu_tx_abort(tx); 709 return (err); 710 } 711 712 mutex_enter(&zfsvfs->z_lock); 713 if (*objp == 0) { 714 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 715 DMU_OT_NONE, 0, tx); 716 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 717 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 718 } 719 mutex_exit(&zfsvfs->z_lock); 720 721 if (quota == 0) { 722 err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 723 if (err == ENOENT) 724 err = 0; 725 } else { 726 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 727 } 728 ASSERT(err == 0); 729 if (fuid_dirtied) 730 zfs_fuid_sync(zfsvfs, tx); 731 dmu_tx_commit(tx); 732 return (err); 733} 734 735boolean_t 736zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 737{ 738 char buf[32]; 739 uint64_t used, quota, usedobj, quotaobj; 740 int err; 741 742 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 743 quotaobj = isgroup ? 
zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 744 745 if (quotaobj == 0 || zfsvfs->z_replay) 746 return (B_FALSE); 747 748 (void) sprintf(buf, "%llx", (longlong_t)fuid); 749 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 750 if (err != 0) 751 return (B_FALSE); 752 753 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 754 if (err != 0) 755 return (B_FALSE); 756 return (used >= quota); 757} 758 759int 760zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) 761{ 762 objset_t *os; 763 zfsvfs_t *zfsvfs; 764 uint64_t zval; 765 int i, error; 766 767 if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL)) 768 return (error); 769 if (zval) 770 mode |= DS_MODE_READONLY; 771 772 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 773 if (error == EROFS) { 774 mode |= DS_MODE_READONLY; 775 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 776 } 777 if (error) 778 return (error); 779 780 /* 781 * Initialize the zfs-specific filesystem structure. 782 * Should probably make this a kmem cache, shuffle fields, 783 * and just bzero up to z_hold_mtx[]. 
784 */ 785 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 786 zfsvfs->z_vfs = NULL; 787 zfsvfs->z_parent = zfsvfs; 788 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 789 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 790 zfsvfs->z_os = os; 791 792 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 793 if (error) { 794 goto out; 795 } else if (zfsvfs->z_version > ZPL_VERSION) { 796 (void) printf("Mismatched versions: File system " 797 "is version %llu on-disk format, which is " 798 "incompatible with this software version %lld!", 799 (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); 800 error = ENOTSUP; 801 goto out; 802 } 803 804 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 805 goto out; 806 zfsvfs->z_norm = (int)zval; 807 808 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 809 goto out; 810 zfsvfs->z_utf8 = (zval != 0); 811 812 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 813 goto out; 814 zfsvfs->z_case = (uint_t)zval; 815 816 /* 817 * Fold case on file systems that are always or sometimes case 818 * insensitive. 
819 */ 820 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 821 zfsvfs->z_case == ZFS_CASE_MIXED) 822 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 823 824 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 825 826 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 827 &zfsvfs->z_root); 828 if (error) 829 goto out; 830 ASSERT(zfsvfs->z_root != 0); 831 832 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 833 &zfsvfs->z_unlinkedobj); 834 if (error) 835 goto out; 836 837 error = zap_lookup(os, MASTER_NODE_OBJ, 838 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 839 8, 1, &zfsvfs->z_userquota_obj); 840 if (error && error != ENOENT) 841 goto out; 842 843 error = zap_lookup(os, MASTER_NODE_OBJ, 844 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 845 8, 1, &zfsvfs->z_groupquota_obj); 846 if (error && error != ENOENT) 847 goto out; 848 849 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 850 &zfsvfs->z_fuid_obj); 851 if (error && error != ENOENT) 852 goto out; 853 854 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 855 &zfsvfs->z_shares_dir); 856 if (error && error != ENOENT) 857 goto out; 858 859 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 860 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 861 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 862 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 863 offsetof(znode_t, z_link_node)); 864 rrw_init(&zfsvfs->z_teardown_lock); 865 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 866 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 867 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 868 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 869 870 *zvp = zfsvfs; 871 return (0); 872 873out: 874 dmu_objset_close(os); 875 *zvp = NULL; 876 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 877 return (error); 878} 879 880static int 881zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 882{ 883 int error; 884 885 
error = zfs_register_callbacks(zfsvfs->z_vfs); 886 if (error) 887 return (error); 888 889 /* 890 * Set the objset user_ptr to track its zfsvfs. 891 */ 892 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 893 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 894 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 895 896 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 897 if (zil_disable) { 898 zil_destroy(zfsvfs->z_log, B_FALSE); 899 zfsvfs->z_log = NULL; 900 } 901 902 /* 903 * If we are not mounting (ie: online recv), then we don't 904 * have to worry about replaying the log as we blocked all 905 * operations out since we closed the ZIL. 906 */ 907 if (mounting) { 908 boolean_t readonly; 909 910 /* 911 * During replay we remove the read only flag to 912 * allow replays to succeed. 913 */ 914 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 915 if (readonly != 0) 916 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 917 else 918 zfs_unlinked_drain(zfsvfs); 919 920 if (zfsvfs->z_log) { 921 /* 922 * Parse and replay the intent log. 923 * 924 * Because of ziltest, this must be done after 925 * zfs_unlinked_drain(). (Further note: ziltest 926 * doesn't use readonly mounts, where 927 * zfs_unlinked_drain() isn't called.) This is because 928 * ziltest causes spa_sync() to think it's committed, 929 * but actually it is not, so the intent log contains 930 * many txg's worth of changes. 931 * 932 * In particular, if object N is in the unlinked set in 933 * the last txg to actually sync, then it could be 934 * actually freed in a later txg and then reallocated 935 * in a yet later txg. This would write a "create 936 * object N" record to the intent log. Normally, this 937 * would be fine because the spa_sync() would have 938 * written out the fact that object N is free, before 939 * we could write the "create object N" intent log 940 * record. 941 * 942 * But when we are in ziltest mode, we advance the "open 943 * txg" without actually spa_sync()-ing the changes to 944 * disk. 
So we would see that object N is still 945 * allocated and in the unlinked set, and there is an 946 * intent log record saying to allocate it. 947 */ 948 zfsvfs->z_replay = B_TRUE; 949 zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); 950 zfsvfs->z_replay = B_FALSE; 951 } 952 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 953 } 954 955 return (0); 956} 957 958extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 959 960void 961zfsvfs_free(zfsvfs_t *zfsvfs) 962{ 963 int i; 964 965 /* 966 * This is a barrier to prevent the filesystem from going away in 967 * zfs_znode_move() until we can safely ensure that the filesystem is 968 * not unmounted. We consider the filesystem valid before the barrier 969 * and invalid after the barrier. 970 */ 971 rw_enter(&zfsvfs_lock, RW_READER); 972 rw_exit(&zfsvfs_lock); 973 974 zfs_fuid_destroy(zfsvfs); 975 976 mutex_destroy(&zfsvfs->z_znodes_lock); 977 mutex_destroy(&zfsvfs->z_online_recv_lock); 978 mutex_destroy(&zfsvfs->z_lock); 979 list_destroy(&zfsvfs->z_all_znodes); 980 rrw_destroy(&zfsvfs->z_teardown_lock); 981 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 982 rw_destroy(&zfsvfs->z_fuid_lock); 983 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 984 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 985 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 986} 987 988static void 989zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 990{ 991 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 992 if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { 993 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 994 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 995 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 996 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 997 } 998} 999 1000static int 1001zfs_domount(vfs_t *vfsp, char *osname) 1002{ 1003 uint64_t recordsize, fsid_guid; 1004 int error = 0; 1005 zfsvfs_t *zfsvfs; 1006 vnode_t *vp; 1007 1008 ASSERT(vfsp); 1009 ASSERT(osname); 1010 1011 error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs); 1012 if 
(error) 1013 return (error); 1014 zfsvfs->z_vfs = vfsp; 1015 1016 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1017 NULL)) 1018 goto out; 1019 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1020 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1021 1022 vfsp->vfs_data = zfsvfs; 1023 vfsp->mnt_flag |= MNT_LOCAL; 1024 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 1025 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1026 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1027 1028 1029 /* 1030 * The fsid is 64 bits, composed of an 8-bit fs type, which 1031 * separates our fsid from any other filesystem types, and a 1032 * 56-bit objset unique ID. The objset unique ID is unique to 1033 * all objsets open on this system, provided by unique_create(). 1034 * The 8-bit fs type must be put in the low bits of fsid[1] 1035 * because that's where other Solaris filesystems put it. 1036 */ 1037 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1038 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1039 vfsp->vfs_fsid.val[0] = fsid_guid; 1040 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1041 vfsp->mnt_vfc->vfc_typenum & 0xFF; 1042 1043 /* 1044 * Set features for file system. 
1045 */ 1046 zfs_set_fuid_feature(zfsvfs); 1047 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1048 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1049 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1050 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1051 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1052 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1053 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1054 } 1055 1056 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1057 uint64_t pval; 1058 1059 atime_changed_cb(zfsvfs, B_FALSE); 1060 readonly_changed_cb(zfsvfs, B_TRUE); 1061 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1062 goto out; 1063 xattr_changed_cb(zfsvfs, pval); 1064 zfsvfs->z_issnap = B_TRUE; 1065 1066 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 1067 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1068 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 1069 } else { 1070 error = zfsvfs_setup(zfsvfs, B_TRUE); 1071 } 1072 1073 vfs_mountedfrom(vfsp, osname); 1074 /* Grab extra reference. */ 1075 VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0); 1076 VOP_UNLOCK(vp, 0); 1077 1078 if (!zfsvfs->z_issnap) 1079 zfsctl_create(zfsvfs); 1080out: 1081 if (error) { 1082 dmu_objset_close(zfsvfs->z_os); 1083 zfsvfs_free(zfsvfs); 1084 } else { 1085 atomic_add_32(&zfs_active_fs_count, 1); 1086 } 1087 1088 return (error); 1089} 1090 1091void 1092zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1093{ 1094 objset_t *os = zfsvfs->z_os; 1095 struct dsl_dataset *ds; 1096 1097 /* 1098 * Unregister properties. 
1099 */ 1100 if (!dmu_objset_is_snapshot(os)) { 1101 ds = dmu_objset_ds(os); 1102 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1103 zfsvfs) == 0); 1104 1105 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 1106 zfsvfs) == 0); 1107 1108 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1109 zfsvfs) == 0); 1110 1111 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1112 zfsvfs) == 0); 1113 1114 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1115 zfsvfs) == 0); 1116 1117 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1118 zfsvfs) == 0); 1119 1120 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1121 zfsvfs) == 0); 1122 1123 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 1124 zfsvfs) == 0); 1125 1126 VERIFY(dsl_prop_unregister(ds, "aclinherit", 1127 acl_inherit_changed_cb, zfsvfs) == 0); 1128 1129 VERIFY(dsl_prop_unregister(ds, "vscan", 1130 vscan_changed_cb, zfsvfs) == 0); 1131 } 1132} 1133 1134/*ARGSUSED*/ 1135static int 1136zfs_mount(vfs_t *vfsp) 1137{ 1138 kthread_t *td = curthread; 1139 vnode_t *mvp = vfsp->mnt_vnodecovered; 1140 cred_t *cr = td->td_ucred; 1141 char *osname; 1142 int error = 0; 1143 int canwrite; 1144 1145 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1146 return (EINVAL); 1147 1148 /* 1149 * If full-owner-access is enabled and delegated administration is 1150 * turned on, we must set nosuid. 1151 */ 1152 if (zfs_super_owner && 1153 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1154 secpolicy_fs_mount_clearopts(cr, vfsp); 1155 } 1156 1157 /* 1158 * Check for mount privilege? 
1159 * 1160 * If we don't have privilege then see if 1161 * we have local permission to allow it 1162 */ 1163 error = secpolicy_fs_mount(cr, mvp, vfsp); 1164 if (error) { 1165 error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 1166 if (error != 0) 1167 goto out; 1168 1169 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1170 vattr_t vattr; 1171 1172 /* 1173 * Make sure user is the owner of the mount point 1174 * or has sufficient privileges. 1175 */ 1176 1177 vattr.va_mask = AT_UID; 1178 1179 vn_lock(mvp, LK_SHARED | LK_RETRY); 1180 if (error = VOP_GETATTR(mvp, &vattr, cr)) { 1181 VOP_UNLOCK(mvp, 0); 1182 goto out; 1183 } 1184 1185 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1186 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1187 VOP_UNLOCK(mvp, 0); 1188 goto out; 1189 } 1190 VOP_UNLOCK(mvp, 0); 1191 } 1192 1193 secpolicy_fs_mount_clearopts(cr, vfsp); 1194 } 1195 1196 /* 1197 * Refuse to mount a filesystem if we are in a local zone and the 1198 * dataset is not visible. 1199 */ 1200 if (!INGLOBALZONE(curthread) && 1201 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1202 error = EPERM; 1203 goto out; 1204 } 1205 1206 /* 1207 * When doing a remount, we simply refresh our temporary properties 1208 * according to those options set in the current VFS options. 1209 */ 1210 if (vfsp->vfs_flag & MS_REMOUNT) { 1211 /* refresh mount options */ 1212 zfs_unregister_callbacks(vfsp->vfs_data); 1213 error = zfs_register_callbacks(vfsp); 1214 goto out; 1215 } 1216 1217 DROP_GIANT(); 1218 error = zfs_domount(vfsp, osname); 1219 PICKUP_GIANT(); 1220 1221 /* 1222 * Add an extra VFS_HOLD on our parent vfs so that it can't 1223 * disappear due to a forced unmount. 1224 */ 1225 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1226 VFS_HOLD(mvp->v_vfsp); 1227 1228 /* 1229 * Add an extra VFS_HOLD on our parent vfs so that it can't 1230 * disappear due to a forced unmount. 
1231 */ 1232 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1233 VFS_HOLD(mvp->v_vfsp); 1234 1235out: 1236 return (error); 1237} 1238 1239static int 1240zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1241{ 1242 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1243 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1244 1245 statp->f_version = STATFS_VERSION; 1246 1247 ZFS_ENTER(zfsvfs); 1248 1249 dmu_objset_space(zfsvfs->z_os, 1250 &refdbytes, &availbytes, &usedobjs, &availobjs); 1251 1252 /* 1253 * The underlying storage pool actually uses multiple block sizes. 1254 * We report the fragsize as the smallest block size we support, 1255 * and we report our blocksize as the filesystem's maximum blocksize. 1256 */ 1257 statp->f_bsize = SPA_MINBLOCKSIZE; 1258 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1259 1260 /* 1261 * The following report "total" blocks of various kinds in the 1262 * file system, but reported in terms of f_frsize - the 1263 * "fragment" size. 1264 */ 1265 1266 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1267 statp->f_bfree = availbytes / statp->f_bsize; 1268 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1269 1270 /* 1271 * statvfs() should really be called statufs(), because it assumes 1272 * static metadata. ZFS doesn't preallocate files, so the best 1273 * we can do is report the max that could possibly fit in f_files, 1274 * and that minus the number actually used in f_ffree. 1275 * For f_ffree, report the smaller of the number of object available 1276 * and the number of blocks (each object will take at least a block). 1277 */ 1278 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1279 statp->f_files = statp->f_ffree + usedobjs; 1280 1281 /* 1282 * We're a zfs filesystem. 
1283 */ 1284 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 1285 1286 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1287 sizeof(statp->f_mntfromname)); 1288 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1289 sizeof(statp->f_mntonname)); 1290 1291 statp->f_namemax = ZFS_MAXNAMELEN; 1292 1293 ZFS_EXIT(zfsvfs); 1294 return (0); 1295} 1296 1297static int 1298zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1299{ 1300 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1301 znode_t *rootzp; 1302 int error; 1303 1304 ZFS_ENTER_NOERROR(zfsvfs); 1305 1306 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1307 1308 ZFS_EXIT(zfsvfs); 1309 1310 if (error == 0) { 1311 *vpp = ZTOV(rootzp); 1312 error = vn_lock(*vpp, flags); 1313 (*vpp)->v_vflag |= VV_ROOT; 1314 } 1315 1316 return (error); 1317} 1318 1319/* 1320 * Teardown the zfsvfs::z_os. 1321 * 1322 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1323 * and 'z_teardown_inactive_lock' held. 1324 */ 1325static int 1326zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1327{ 1328 znode_t *zp; 1329 1330 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1331 1332 if (!unmounting) { 1333 /* 1334 * We purge the parent filesystem's vfsp as the parent 1335 * filesystem and all of its snapshots have their vnode's 1336 * v_vfsp set to the parent's filesystem's vfsp. Note, 1337 * 'z_parent' is self referential for non-snapshots. 1338 */ 1339 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1340#ifdef FREEBSD_NAMECACHE 1341 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1342#endif 1343 } 1344 1345 /* 1346 * Close the zil. NB: Can't close the zil while zfs_inactive 1347 * threads are blocked as zil_close can call zfs_inactive. 
1348 */ 1349 if (zfsvfs->z_log) { 1350 zil_close(zfsvfs->z_log); 1351 zfsvfs->z_log = NULL; 1352 } 1353 1354 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1355 1356 /* 1357 * If we are not unmounting (ie: online recv) and someone already 1358 * unmounted this file system while we were doing the switcheroo, 1359 * or a reopen of z_os failed then just bail out now. 1360 */ 1361 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1362 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1363 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1364 return (EIO); 1365 } 1366 1367 /* 1368 * At this point there are no vops active, and any new vops will 1369 * fail with EIO since we have z_teardown_lock for writer (only 1370 * relavent for forced unmount). 1371 * 1372 * Release all holds on dbufs. 1373 */ 1374 mutex_enter(&zfsvfs->z_znodes_lock); 1375 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1376 zp = list_next(&zfsvfs->z_all_znodes, zp)) 1377 if (zp->z_dbuf) { 1378 ASSERT(ZTOV(zp)->v_count >= 0); 1379 zfs_znode_dmu_fini(zp); 1380 } 1381 mutex_exit(&zfsvfs->z_znodes_lock); 1382 1383 /* 1384 * If we are unmounting, set the unmounted flag and let new vops 1385 * unblock. zfs_inactive will have the unmounted behavior, and all 1386 * other vops will fail with EIO. 1387 */ 1388 if (unmounting) { 1389 zfsvfs->z_unmounted = B_TRUE; 1390 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1391 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1392 1393#ifdef __FreeBSD__ 1394 /* 1395 * Some znodes might not be fully reclaimed, wait for them. 
1396 */ 1397 mutex_enter(&zfsvfs->z_znodes_lock); 1398 while (list_head(&zfsvfs->z_all_znodes) != NULL) { 1399 msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 1400 "zteardown", 0); 1401 } 1402 mutex_exit(&zfsvfs->z_znodes_lock); 1403#endif 1404 } 1405 1406 /* 1407 * z_os will be NULL if there was an error in attempting to reopen 1408 * zfsvfs, so just return as the properties had already been 1409 * unregistered and cached data had been evicted before. 1410 */ 1411 if (zfsvfs->z_os == NULL) 1412 return (0); 1413 1414 /* 1415 * Unregister properties. 1416 */ 1417 zfs_unregister_callbacks(zfsvfs); 1418 1419 /* 1420 * Evict cached data 1421 */ 1422 if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 1423 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1424 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 1425 } 1426 1427 return (0); 1428} 1429 1430/*ARGSUSED*/ 1431static int 1432zfs_umount(vfs_t *vfsp, int fflag) 1433{ 1434 kthread_t *td = curthread; 1435 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1436 objset_t *os; 1437 cred_t *cr = td->td_ucred; 1438 int ret; 1439 1440 ret = secpolicy_fs_unmount(cr, vfsp); 1441 if (ret) { 1442 ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1443 ZFS_DELEG_PERM_MOUNT, cr); 1444 if (ret) 1445 return (ret); 1446 } 1447 /* 1448 * We purge the parent filesystem's vfsp as the parent filesystem 1449 * and all of its snapshots have their vnode's v_vfsp set to the 1450 * parent's filesystem's vfsp. Note, 'z_parent' is self 1451 * referential for non-snapshots. 1452 */ 1453 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1454 1455 /* 1456 * Unmount any snapshots mounted under .zfs before unmounting the 1457 * dataset itself. 
1458 */ 1459 if (zfsvfs->z_ctldir != NULL) { 1460 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1461 return (ret); 1462 ret = vflush(vfsp, 0, 0, td); 1463 ASSERT(ret == EBUSY); 1464 if (!(fflag & MS_FORCE)) { 1465 if (zfsvfs->z_ctldir->v_count > 1) 1466 return (EBUSY); 1467 ASSERT(zfsvfs->z_ctldir->v_count == 1); 1468 } 1469 zfsctl_destroy(zfsvfs); 1470 ASSERT(zfsvfs->z_ctldir == NULL); 1471 } 1472 1473 if (fflag & MS_FORCE) { 1474 /* 1475 * Mark file system as unmounted before calling 1476 * vflush(FORCECLOSE). This way we ensure no future vnops 1477 * will be called and risk operating on DOOMED vnodes. 1478 */ 1479 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1480 zfsvfs->z_unmounted = B_TRUE; 1481 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1482 } 1483 1484 /* 1485 * Flush all the files. 1486 */ 1487 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1488 if (ret != 0) { 1489 if (!zfsvfs->z_issnap) { 1490 zfsctl_create(zfsvfs); 1491 ASSERT(zfsvfs->z_ctldir != NULL); 1492 } 1493 return (ret); 1494 } 1495 1496 if (!(fflag & MS_FORCE)) { 1497 /* 1498 * Check the number of active vnodes in the file system. 1499 * Our count is maintained in the vfs structure, but the 1500 * number is off by 1 to indicate a hold on the vfs 1501 * structure itself. 1502 * 1503 * The '.zfs' directory maintains a reference of its 1504 * own, and any active references underneath are 1505 * reflected in the vnode count. 1506 */ 1507 if (zfsvfs->z_ctldir == NULL) { 1508 if (vfsp->vfs_count > 1) 1509 return (EBUSY); 1510 } else { 1511 if (vfsp->vfs_count > 2 || 1512 zfsvfs->z_ctldir->v_count > 1) 1513 return (EBUSY); 1514 } 1515 } else { 1516 MNT_ILOCK(vfsp); 1517 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1518 MNT_IUNLOCK(vfsp); 1519 } 1520 1521 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1522 os = zfsvfs->z_os; 1523 1524 /* 1525 * z_os will be NULL if there was an error in 1526 * attempting to reopen zfsvfs. 
1527 */ 1528 if (os != NULL) { 1529 /* 1530 * Unset the objset user_ptr. 1531 */ 1532 mutex_enter(&os->os->os_user_ptr_lock); 1533 dmu_objset_set_user(os, NULL); 1534 mutex_exit(&os->os->os_user_ptr_lock); 1535 1536 /* 1537 * Finally release the objset 1538 */ 1539 dmu_objset_close(os); 1540 } 1541 1542 /* 1543 * We can now safely destroy the '.zfs' directory node. 1544 */ 1545 if (zfsvfs->z_ctldir != NULL) 1546 zfsctl_destroy(zfsvfs); 1547 if (zfsvfs->z_issnap) { 1548 vnode_t *svp = vfsp->mnt_vnodecovered; 1549 1550 if (svp->v_count >= 2) 1551 VN_RELE(svp); 1552 } 1553 zfs_freevfs(vfsp); 1554 1555 return (0); 1556} 1557 1558static int 1559zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1560{ 1561 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1562 znode_t *zp; 1563 int err; 1564 1565 /* 1566 * zfs_zget() can't operate on virtual entires like .zfs/ or 1567 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1568 * This will make NFS to switch to LOOKUP instead of using VGET. 1569 */ 1570 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 1571 return (EOPNOTSUPP); 1572 1573 ZFS_ENTER(zfsvfs); 1574 err = zfs_zget(zfsvfs, ino, &zp); 1575 if (err == 0 && zp->z_unlinked) { 1576 VN_RELE(ZTOV(zp)); 1577 err = EINVAL; 1578 } 1579 ZFS_EXIT(zfsvfs); 1580 if (err != 0) 1581 *vpp = NULL; 1582 else { 1583 *vpp = ZTOV(zp); 1584 vn_lock(*vpp, flags); 1585 } 1586 return (err); 1587} 1588 1589static int 1590zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1591 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1592{ 1593 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1594 1595 /* 1596 * If this is regular file system vfsp is the same as 1597 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1598 * zfsvfs->z_parent->z_vfs represents parent file system 1599 * which we have to use here, because only this file system 1600 * has mnt_export configured. 
1601 */ 1602 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1603 credanonp, numsecflavors, secflavors)); 1604} 1605 1606CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 1607CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 1608 1609static int 1610zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 1611{ 1612 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1613 znode_t *zp; 1614 uint64_t object = 0; 1615 uint64_t fid_gen = 0; 1616 uint64_t gen_mask; 1617 uint64_t zp_gen; 1618 int i, err; 1619 1620 *vpp = NULL; 1621 1622 ZFS_ENTER(zfsvfs); 1623 1624 /* 1625 * On FreeBSD we can get snapshot's mount point or its parent file 1626 * system mount point depending if snapshot is already mounted or not. 1627 */ 1628 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1629 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1630 uint64_t objsetid = 0; 1631 uint64_t setgen = 0; 1632 1633 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1634 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1635 1636 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1637 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1638 1639 ZFS_EXIT(zfsvfs); 1640 1641 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1642 if (err) 1643 return (EINVAL); 1644 ZFS_ENTER(zfsvfs); 1645 } 1646 1647 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1648 zfid_short_t *zfid = (zfid_short_t *)fidp; 1649 1650 for (i = 0; i < sizeof (zfid->zf_object); i++) 1651 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1652 1653 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1654 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1655 } else { 1656 ZFS_EXIT(zfsvfs); 1657 return (EINVAL); 1658 } 1659 1660 /* A zero fid_gen means we are in the .zfs control directories */ 1661 if (fid_gen == 0 && 1662 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1663 *vpp = zfsvfs->z_ctldir; 1664 ASSERT(*vpp != NULL); 1665 if (object == ZFSCTL_INO_SNAPDIR) { 1666 VERIFY(zfsctl_root_lookup(*vpp, 
"snapshot", vpp, NULL, 1667 0, NULL, NULL, NULL, NULL, NULL) == 0); 1668 } else { 1669 VN_HOLD(*vpp); 1670 } 1671 ZFS_EXIT(zfsvfs); 1672 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1673 return (0); 1674 } 1675 1676 gen_mask = -1ULL >> (64 - 8 * i); 1677 1678 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1679 if (err = zfs_zget(zfsvfs, object, &zp)) { 1680 ZFS_EXIT(zfsvfs); 1681 return (err); 1682 } 1683 zp_gen = zp->z_phys->zp_gen & gen_mask; 1684 if (zp_gen == 0) 1685 zp_gen = 1; 1686 if (zp->z_unlinked || zp_gen != fid_gen) { 1687 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1688 VN_RELE(ZTOV(zp)); 1689 ZFS_EXIT(zfsvfs); 1690 return (EINVAL); 1691 } 1692 1693 ZFS_EXIT(zfsvfs); 1694 1695 *vpp = ZTOV(zp); 1696 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1697 vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); 1698 return (0); 1699} 1700 1701/* 1702 * Block out VOPs and close zfsvfs_t::z_os 1703 * 1704 * Note, if successful, then we return with the 'z_teardown_lock' and 1705 * 'z_teardown_inactive_lock' write held. 1706 */ 1707int 1708zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) 1709{ 1710 int error; 1711 1712 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1713 return (error); 1714 1715 *modep = zfsvfs->z_os->os_mode; 1716 if (name) 1717 dmu_objset_name(zfsvfs->z_os, name); 1718 dmu_objset_close(zfsvfs->z_os); 1719 1720 return (0); 1721} 1722 1723/* 1724 * Reopen zfsvfs_t::z_os and release VOPs. 1725 */ 1726int 1727zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1728{ 1729 int err; 1730 1731 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1732 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1733 1734 err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1735 if (err) { 1736 zfsvfs->z_os = NULL; 1737 } else { 1738 znode_t *zp; 1739 1740 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1741 1742 /* 1743 * Attempt to re-establish all the active znodes with 1744 * their dbufs. 
If a zfs_rezget() fails, then we'll let 1745 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1746 * when they try to use their znode. 1747 */ 1748 mutex_enter(&zfsvfs->z_znodes_lock); 1749 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1750 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1751 (void) zfs_rezget(zp); 1752 } 1753 mutex_exit(&zfsvfs->z_znodes_lock); 1754 1755 } 1756 1757 /* release the VOPs */ 1758 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1759 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1760 1761 if (err) { 1762 /* 1763 * Since we couldn't reopen zfsvfs::z_os, force 1764 * unmount this file system. 1765 */ 1766 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1767 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1768 } 1769 return (err); 1770} 1771 1772static void 1773zfs_freevfs(vfs_t *vfsp) 1774{ 1775 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1776 1777 /* 1778 * If this is a snapshot, we have an extra VFS_HOLD on our parent 1779 * from zfs_mount(). Release it here. 1780 */ 1781 if (zfsvfs->z_issnap) 1782 VFS_RELE(zfsvfs->z_parent->z_vfs); 1783 1784 zfsvfs_free(zfsvfs); 1785 1786 atomic_add_32(&zfs_active_fs_count, -1); 1787} 1788 1789#ifdef __i386__ 1790static int desiredvnodes_backup; 1791#endif 1792 1793static void 1794zfs_vnodes_adjust(void) 1795{ 1796#ifdef __i386__ 1797 int newdesiredvnodes; 1798 1799 desiredvnodes_backup = desiredvnodes; 1800 1801 /* 1802 * We calculate newdesiredvnodes the same way it is done in 1803 * vntblinit(). If it is equal to desiredvnodes, it means that 1804 * it wasn't tuned by the administrator and we can tune it down. 
1805 */ 1806 newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 1807 vm_kmem_size / (5 * (sizeof(struct vm_object) + 1808 sizeof(struct vnode)))); 1809 if (newdesiredvnodes == desiredvnodes) 1810 desiredvnodes = (3 * newdesiredvnodes) / 4; 1811#endif 1812} 1813 1814static void 1815zfs_vnodes_adjust_back(void) 1816{ 1817 1818#ifdef __i386__ 1819 desiredvnodes = desiredvnodes_backup; 1820#endif 1821} 1822 1823void 1824zfs_init(void) 1825{ 1826 1827 printf("ZFS filesystem version " ZPL_VERSION_STRING "\n"); 1828 1829 /* 1830 * Initialize znode cache, vnode ops, etc... 1831 */ 1832 zfs_znode_init(); 1833 1834 /* 1835 * Initialize .zfs directory structures 1836 */ 1837 zfsctl_init(); 1838 1839 /* 1840 * Reduce number of vnode. Originally number of vnodes is calculated 1841 * with UFS inode in mind. We reduce it here, because it's too big for 1842 * ZFS/i386. 1843 */ 1844 zfs_vnodes_adjust(); 1845 1846 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 1847} 1848 1849void 1850zfs_fini(void) 1851{ 1852 zfsctl_fini(); 1853 zfs_znode_fini(); 1854 zfs_vnodes_adjust_back(); 1855} 1856 1857int 1858zfs_busy(void) 1859{ 1860 return (zfs_active_fs_count != 0); 1861} 1862 1863int 1864zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 1865{ 1866 int error; 1867 objset_t *os = zfsvfs->z_os; 1868 dmu_tx_t *tx; 1869 1870 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1871 return (EINVAL); 1872 1873 if (newvers < zfsvfs->z_version) 1874 return (EINVAL); 1875 1876 tx = dmu_tx_create(os); 1877 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 1878 error = dmu_tx_assign(tx, TXG_WAIT); 1879 if (error) { 1880 dmu_tx_abort(tx); 1881 return (error); 1882 } 1883 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1884 8, 1, &newvers, tx); 1885 1886 if (error) { 1887 dmu_tx_commit(tx); 1888 return (error); 1889 } 1890 1891 spa_history_internal_log(LOG_DS_UPGRADE, 1892 dmu_objset_spa(os), tx, CRED(), 1893 "oldver=%llu newver=%llu dataset = %llu", 
1894 zfsvfs->z_version, newvers, dmu_objset_id(os)); 1895 1896 dmu_tx_commit(tx); 1897 1898 zfsvfs->z_version = newvers; 1899 1900 if (zfsvfs->z_version >= ZPL_VERSION_FUID) 1901 zfs_set_fuid_feature(zfsvfs); 1902 1903 return (0); 1904} 1905/* 1906 * Read a property stored within the master node. 1907 */ 1908int 1909zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1910{ 1911 const char *pname; 1912 int error = ENOENT; 1913 1914 /* 1915 * Look up the file system's value for the property. For the 1916 * version property, we look up a slightly different string. 1917 */ 1918 if (prop == ZFS_PROP_VERSION) 1919 pname = ZPL_VERSION_STR; 1920 else 1921 pname = zfs_prop_to_name(prop); 1922 1923 if (os != NULL) 1924 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1925 1926 if (error == ENOENT) { 1927 /* No value set, use the default value */ 1928 switch (prop) { 1929 case ZFS_PROP_VERSION: 1930 *value = ZPL_VERSION; 1931 break; 1932 case ZFS_PROP_NORMALIZE: 1933 case ZFS_PROP_UTF8ONLY: 1934 *value = 0; 1935 break; 1936 case ZFS_PROP_CASE: 1937 *value = ZFS_CASE_SENSITIVE; 1938 break; 1939 default: 1940 return (error); 1941 } 1942 error = 0; 1943 } 1944 return (error); 1945} 1946