zfs_vfsops.c revision 215260
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#include <sys/types.h> 27#include <sys/param.h> 28#include <sys/systm.h> 29#include <sys/kernel.h> 30#include <sys/sysmacros.h> 31#include <sys/kmem.h> 32#include <sys/acl.h> 33#include <sys/vnode.h> 34#include <sys/vfs.h> 35#include <sys/mntent.h> 36#include <sys/mount.h> 37#include <sys/cmn_err.h> 38#include <sys/zfs_znode.h> 39#include <sys/zfs_dir.h> 40#include <sys/zil.h> 41#include <sys/fs/zfs.h> 42#include <sys/dmu.h> 43#include <sys/dsl_prop.h> 44#include <sys/dsl_dataset.h> 45#include <sys/dsl_deleg.h> 46#include <sys/spa.h> 47#include <sys/zap.h> 48#include <sys/varargs.h> 49#include <sys/policy.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/zfs_fuid.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56#include <sys/dmu_objset.h> 57#include <sys/spa_boot.h> 58 59struct mtx zfs_debug_mtx; 60MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 61 62SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 63 64int zfs_super_owner = 0; 65SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 66 "File system owner can perform privileged operation on his file systems"); 67 68int zfs_debug_level = 0; 69TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 70SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 71 "Debug level"); 72 73SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 74static int zfs_version_acl = ZFS_ACL_VERSION; 75SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 76 "ZFS_ACL_VERSION"); 77static int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 78SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 79 &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 80static int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 81SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 82 &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 83static int zfs_version_spa = SPA_VERSION; 84SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 85 "SPA_VERSION"); 86static int zfs_version_zpl = ZPL_VERSION; 87SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 88 "ZPL_VERSION"); 89 90static int zfs_mount(vfs_t *vfsp); 91static int zfs_umount(vfs_t *vfsp, int fflag); 92static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 93static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 94static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 95static int zfs_sync(vfs_t *vfsp, int waitfor); 96static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 97 struct ucred **credanonp, int *numsecflavors, int **secflavors); 98static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 99static void zfs_objset_close(zfsvfs_t *zfsvfs); 100static void zfs_freevfs(vfs_t *vfsp); 101 102static struct vfsops zfs_vfsops = { 103 .vfs_mount = zfs_mount, 104 .vfs_unmount = zfs_umount, 105 .vfs_root = zfs_root, 106 .vfs_statfs = zfs_statfs, 107 .vfs_vget = zfs_vget, 108 .vfs_sync = zfs_sync, 109 .vfs_checkexp = zfs_checkexp, 110 .vfs_fhtovp = zfs_fhtovp, 111}; 112 113VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 114 115/* 116 * We need to keep a count of active fs's. 117 * This is necessary to prevent our module 118 * from being unloaded after a umount -f 119 */ 120static uint32_t zfs_active_fs_count = 0; 121 122/*ARGSUSED*/ 123static int 124zfs_sync(vfs_t *vfsp, int waitfor) 125{ 126 127 /* 128 * Data integrity is job one. We don't want a compromised kernel 129 * writing to the storage pool, so we never sync during panic. 130 */ 131 if (panicstr) 132 return (0); 133 134 if (vfsp != NULL) { 135 /* 136 * Sync a specific filesystem. 137 */ 138 zfsvfs_t *zfsvfs = vfsp->vfs_data; 139 dsl_pool_t *dp; 140 int error; 141 142 error = vfs_stdsync(vfsp, waitfor); 143 if (error != 0) 144 return (error); 145 146 ZFS_ENTER(zfsvfs); 147 dp = dmu_objset_pool(zfsvfs->z_os); 148 149 /* 150 * If the system is shutting down, then skip any 151 * filesystems which may exist on a suspended pool. 152 */ 153 if (sys_shutdown && spa_suspended(dp->dp_spa)) { 154 ZFS_EXIT(zfsvfs); 155 return (0); 156 } 157 158 if (zfsvfs->z_log != NULL) 159 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 160 else 161 txg_wait_synced(dp, 0); 162 ZFS_EXIT(zfsvfs); 163 } else { 164 /* 165 * Sync all ZFS filesystems. This is what happens when you 166 * run sync(1M). Unlike other filesystems, ZFS honors the 167 * request by waiting for all pools to commit all dirty data. 168 */ 169 spa_sync_allpools(); 170 } 171 172 return (0); 173} 174 175static void 176atime_changed_cb(void *arg, uint64_t newval) 177{ 178 zfsvfs_t *zfsvfs = arg; 179 180 if (newval == TRUE) { 181 zfsvfs->z_atime = TRUE; 182 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 183 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 184 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 185 } else { 186 zfsvfs->z_atime = FALSE; 187 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 188 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 189 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 190 } 191} 192 193static void 194xattr_changed_cb(void *arg, uint64_t newval) 195{ 196 zfsvfs_t *zfsvfs = arg; 197 198 if (newval == TRUE) { 199 /* XXX locking on vfs_flag? */ 200#ifdef TODO 201 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 202#endif 203 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 204 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 205 } else { 206 /* XXX locking on vfs_flag? */ 207#ifdef TODO 208 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 209#endif 210 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 211 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 212 } 213} 214 215static void 216blksz_changed_cb(void *arg, uint64_t newval) 217{ 218 zfsvfs_t *zfsvfs = arg; 219 220 if (newval < SPA_MINBLOCKSIZE || 221 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 222 newval = SPA_MAXBLOCKSIZE; 223 224 zfsvfs->z_max_blksz = newval; 225 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 226} 227 228static void 229readonly_changed_cb(void *arg, uint64_t newval) 230{ 231 zfsvfs_t *zfsvfs = arg; 232 233 if (newval) { 234 /* XXX locking on vfs_flag? */ 235 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 236 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 237 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 238 } else { 239 /* XXX locking on vfs_flag? */ 240 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 241 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 242 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 243 } 244} 245 246static void 247setuid_changed_cb(void *arg, uint64_t newval) 248{ 249 zfsvfs_t *zfsvfs = arg; 250 251 if (newval == FALSE) { 252 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 253 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 254 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 255 } else { 256 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 257 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 258 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 259 } 260} 261 262static void 263exec_changed_cb(void *arg, uint64_t newval) 264{ 265 zfsvfs_t *zfsvfs = arg; 266 267 if (newval == FALSE) { 268 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 269 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 270 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 271 } else { 272 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 273 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 274 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 275 } 276} 277 278/* 279 * The nbmand mount option can be changed at mount time. 280 * We can't allow it to be toggled on live file systems or incorrect 281 * behavior may be seen from cifs clients 282 * 283 * This property isn't registered via dsl_prop_register(), but this callback 284 * will be called when a file system is first mounted 285 */ 286static void 287nbmand_changed_cb(void *arg, uint64_t newval) 288{ 289 zfsvfs_t *zfsvfs = arg; 290 if (newval == FALSE) { 291 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 292 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 293 } else { 294 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 295 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 296 } 297} 298 299static void 300snapdir_changed_cb(void *arg, uint64_t newval) 301{ 302 zfsvfs_t *zfsvfs = arg; 303 304 zfsvfs->z_show_ctldir = newval; 305} 306 307static void 308vscan_changed_cb(void *arg, uint64_t newval) 309{ 310 zfsvfs_t *zfsvfs = arg; 311 312 zfsvfs->z_vscan = newval; 313} 314 315static void 316acl_mode_changed_cb(void *arg, uint64_t newval) 317{ 318 zfsvfs_t *zfsvfs = arg; 319 320 zfsvfs->z_acl_mode = newval; 321} 322 323static void 324acl_inherit_changed_cb(void *arg, uint64_t newval) 325{ 326 zfsvfs_t *zfsvfs = arg; 327 328 zfsvfs->z_acl_inherit = newval; 329} 330 331static int 332zfs_register_callbacks(vfs_t *vfsp) 333{ 334 struct dsl_dataset *ds = NULL; 335 objset_t *os = NULL; 336 zfsvfs_t *zfsvfs = NULL; 337 uint64_t nbmand; 338 int readonly, do_readonly = FALSE; 339 int setuid, do_setuid = FALSE; 340 int exec, do_exec = FALSE; 341 int xattr, do_xattr = FALSE; 342 int atime, do_atime = FALSE; 343 int error = 0; 344 345 ASSERT(vfsp); 346 zfsvfs = vfsp->vfs_data; 347 ASSERT(zfsvfs); 348 os = zfsvfs->z_os; 349 350 /* 351 * This function can be called for a snapshot when we update snapshot's 352 * mount point, which isn't really supported. 353 */ 354 if (dmu_objset_is_snapshot(os)) 355 return (EOPNOTSUPP); 356 357 /* 358 * The act of registering our callbacks will destroy any mount 359 * options we may have. In order to enable temporary overrides 360 * of mount options, we stash away the current values and 361 * restore them after we register the callbacks. 362 */ 363 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 364 readonly = B_TRUE; 365 do_readonly = B_TRUE; 366 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 367 readonly = B_FALSE; 368 do_readonly = B_TRUE; 369 } 370 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 371 setuid = B_FALSE; 372 do_setuid = B_TRUE; 373 } else { 374 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 375 setuid = B_FALSE; 376 do_setuid = B_TRUE; 377 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 378 setuid = B_TRUE; 379 do_setuid = B_TRUE; 380 } 381 } 382 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 383 exec = B_FALSE; 384 do_exec = B_TRUE; 385 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 386 exec = B_TRUE; 387 do_exec = B_TRUE; 388 } 389 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 390 xattr = B_FALSE; 391 do_xattr = B_TRUE; 392 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 393 xattr = B_TRUE; 394 do_xattr = B_TRUE; 395 } 396 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 397 atime = B_FALSE; 398 do_atime = B_TRUE; 399 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 400 atime = B_TRUE; 401 do_atime = B_TRUE; 402 } 403 404 /* 405 * nbmand is a special property. It can only be changed at 406 * mount time. 407 * 408 * This is weird, but it is documented to only be changeable 409 * at mount time. 410 */ 411 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 412 nbmand = B_FALSE; 413 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 414 nbmand = B_TRUE; 415 } else { 416 char osname[MAXNAMELEN]; 417 418 dmu_objset_name(os, osname); 419 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 420 NULL)) { 421 return (error); 422 } 423 } 424 425 /* 426 * Register property callbacks. 427 * 428 * It would probably be fine to just check for i/o error from 429 * the first prop_register(), but I guess I like to go 430 * overboard... 431 */ 432 ds = dmu_objset_ds(os); 433 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 434 error = error ? error : dsl_prop_register(ds, 435 "xattr", xattr_changed_cb, zfsvfs); 436 error = error ? error : dsl_prop_register(ds, 437 "recordsize", blksz_changed_cb, zfsvfs); 438 error = error ? error : dsl_prop_register(ds, 439 "readonly", readonly_changed_cb, zfsvfs); 440 error = error ? error : dsl_prop_register(ds, 441 "setuid", setuid_changed_cb, zfsvfs); 442 error = error ? error : dsl_prop_register(ds, 443 "exec", exec_changed_cb, zfsvfs); 444 error = error ? error : dsl_prop_register(ds, 445 "snapdir", snapdir_changed_cb, zfsvfs); 446 error = error ? error : dsl_prop_register(ds, 447 "aclmode", acl_mode_changed_cb, zfsvfs); 448 error = error ? error : dsl_prop_register(ds, 449 "aclinherit", acl_inherit_changed_cb, zfsvfs); 450 error = error ? error : dsl_prop_register(ds, 451 "vscan", vscan_changed_cb, zfsvfs); 452 if (error) 453 goto unregister; 454 455 /* 456 * Invoke our callbacks to restore temporary mount options. 457 */ 458 if (do_readonly) 459 readonly_changed_cb(zfsvfs, readonly); 460 if (do_setuid) 461 setuid_changed_cb(zfsvfs, setuid); 462 if (do_exec) 463 exec_changed_cb(zfsvfs, exec); 464 if (do_xattr) 465 xattr_changed_cb(zfsvfs, xattr); 466 if (do_atime) 467 atime_changed_cb(zfsvfs, atime); 468 469 nbmand_changed_cb(zfsvfs, nbmand); 470 471 return (0); 472 473unregister: 474 /* 475 * We may attempt to unregister some callbacks that are not 476 * registered, but this is OK; it will simply return ENOMSG, 477 * which we will ignore. 478 */ 479 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 480 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 481 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 482 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 483 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 484 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 485 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 486 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 487 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 488 zfsvfs); 489 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 490 return (error); 491 492} 493 494static void 495uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid, 496 int64_t delta, dmu_tx_t *tx) 497{ 498 uint64_t used = 0; 499 char buf[32]; 500 int err; 501 uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 502 503 if (delta == 0) 504 return; 505 506 (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid); 507 err = zap_lookup(os, obj, buf, 8, 1, &used); 508 ASSERT(err == 0 || err == ENOENT); 509 /* no underflow/overflow */ 510 ASSERT(delta > 0 || used >= -delta); 511 ASSERT(delta < 0 || used + delta > used); 512 used += delta; 513 if (used == 0) 514 err = zap_remove(os, obj, buf, tx); 515 else 516 err = zap_update(os, obj, buf, 8, 1, &used, tx); 517 ASSERT(err == 0); 518} 519 520static void 521zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype, 522 void *oldbonus, void *newbonus, 523 uint64_t oldused, uint64_t newused, dmu_tx_t *tx) 524{ 525 znode_phys_t *oldznp = oldbonus; 526 znode_phys_t *newznp = newbonus; 527 528 if (bonustype != DMU_OT_ZNODE) 529 return; 530 531 /* We charge 512 for the dnode (if it's allocated). */ 532 if (oldznp->zp_gen != 0) 533 oldused += DNODE_SIZE; 534 if (newznp->zp_gen != 0) 535 newused += DNODE_SIZE; 536 537 if (oldznp->zp_uid == newznp->zp_uid) { 538 uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx); 539 } else { 540 uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx); 541 uidacct(os, B_FALSE, newznp->zp_uid, newused, tx); 542 } 543 544 if (oldznp->zp_gid == newznp->zp_gid) { 545 uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx); 546 } else { 547 uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx); 548 uidacct(os, B_TRUE, newznp->zp_gid, newused, tx); 549 } 550} 551 552static void 553fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 554 char *domainbuf, int buflen, uid_t *ridp) 555{ 556 uint64_t fuid; 557 const char *domain; 558 559 fuid = strtonum(fuidstr, NULL); 560 561 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 562 if (domain) 563 (void) strlcpy(domainbuf, domain, buflen); 564 else 565 domainbuf[0] = '\0'; 566 *ridp = FUID_RID(fuid); 567} 568 569static uint64_t 570zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 571{ 572 switch (type) { 573 case ZFS_PROP_USERUSED: 574 return (DMU_USERUSED_OBJECT); 575 case ZFS_PROP_GROUPUSED: 576 return (DMU_GROUPUSED_OBJECT); 577 case ZFS_PROP_USERQUOTA: 578 return (zfsvfs->z_userquota_obj); 579 case ZFS_PROP_GROUPQUOTA: 580 return (zfsvfs->z_groupquota_obj); 581 } 582 return (0); 583} 584 585int 586zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 587 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 588{ 589 int error; 590 zap_cursor_t zc; 591 zap_attribute_t za; 592 zfs_useracct_t *buf = vbuf; 593 uint64_t obj; 594 595 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 596 return (ENOTSUP); 597 598 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 599 if (obj == 0) { 600 *bufsizep = 0; 601 return (0); 602 } 603 604 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 605 (error = zap_cursor_retrieve(&zc, &za)) == 0; 606 zap_cursor_advance(&zc)) { 607 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 608 *bufsizep) 609 break; 610 611 fuidstr_to_sid(zfsvfs, za.za_name, 612 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 613 614 buf->zu_space = za.za_first_integer; 615 buf++; 616 } 617 if (error == ENOENT) 618 error = 0; 619 620 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 621 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 622 *cookiep = zap_cursor_serialize(&zc); 623 zap_cursor_fini(&zc); 624 return (error); 625} 626 627/* 628 * buf must be big enough (eg, 32 bytes) 629 */ 630static int 631id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 632 char *buf, boolean_t addok) 633{ 634 uint64_t fuid; 635 int domainid = 0; 636 637 if (domain && domain[0]) { 638 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 639 if (domainid == -1) 640 return (ENOENT); 641 } 642 fuid = FUID_ENCODE(domainid, rid); 643 (void) sprintf(buf, "%llx", (longlong_t)fuid); 644 return (0); 645} 646 647int 648zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 649 const char *domain, uint64_t rid, uint64_t *valp) 650{ 651 char buf[32]; 652 int err; 653 uint64_t obj; 654 655 *valp = 0; 656 657 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 658 return (ENOTSUP); 659 660 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 661 if (obj == 0) 662 return (0); 663 664 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 665 if (err) 666 return (err); 667 668 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 669 if (err == ENOENT) 670 err = 0; 671 return (err); 672} 673 674int 675zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 676 const char *domain, uint64_t rid, uint64_t quota) 677{ 678 char buf[32]; 679 int err; 680 dmu_tx_t *tx; 681 uint64_t *objp; 682 boolean_t fuid_dirtied; 683 684 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 685 return (EINVAL); 686 687 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 688 return (ENOTSUP); 689 690 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 691 &zfsvfs->z_groupquota_obj; 692 693 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 694 if (err) 695 return (err); 696 fuid_dirtied = zfsvfs->z_fuid_dirty; 697 698 tx = dmu_tx_create(zfsvfs->z_os); 699 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 700 if (*objp == 0) { 701 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 702 zfs_userquota_prop_prefixes[type]); 703 } 704 if (fuid_dirtied) 705 zfs_fuid_txhold(zfsvfs, tx); 706 err = dmu_tx_assign(tx, TXG_WAIT); 707 if (err) { 708 dmu_tx_abort(tx); 709 return (err); 710 } 711 712 mutex_enter(&zfsvfs->z_lock); 713 if (*objp == 0) { 714 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 715 DMU_OT_NONE, 0, tx); 716 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 717 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 718 } 719 mutex_exit(&zfsvfs->z_lock); 720 721 if (quota == 0) { 722 err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 723 if (err == ENOENT) 724 err = 0; 725 } else { 726 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 727 } 728 ASSERT(err == 0); 729 if (fuid_dirtied) 730 zfs_fuid_sync(zfsvfs, tx); 731 dmu_tx_commit(tx); 732 return (err); 733} 734 735boolean_t 736zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 737{ 738 char buf[32]; 739 uint64_t used, quota, usedobj, quotaobj; 740 int err; 741 742 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 743 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 744 745 if (quotaobj == 0 || zfsvfs->z_replay) 746 return (B_FALSE); 747 748 (void) sprintf(buf, "%llx", (longlong_t)fuid); 749 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 750 if (err != 0) 751 return (B_FALSE); 752 753 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 754 if (err != 0) 755 return (B_FALSE); 756 return (used >= quota); 757} 758 759int 760zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) 761{ 762 objset_t *os; 763 zfsvfs_t *zfsvfs; 764 uint64_t zval; 765 int i, error; 766 767 if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL)) 768 return (error); 769 if (zval) 770 mode |= DS_MODE_READONLY; 771 772 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 773 if (error == EROFS) { 774 mode |= DS_MODE_READONLY; 775 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 776 } 777 if (error) 778 return (error); 779 780 /* 781 * Initialize the zfs-specific filesystem structure. 782 * Should probably make this a kmem cache, shuffle fields, 783 * and just bzero up to z_hold_mtx[]. 784 */ 785 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 786 zfsvfs->z_vfs = NULL; 787 zfsvfs->z_parent = zfsvfs; 788 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 789 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 790 zfsvfs->z_os = os; 791 792 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 793 if (error) { 794 goto out; 795 } else if (zfsvfs->z_version > ZPL_VERSION) { 796 (void) printf("Mismatched versions: File system " 797 "is version %llu on-disk format, which is " 798 "incompatible with this software version %lld!", 799 (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); 800 error = ENOTSUP; 801 goto out; 802 } 803 804 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 805 goto out; 806 zfsvfs->z_norm = (int)zval; 807 808 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 809 goto out; 810 zfsvfs->z_utf8 = (zval != 0); 811 812 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 813 goto out; 814 zfsvfs->z_case = (uint_t)zval; 815 816 /* 817 * Fold case on file systems that are always or sometimes case 818 * insensitive. 819 */ 820 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 821 zfsvfs->z_case == ZFS_CASE_MIXED) 822 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 823 824 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 825 826 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 827 &zfsvfs->z_root); 828 if (error) 829 goto out; 830 ASSERT(zfsvfs->z_root != 0); 831 832 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 833 &zfsvfs->z_unlinkedobj); 834 if (error) 835 goto out; 836 837 error = zap_lookup(os, MASTER_NODE_OBJ, 838 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 839 8, 1, &zfsvfs->z_userquota_obj); 840 if (error && error != ENOENT) 841 goto out; 842 843 error = zap_lookup(os, MASTER_NODE_OBJ, 844 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 845 8, 1, &zfsvfs->z_groupquota_obj); 846 if (error && error != ENOENT) 847 goto out; 848 849 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 850 &zfsvfs->z_fuid_obj); 851 if (error && error != ENOENT) 852 goto out; 853 854 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 855 &zfsvfs->z_shares_dir); 856 if (error && error != ENOENT) 857 goto out; 858 859 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 860 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 861 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 862 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 863 offsetof(znode_t, z_link_node)); 864 rrw_init(&zfsvfs->z_teardown_lock); 865 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 866 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 867 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 868 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 869 870 *zvp = zfsvfs; 871 return (0); 872 873out: 874 dmu_objset_close(os); 875 *zvp = NULL; 876 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 877 return (error); 878} 879 880static int 881zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 882{ 883 int error; 884 885 error = zfs_register_callbacks(zfsvfs->z_vfs); 886 if (error) 887 return (error); 888 889 /* 890 * Set the objset user_ptr to track its zfsvfs. 891 */ 892 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 893 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 894 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 895 896 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 897 if (zil_disable) { 898 zil_destroy(zfsvfs->z_log, B_FALSE); 899 zfsvfs->z_log = NULL; 900 } 901 902 /* 903 * If we are not mounting (ie: online recv), then we don't 904 * have to worry about replaying the log as we blocked all 905 * operations out since we closed the ZIL. 906 */ 907 if (mounting) { 908 boolean_t readonly; 909 910 /* 911 * During replay we remove the read only flag to 912 * allow replays to succeed. 913 */ 914 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 915 if (readonly != 0) 916 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 917 else 918 zfs_unlinked_drain(zfsvfs); 919 920 if (zfsvfs->z_log) { 921 /* 922 * Parse and replay the intent log. 923 * 924 * Because of ziltest, this must be done after 925 * zfs_unlinked_drain(). (Further note: ziltest 926 * doesn't use readonly mounts, where 927 * zfs_unlinked_drain() isn't called.) This is because 928 * ziltest causes spa_sync() to think it's committed, 929 * but actually it is not, so the intent log contains 930 * many txg's worth of changes. 931 * 932 * In particular, if object N is in the unlinked set in 933 * the last txg to actually sync, then it could be 934 * actually freed in a later txg and then reallocated 935 * in a yet later txg. This would write a "create 936 * object N" record to the intent log. Normally, this 937 * would be fine because the spa_sync() would have 938 * written out the fact that object N is free, before 939 * we could write the "create object N" intent log 940 * record. 941 * 942 * But when we are in ziltest mode, we advance the "open 943 * txg" without actually spa_sync()-ing the changes to 944 * disk. So we would see that object N is still 945 * allocated and in the unlinked set, and there is an 946 * intent log record saying to allocate it. 947 */ 948 zfsvfs->z_replay = B_TRUE; 949 zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); 950 zfsvfs->z_replay = B_FALSE; 951 } 952 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 953 } 954 955 return (0); 956} 957 958extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 959 960void 961zfsvfs_free(zfsvfs_t *zfsvfs) 962{ 963 int i; 964 965 /* 966 * This is a barrier to prevent the filesystem from going away in 967 * zfs_znode_move() until we can safely ensure that the filesystem is 968 * not unmounted. We consider the filesystem valid before the barrier 969 * and invalid after the barrier. 970 */ 971 rw_enter(&zfsvfs_lock, RW_READER); 972 rw_exit(&zfsvfs_lock); 973 974 zfs_fuid_destroy(zfsvfs); 975 976 mutex_destroy(&zfsvfs->z_znodes_lock); 977 mutex_destroy(&zfsvfs->z_online_recv_lock); 978 mutex_destroy(&zfsvfs->z_lock); 979 list_destroy(&zfsvfs->z_all_znodes); 980 rrw_destroy(&zfsvfs->z_teardown_lock); 981 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 982 rw_destroy(&zfsvfs->z_fuid_lock); 983 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 984 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 985 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 986} 987 988static void 989zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 990{ 991 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 992 if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { 993 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 994 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 995 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 996 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 997 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 998 } 999} 1000 1001static int 1002zfs_domount(vfs_t *vfsp, char *osname) 1003{ 1004 uint64_t recordsize, fsid_guid; 1005 int error = 0; 1006 zfsvfs_t *zfsvfs; 1007 vnode_t *vp; 1008 1009 ASSERT(vfsp); 1010 ASSERT(osname); 1011 1012 error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs); 1013 if (error) 1014 return (error); 1015 zfsvfs->z_vfs = vfsp; 1016 1017 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1018 NULL)) 1019 goto out; 1020 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1021 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1022 1023 vfsp->vfs_data = zfsvfs; 1024 vfsp->mnt_flag |= MNT_LOCAL; 1025 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 1026 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1027 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1028 1029 1030 /* 1031 * The fsid is 64 bits, composed of an 8-bit fs type, which 1032 * separates our fsid from any other filesystem types, and a 1033 * 56-bit objset unique ID. The objset unique ID is unique to 1034 * all objsets open on this system, provided by unique_create(). 1035 * The 8-bit fs type must be put in the low bits of fsid[1] 1036 * because that's where other Solaris filesystems put it. 1037 */ 1038 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1039 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1040 vfsp->vfs_fsid.val[0] = fsid_guid; 1041 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1042 vfsp->mnt_vfc->vfc_typenum & 0xFF; 1043 1044 /* 1045 * Set features for file system. 1046 */ 1047 zfs_set_fuid_feature(zfsvfs); 1048 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1049 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1050 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1051 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1052 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1053 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1054 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1055 } 1056 1057 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1058 uint64_t pval; 1059 1060 atime_changed_cb(zfsvfs, B_FALSE); 1061 readonly_changed_cb(zfsvfs, B_TRUE); 1062 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1063 goto out; 1064 xattr_changed_cb(zfsvfs, pval); 1065 zfsvfs->z_issnap = B_TRUE; 1066 1067 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 1068 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1069 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 1070 } else { 1071 error = zfsvfs_setup(zfsvfs, B_TRUE); 1072 } 1073 1074 vfs_mountedfrom(vfsp, osname); 1075 /* Grab extra reference. */ 1076 VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0); 1077 VOP_UNLOCK(vp, 0); 1078 1079 if (!zfsvfs->z_issnap) 1080 zfsctl_create(zfsvfs); 1081out: 1082 if (error) { 1083 dmu_objset_close(zfsvfs->z_os); 1084 zfsvfs_free(zfsvfs); 1085 } else { 1086 atomic_add_32(&zfs_active_fs_count, 1); 1087 } 1088 1089 return (error); 1090} 1091 1092void 1093zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1094{ 1095 objset_t *os = zfsvfs->z_os; 1096 struct dsl_dataset *ds; 1097 1098 /* 1099 * Unregister properties. 1100 */ 1101 if (!dmu_objset_is_snapshot(os)) { 1102 ds = dmu_objset_ds(os); 1103 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1104 zfsvfs) == 0); 1105 1106 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 1107 zfsvfs) == 0); 1108 1109 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1110 zfsvfs) == 0); 1111 1112 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1113 zfsvfs) == 0); 1114 1115 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1116 zfsvfs) == 0); 1117 1118 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1119 zfsvfs) == 0); 1120 1121 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1122 zfsvfs) == 0); 1123 1124 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 1125 zfsvfs) == 0); 1126 1127 VERIFY(dsl_prop_unregister(ds, "aclinherit", 1128 acl_inherit_changed_cb, zfsvfs) == 0); 1129 1130 VERIFY(dsl_prop_unregister(ds, "vscan", 1131 vscan_changed_cb, zfsvfs) == 0); 1132 } 1133} 1134 1135/*ARGSUSED*/ 1136static int 1137zfs_mount(vfs_t *vfsp) 1138{ 1139 kthread_t *td = curthread; 1140 vnode_t *mvp = vfsp->mnt_vnodecovered; 1141 cred_t *cr = td->td_ucred; 1142 char *osname; 1143 int error = 0; 1144 int canwrite; 1145 1146 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1147 return (EINVAL); 1148 1149 /* 1150 * If full-owner-access is enabled and delegated administration is 1151 * turned on, we must set nosuid. 1152 */ 1153 if (zfs_super_owner && 1154 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1155 secpolicy_fs_mount_clearopts(cr, vfsp); 1156 } 1157 1158 /* 1159 * Check for mount privilege? 1160 * 1161 * If we don't have privilege then see if 1162 * we have local permission to allow it 1163 */ 1164 error = secpolicy_fs_mount(cr, mvp, vfsp); 1165 if (error) { 1166 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1167 goto out; 1168 1169 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1170 vattr_t vattr; 1171 1172 /* 1173 * Make sure user is the owner of the mount point 1174 * or has sufficient privileges. 1175 */ 1176 1177 vattr.va_mask = AT_UID; 1178 1179 vn_lock(mvp, LK_SHARED | LK_RETRY); 1180 if (VOP_GETATTR(mvp, &vattr, cr)) { 1181 VOP_UNLOCK(mvp, 0); 1182 goto out; 1183 } 1184 1185 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1186 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1187 VOP_UNLOCK(mvp, 0); 1188 goto out; 1189 } 1190 VOP_UNLOCK(mvp, 0); 1191 } 1192 1193 secpolicy_fs_mount_clearopts(cr, vfsp); 1194 } 1195 1196 /* 1197 * Refuse to mount a filesystem if we are in a local zone and the 1198 * dataset is not visible. 1199 */ 1200 if (!INGLOBALZONE(curthread) && 1201 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1202 error = EPERM; 1203 goto out; 1204 } 1205 1206 /* 1207 * When doing a remount, we simply refresh our temporary properties 1208 * according to those options set in the current VFS options. 1209 */ 1210 if (vfsp->vfs_flag & MS_REMOUNT) { 1211 /* refresh mount options */ 1212 zfs_unregister_callbacks(vfsp->vfs_data); 1213 error = zfs_register_callbacks(vfsp); 1214 goto out; 1215 } 1216 1217 DROP_GIANT(); 1218 error = zfs_domount(vfsp, osname); 1219 PICKUP_GIANT(); 1220 1221#ifdef sun 1222 /* 1223 * Add an extra VFS_HOLD on our parent vfs so that it can't 1224 * disappear due to a forced unmount. 1225 */ 1226 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1227 VFS_HOLD(mvp->v_vfsp); 1228#endif /* sun */ 1229 1230out: 1231 return (error); 1232} 1233 1234static int 1235zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1236{ 1237 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1238 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1239 1240 statp->f_version = STATFS_VERSION; 1241 1242 ZFS_ENTER(zfsvfs); 1243 1244 dmu_objset_space(zfsvfs->z_os, 1245 &refdbytes, &availbytes, &usedobjs, &availobjs); 1246 1247 /* 1248 * The underlying storage pool actually uses multiple block sizes. 1249 * We report the fragsize as the smallest block size we support, 1250 * and we report our blocksize as the filesystem's maximum blocksize. 1251 */ 1252 statp->f_bsize = SPA_MINBLOCKSIZE; 1253 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1254 1255 /* 1256 * The following report "total" blocks of various kinds in the 1257 * file system, but reported in terms of f_frsize - the 1258 * "fragment" size. 1259 */ 1260 1261 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1262 statp->f_bfree = availbytes / statp->f_bsize; 1263 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1264 1265 /* 1266 * statvfs() should really be called statufs(), because it assumes 1267 * static metadata. ZFS doesn't preallocate files, so the best 1268 * we can do is report the max that could possibly fit in f_files, 1269 * and that minus the number actually used in f_ffree. 1270 * For f_ffree, report the smaller of the number of object available 1271 * and the number of blocks (each object will take at least a block). 1272 */ 1273 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1274 statp->f_files = statp->f_ffree + usedobjs; 1275 1276 /* 1277 * We're a zfs filesystem. 1278 */ 1279 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 1280 1281 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1282 sizeof(statp->f_mntfromname)); 1283 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1284 sizeof(statp->f_mntonname)); 1285 1286 statp->f_namemax = ZFS_MAXNAMELEN; 1287 1288 ZFS_EXIT(zfsvfs); 1289 return (0); 1290} 1291 1292static int 1293zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1294{ 1295 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1296 znode_t *rootzp; 1297 int error; 1298 1299 ZFS_ENTER_NOERROR(zfsvfs); 1300 1301 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1302 1303 ZFS_EXIT(zfsvfs); 1304 1305 if (error == 0) { 1306 *vpp = ZTOV(rootzp); 1307 error = vn_lock(*vpp, flags); 1308 (*vpp)->v_vflag |= VV_ROOT; 1309 } 1310 1311 return (error); 1312} 1313 1314/* 1315 * Teardown the zfsvfs::z_os. 1316 * 1317 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1318 * and 'z_teardown_inactive_lock' held. 1319 */ 1320static int 1321zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1322{ 1323 znode_t *zp; 1324 1325 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1326 1327 if (!unmounting) { 1328 /* 1329 * We purge the parent filesystem's vfsp as the parent 1330 * filesystem and all of its snapshots have their vnode's 1331 * v_vfsp set to the parent's filesystem's vfsp. Note, 1332 * 'z_parent' is self referential for non-snapshots. 1333 */ 1334 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1335#ifdef FREEBSD_NAMECACHE 1336 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1337#endif 1338 } 1339 1340 /* 1341 * Close the zil. NB: Can't close the zil while zfs_inactive 1342 * threads are blocked as zil_close can call zfs_inactive. 1343 */ 1344 if (zfsvfs->z_log) { 1345 zil_close(zfsvfs->z_log); 1346 zfsvfs->z_log = NULL; 1347 } 1348 1349 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1350 1351 /* 1352 * If we are not unmounting (ie: online recv) and someone already 1353 * unmounted this file system while we were doing the switcheroo, 1354 * or a reopen of z_os failed then just bail out now. 1355 */ 1356 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1357 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1358 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1359 return (EIO); 1360 } 1361 1362 /* 1363 * At this point there are no vops active, and any new vops will 1364 * fail with EIO since we have z_teardown_lock for writer (only 1365 * relavent for forced unmount). 1366 * 1367 * Release all holds on dbufs. 1368 */ 1369 mutex_enter(&zfsvfs->z_znodes_lock); 1370 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1371 zp = list_next(&zfsvfs->z_all_znodes, zp)) 1372 if (zp->z_dbuf) { 1373 ASSERT(ZTOV(zp)->v_count >= 0); 1374 zfs_znode_dmu_fini(zp); 1375 } 1376 mutex_exit(&zfsvfs->z_znodes_lock); 1377 1378 /* 1379 * If we are unmounting, set the unmounted flag and let new vops 1380 * unblock. zfs_inactive will have the unmounted behavior, and all 1381 * other vops will fail with EIO. 1382 */ 1383 if (unmounting) { 1384 zfsvfs->z_unmounted = B_TRUE; 1385 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1386 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1387 1388#ifdef __FreeBSD__ 1389 /* 1390 * Some znodes might not be fully reclaimed, wait for them. 1391 */ 1392 mutex_enter(&zfsvfs->z_znodes_lock); 1393 while (list_head(&zfsvfs->z_all_znodes) != NULL) { 1394 msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 1395 "zteardown", 0); 1396 } 1397 mutex_exit(&zfsvfs->z_znodes_lock); 1398#endif 1399 } 1400 1401 /* 1402 * z_os will be NULL if there was an error in attempting to reopen 1403 * zfsvfs, so just return as the properties had already been 1404 * unregistered and cached data had been evicted before. 1405 */ 1406 if (zfsvfs->z_os == NULL) 1407 return (0); 1408 1409 /* 1410 * Unregister properties. 1411 */ 1412 zfs_unregister_callbacks(zfsvfs); 1413 1414 /* 1415 * Evict cached data 1416 */ 1417 if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 1418 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1419 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 1420 } 1421 1422 return (0); 1423} 1424 1425/*ARGSUSED*/ 1426static int 1427zfs_umount(vfs_t *vfsp, int fflag) 1428{ 1429 kthread_t *td = curthread; 1430 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1431 objset_t *os; 1432 cred_t *cr = td->td_ucred; 1433 int ret; 1434 1435 ret = secpolicy_fs_unmount(cr, vfsp); 1436 if (ret) { 1437 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1438 ZFS_DELEG_PERM_MOUNT, cr)) 1439 return (ret); 1440 } 1441 /* 1442 * We purge the parent filesystem's vfsp as the parent filesystem 1443 * and all of its snapshots have their vnode's v_vfsp set to the 1444 * parent's filesystem's vfsp. Note, 'z_parent' is self 1445 * referential for non-snapshots. 1446 */ 1447 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1448 1449 /* 1450 * Unmount any snapshots mounted under .zfs before unmounting the 1451 * dataset itself. 1452 */ 1453 if (zfsvfs->z_ctldir != NULL) { 1454 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1455 return (ret); 1456 ret = vflush(vfsp, 0, 0, td); 1457 ASSERT(ret == EBUSY); 1458 if (!(fflag & MS_FORCE)) { 1459 if (zfsvfs->z_ctldir->v_count > 1) 1460 return (EBUSY); 1461 ASSERT(zfsvfs->z_ctldir->v_count == 1); 1462 } 1463 zfsctl_destroy(zfsvfs); 1464 ASSERT(zfsvfs->z_ctldir == NULL); 1465 } 1466 1467 if (fflag & MS_FORCE) { 1468 /* 1469 * Mark file system as unmounted before calling 1470 * vflush(FORCECLOSE). This way we ensure no future vnops 1471 * will be called and risk operating on DOOMED vnodes. 1472 */ 1473 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1474 zfsvfs->z_unmounted = B_TRUE; 1475 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1476 } 1477 1478 /* 1479 * Flush all the files. 1480 */ 1481 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1482 if (ret != 0) { 1483 if (!zfsvfs->z_issnap) { 1484 zfsctl_create(zfsvfs); 1485 ASSERT(zfsvfs->z_ctldir != NULL); 1486 } 1487 return (ret); 1488 } 1489 1490 if (!(fflag & MS_FORCE)) { 1491 /* 1492 * Check the number of active vnodes in the file system. 1493 * Our count is maintained in the vfs structure, but the 1494 * number is off by 1 to indicate a hold on the vfs 1495 * structure itself. 1496 * 1497 * The '.zfs' directory maintains a reference of its 1498 * own, and any active references underneath are 1499 * reflected in the vnode count. 1500 */ 1501 if (zfsvfs->z_ctldir == NULL) { 1502 if (vfsp->vfs_count > 1) 1503 return (EBUSY); 1504 } else { 1505 if (vfsp->vfs_count > 2 || 1506 zfsvfs->z_ctldir->v_count > 1) 1507 return (EBUSY); 1508 } 1509 } else { 1510 MNT_ILOCK(vfsp); 1511 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1512 MNT_IUNLOCK(vfsp); 1513 } 1514 1515 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1516 os = zfsvfs->z_os; 1517 1518 /* 1519 * z_os will be NULL if there was an error in 1520 * attempting to reopen zfsvfs. 1521 */ 1522 if (os != NULL) { 1523 /* 1524 * Unset the objset user_ptr. 1525 */ 1526 mutex_enter(&os->os->os_user_ptr_lock); 1527 dmu_objset_set_user(os, NULL); 1528 mutex_exit(&os->os->os_user_ptr_lock); 1529 1530 /* 1531 * Finally release the objset 1532 */ 1533 dmu_objset_close(os); 1534 } 1535 1536 /* 1537 * We can now safely destroy the '.zfs' directory node. 1538 */ 1539 if (zfsvfs->z_ctldir != NULL) 1540 zfsctl_destroy(zfsvfs); 1541 if (zfsvfs->z_issnap) { 1542 vnode_t *svp = vfsp->mnt_vnodecovered; 1543 1544 if (svp->v_count >= 2) 1545 VN_RELE(svp); 1546 } 1547 zfs_freevfs(vfsp); 1548 1549 return (0); 1550} 1551 1552static int 1553zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1554{ 1555 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1556 znode_t *zp; 1557 int err; 1558 1559 /* 1560 * zfs_zget() can't operate on virtual entires like .zfs/ or 1561 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1562 * This will make NFS to switch to LOOKUP instead of using VGET. 1563 */ 1564 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 1565 return (EOPNOTSUPP); 1566 1567 ZFS_ENTER(zfsvfs); 1568 err = zfs_zget(zfsvfs, ino, &zp); 1569 if (err == 0 && zp->z_unlinked) { 1570 VN_RELE(ZTOV(zp)); 1571 err = EINVAL; 1572 } 1573 ZFS_EXIT(zfsvfs); 1574 if (err != 0) 1575 *vpp = NULL; 1576 else { 1577 *vpp = ZTOV(zp); 1578 vn_lock(*vpp, flags); 1579 } 1580 return (err); 1581} 1582 1583static int 1584zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1585 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1586{ 1587 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1588 1589 /* 1590 * If this is regular file system vfsp is the same as 1591 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1592 * zfsvfs->z_parent->z_vfs represents parent file system 1593 * which we have to use here, because only this file system 1594 * has mnt_export configured. 1595 */ 1596 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1597 credanonp, numsecflavors, secflavors)); 1598} 1599 1600CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 1601CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 1602 1603static int 1604zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 1605{ 1606 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1607 znode_t *zp; 1608 uint64_t object = 0; 1609 uint64_t fid_gen = 0; 1610 uint64_t gen_mask; 1611 uint64_t zp_gen; 1612 int i, err; 1613 1614 *vpp = NULL; 1615 1616 ZFS_ENTER(zfsvfs); 1617 1618 /* 1619 * On FreeBSD we can get snapshot's mount point or its parent file 1620 * system mount point depending if snapshot is already mounted or not. 1621 */ 1622 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1623 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1624 uint64_t objsetid = 0; 1625 uint64_t setgen = 0; 1626 1627 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1628 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1629 1630 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1631 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1632 1633 ZFS_EXIT(zfsvfs); 1634 1635 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1636 if (err) 1637 return (EINVAL); 1638 ZFS_ENTER(zfsvfs); 1639 } 1640 1641 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1642 zfid_short_t *zfid = (zfid_short_t *)fidp; 1643 1644 for (i = 0; i < sizeof (zfid->zf_object); i++) 1645 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1646 1647 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1648 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1649 } else { 1650 ZFS_EXIT(zfsvfs); 1651 return (EINVAL); 1652 } 1653 1654 /* A zero fid_gen means we are in the .zfs control directories */ 1655 if (fid_gen == 0 && 1656 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1657 *vpp = zfsvfs->z_ctldir; 1658 ASSERT(*vpp != NULL); 1659 if (object == ZFSCTL_INO_SNAPDIR) { 1660 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1661 0, NULL, NULL, NULL, NULL, NULL) == 0); 1662 } else { 1663 VN_HOLD(*vpp); 1664 } 1665 ZFS_EXIT(zfsvfs); 1666 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1667 return (0); 1668 } 1669 1670 gen_mask = -1ULL >> (64 - 8 * i); 1671 1672 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1673 if (err = zfs_zget(zfsvfs, object, &zp)) { 1674 ZFS_EXIT(zfsvfs); 1675 return (err); 1676 } 1677 zp_gen = zp->z_phys->zp_gen & gen_mask; 1678 if (zp_gen == 0) 1679 zp_gen = 1; 1680 if (zp->z_unlinked || zp_gen != fid_gen) { 1681 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1682 VN_RELE(ZTOV(zp)); 1683 ZFS_EXIT(zfsvfs); 1684 return (EINVAL); 1685 } 1686 1687 ZFS_EXIT(zfsvfs); 1688 1689 *vpp = ZTOV(zp); 1690 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1691 vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); 1692 return (0); 1693} 1694 1695/* 1696 * Block out VOPs and close zfsvfs_t::z_os 1697 * 1698 * Note, if successful, then we return with the 'z_teardown_lock' and 1699 * 'z_teardown_inactive_lock' write held. 1700 */ 1701int 1702zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) 1703{ 1704 int error; 1705 1706 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1707 return (error); 1708 1709 *modep = zfsvfs->z_os->os_mode; 1710 if (name) 1711 dmu_objset_name(zfsvfs->z_os, name); 1712 dmu_objset_close(zfsvfs->z_os); 1713 1714 return (0); 1715} 1716 1717/* 1718 * Reopen zfsvfs_t::z_os and release VOPs. 1719 */ 1720int 1721zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1722{ 1723 int err; 1724 1725 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1726 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1727 1728 err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1729 if (err) { 1730 zfsvfs->z_os = NULL; 1731 } else { 1732 znode_t *zp; 1733 1734 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1735 1736 /* 1737 * Attempt to re-establish all the active znodes with 1738 * their dbufs. If a zfs_rezget() fails, then we'll let 1739 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1740 * when they try to use their znode. 1741 */ 1742 mutex_enter(&zfsvfs->z_znodes_lock); 1743 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1744 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1745 (void) zfs_rezget(zp); 1746 } 1747 mutex_exit(&zfsvfs->z_znodes_lock); 1748 1749 } 1750 1751 /* release the VOPs */ 1752 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1753 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1754 1755 if (err) { 1756 /* 1757 * Since we couldn't reopen zfsvfs::z_os, force 1758 * unmount this file system. 1759 */ 1760 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1761 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1762 } 1763 return (err); 1764} 1765 1766static void 1767zfs_freevfs(vfs_t *vfsp) 1768{ 1769 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1770 1771#ifdef sun 1772 /* 1773 * If this is a snapshot, we have an extra VFS_HOLD on our parent 1774 * from zfs_mount(). Release it here. 1775 */ 1776 if (zfsvfs->z_issnap) 1777 VFS_RELE(zfsvfs->z_parent->z_vfs); 1778#endif /* sun */ 1779 1780 zfsvfs_free(zfsvfs); 1781 1782 atomic_add_32(&zfs_active_fs_count, -1); 1783} 1784 1785#ifdef __i386__ 1786static int desiredvnodes_backup; 1787#endif 1788 1789static void 1790zfs_vnodes_adjust(void) 1791{ 1792#ifdef __i386__ 1793 int newdesiredvnodes; 1794 1795 desiredvnodes_backup = desiredvnodes; 1796 1797 /* 1798 * We calculate newdesiredvnodes the same way it is done in 1799 * vntblinit(). If it is equal to desiredvnodes, it means that 1800 * it wasn't tuned by the administrator and we can tune it down. 1801 */ 1802 newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 1803 vm_kmem_size / (5 * (sizeof(struct vm_object) + 1804 sizeof(struct vnode)))); 1805 if (newdesiredvnodes == desiredvnodes) 1806 desiredvnodes = (3 * newdesiredvnodes) / 4; 1807#endif 1808} 1809 1810static void 1811zfs_vnodes_adjust_back(void) 1812{ 1813 1814#ifdef __i386__ 1815 desiredvnodes = desiredvnodes_backup; 1816#endif 1817} 1818 1819void 1820zfs_init(void) 1821{ 1822 1823 printf("ZFS filesystem version " ZPL_VERSION_STRING "\n"); 1824 1825 /* 1826 * Initialize znode cache, vnode ops, etc... 1827 */ 1828 zfs_znode_init(); 1829 1830 /* 1831 * Initialize .zfs directory structures 1832 */ 1833 zfsctl_init(); 1834 1835 /* 1836 * Reduce number of vnode. Originally number of vnodes is calculated 1837 * with UFS inode in mind. We reduce it here, because it's too big for 1838 * ZFS/i386. 1839 */ 1840 zfs_vnodes_adjust(); 1841 1842 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 1843} 1844 1845void 1846zfs_fini(void) 1847{ 1848 zfsctl_fini(); 1849 zfs_znode_fini(); 1850 zfs_vnodes_adjust_back(); 1851} 1852 1853int 1854zfs_busy(void) 1855{ 1856 return (zfs_active_fs_count != 0); 1857} 1858 1859int 1860zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 1861{ 1862 int error; 1863 objset_t *os = zfsvfs->z_os; 1864 dmu_tx_t *tx; 1865 1866 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1867 return (EINVAL); 1868 1869 if (newvers < zfsvfs->z_version) 1870 return (EINVAL); 1871 1872 tx = dmu_tx_create(os); 1873 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 1874 error = dmu_tx_assign(tx, TXG_WAIT); 1875 if (error) { 1876 dmu_tx_abort(tx); 1877 return (error); 1878 } 1879 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1880 8, 1, &newvers, tx); 1881 1882 if (error) { 1883 dmu_tx_commit(tx); 1884 return (error); 1885 } 1886 1887 spa_history_internal_log(LOG_DS_UPGRADE, 1888 dmu_objset_spa(os), tx, CRED(), 1889 "oldver=%llu newver=%llu dataset = %llu", 1890 zfsvfs->z_version, newvers, dmu_objset_id(os)); 1891 1892 dmu_tx_commit(tx); 1893 1894 zfsvfs->z_version = newvers; 1895 1896 if (zfsvfs->z_version >= ZPL_VERSION_FUID) 1897 zfs_set_fuid_feature(zfsvfs); 1898 1899 return (0); 1900} 1901/* 1902 * Read a property stored within the master node. 1903 */ 1904int 1905zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1906{ 1907 const char *pname; 1908 int error = ENOENT; 1909 1910 /* 1911 * Look up the file system's value for the property. For the 1912 * version property, we look up a slightly different string. 1913 */ 1914 if (prop == ZFS_PROP_VERSION) 1915 pname = ZPL_VERSION_STR; 1916 else 1917 pname = zfs_prop_to_name(prop); 1918 1919 if (os != NULL) 1920 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1921 1922 if (error == ENOENT) { 1923 /* No value set, use the default value */ 1924 switch (prop) { 1925 case ZFS_PROP_VERSION: 1926 *value = ZPL_VERSION; 1927 break; 1928 case ZFS_PROP_NORMALIZE: 1929 case ZFS_PROP_UTF8ONLY: 1930 *value = 0; 1931 break; 1932 case ZFS_PROP_CASE: 1933 *value = ZFS_CASE_SENSITIVE; 1934 break; 1935 default: 1936 return (error); 1937 } 1938 error = 0; 1939 } 1940 return (error); 1941} 1942