zfs_vfsops.c revision 212694
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#include <sys/types.h> 27#include <sys/param.h> 28#include <sys/systm.h> 29#include <sys/kernel.h> 30#include <sys/sysmacros.h> 31#include <sys/kmem.h> 32#include <sys/acl.h> 33#include <sys/vnode.h> 34#include <sys/vfs.h> 35#include <sys/mntent.h> 36#include <sys/mount.h> 37#include <sys/cmn_err.h> 38#include <sys/zfs_znode.h> 39#include <sys/zfs_dir.h> 40#include <sys/zil.h> 41#include <sys/fs/zfs.h> 42#include <sys/dmu.h> 43#include <sys/dsl_prop.h> 44#include <sys/dsl_dataset.h> 45#include <sys/dsl_deleg.h> 46#include <sys/spa.h> 47#include <sys/zap.h> 48#include <sys/varargs.h> 49#include <sys/policy.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/zfs_fuid.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56#include <sys/dmu_objset.h> 57#include <sys/spa_boot.h> 58 59struct mtx zfs_debug_mtx; 60MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 61 62SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 63 64int zfs_super_owner = 0; 65SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 66 "File system owner can perform privileged operation on his file systems"); 67 68int zfs_debug_level = 0; 69TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 70SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 71 "Debug level"); 72 73SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 74static int zfs_version_acl = ZFS_ACL_VERSION; 75SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 76 "ZFS_ACL_VERSION"); 77static int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 78SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 79 &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 80static int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 81SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 82 &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 83static int zfs_version_spa = SPA_VERSION; 84SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 85 "SPA_VERSION"); 86static int zfs_version_zpl = ZPL_VERSION; 87SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 88 "ZPL_VERSION"); 89 90static int zfs_mount(vfs_t *vfsp); 91static int zfs_umount(vfs_t *vfsp, int fflag); 92static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 93static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 94static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 95static int zfs_sync(vfs_t *vfsp, int waitfor); 96static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 97 struct ucred **credanonp, int *numsecflavors, int **secflavors); 98static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 99static void zfs_objset_close(zfsvfs_t *zfsvfs); 100static void zfs_freevfs(vfs_t *vfsp); 101 102static struct vfsops zfs_vfsops = { 103 .vfs_mount = zfs_mount, 104 .vfs_unmount = zfs_umount, 105 .vfs_root = zfs_root, 106 .vfs_statfs = zfs_statfs, 107 .vfs_vget = zfs_vget, 108 .vfs_sync = zfs_sync, 109 .vfs_checkexp = zfs_checkexp, 110 .vfs_fhtovp = zfs_fhtovp, 111}; 112 113VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 114 115/* 116 * We need to keep a count of active fs's. 117 * This is necessary to prevent our module 118 * from being unloaded after a umount -f 119 */ 120static uint32_t zfs_active_fs_count = 0; 121 122/*ARGSUSED*/ 123static int 124zfs_sync(vfs_t *vfsp, int waitfor) 125{ 126 127 /* 128 * Data integrity is job one. We don't want a compromised kernel 129 * writing to the storage pool, so we never sync during panic. 130 */ 131 if (panicstr) 132 return (0); 133 134 if (vfsp != NULL) { 135 /* 136 * Sync a specific filesystem. 137 */ 138 zfsvfs_t *zfsvfs = vfsp->vfs_data; 139 dsl_pool_t *dp; 140 int error; 141 142 error = vfs_stdsync(vfsp, waitfor); 143 if (error != 0) 144 return (error); 145 146 ZFS_ENTER(zfsvfs); 147 dp = dmu_objset_pool(zfsvfs->z_os); 148 149 /* 150 * If the system is shutting down, then skip any 151 * filesystems which may exist on a suspended pool. 152 */ 153 if (sys_shutdown && spa_suspended(dp->dp_spa)) { 154 ZFS_EXIT(zfsvfs); 155 return (0); 156 } 157 158 if (zfsvfs->z_log != NULL) 159 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 160 else 161 txg_wait_synced(dp, 0); 162 ZFS_EXIT(zfsvfs); 163 } else { 164 /* 165 * Sync all ZFS filesystems. This is what happens when you 166 * run sync(1M). Unlike other filesystems, ZFS honors the 167 * request by waiting for all pools to commit all dirty data. 168 */ 169 spa_sync_allpools(); 170 } 171 172 return (0); 173} 174 175static void 176atime_changed_cb(void *arg, uint64_t newval) 177{ 178 zfsvfs_t *zfsvfs = arg; 179 180 if (newval == TRUE) { 181 zfsvfs->z_atime = TRUE; 182 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 183 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 184 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 185 } else { 186 zfsvfs->z_atime = FALSE; 187 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 188 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 189 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 190 } 191} 192 193static void 194xattr_changed_cb(void *arg, uint64_t newval) 195{ 196 zfsvfs_t *zfsvfs = arg; 197 198 if (newval == TRUE) { 199 /* XXX locking on vfs_flag? */ 200#ifdef TODO 201 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 202#endif 203 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 204 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 205 } else { 206 /* XXX locking on vfs_flag? */ 207#ifdef TODO 208 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 209#endif 210 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 211 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 212 } 213} 214 215static void 216blksz_changed_cb(void *arg, uint64_t newval) 217{ 218 zfsvfs_t *zfsvfs = arg; 219 220 if (newval < SPA_MINBLOCKSIZE || 221 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 222 newval = SPA_MAXBLOCKSIZE; 223 224 zfsvfs->z_max_blksz = newval; 225 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 226} 227 228static void 229readonly_changed_cb(void *arg, uint64_t newval) 230{ 231 zfsvfs_t *zfsvfs = arg; 232 233 if (newval) { 234 /* XXX locking on vfs_flag? */ 235 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 236 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 237 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 238 } else { 239 /* XXX locking on vfs_flag? */ 240 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 241 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 242 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 243 } 244} 245 246static void 247setuid_changed_cb(void *arg, uint64_t newval) 248{ 249 zfsvfs_t *zfsvfs = arg; 250 251 if (newval == FALSE) { 252 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 253 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 254 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 255 } else { 256 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 257 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 258 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 259 } 260} 261 262static void 263exec_changed_cb(void *arg, uint64_t newval) 264{ 265 zfsvfs_t *zfsvfs = arg; 266 267 if (newval == FALSE) { 268 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 269 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 270 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 271 } else { 272 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 273 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 274 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 275 } 276} 277 278/* 279 * The nbmand mount option can be changed at mount time. 280 * We can't allow it to be toggled on live file systems or incorrect 281 * behavior may be seen from cifs clients 282 * 283 * This property isn't registered via dsl_prop_register(), but this callback 284 * will be called when a file system is first mounted 285 */ 286static void 287nbmand_changed_cb(void *arg, uint64_t newval) 288{ 289 zfsvfs_t *zfsvfs = arg; 290 if (newval == FALSE) { 291 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 292 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 293 } else { 294 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 295 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 296 } 297} 298 299static void 300snapdir_changed_cb(void *arg, uint64_t newval) 301{ 302 zfsvfs_t *zfsvfs = arg; 303 304 zfsvfs->z_show_ctldir = newval; 305} 306 307static void 308vscan_changed_cb(void *arg, uint64_t newval) 309{ 310 zfsvfs_t *zfsvfs = arg; 311 312 zfsvfs->z_vscan = newval; 313} 314 315static void 316acl_mode_changed_cb(void *arg, uint64_t newval) 317{ 318 zfsvfs_t *zfsvfs = arg; 319 320 zfsvfs->z_acl_mode = newval; 321} 322 323static void 324acl_inherit_changed_cb(void *arg, uint64_t newval) 325{ 326 zfsvfs_t *zfsvfs = arg; 327 328 zfsvfs->z_acl_inherit = newval; 329} 330 331static int 332zfs_register_callbacks(vfs_t *vfsp) 333{ 334 struct dsl_dataset *ds = NULL; 335 objset_t *os = NULL; 336 zfsvfs_t *zfsvfs = NULL; 337 uint64_t nbmand; 338 int readonly, do_readonly = FALSE; 339 int setuid, do_setuid = FALSE; 340 int exec, do_exec = FALSE; 341 int xattr, do_xattr = FALSE; 342 int atime, do_atime = FALSE; 343 int error = 0; 344 345 ASSERT(vfsp); 346 zfsvfs = vfsp->vfs_data; 347 ASSERT(zfsvfs); 348 os = zfsvfs->z_os; 349 350 /* 351 * This function can be called for a snapshot when we update snapshot's 352 * mount point, which isn't really supported. 353 */ 354 if (dmu_objset_is_snapshot(os)) 355 return (EOPNOTSUPP); 356 357 /* 358 * The act of registering our callbacks will destroy any mount 359 * options we may have. In order to enable temporary overrides 360 * of mount options, we stash away the current values and 361 * restore them after we register the callbacks. 362 */ 363 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 364 readonly = B_TRUE; 365 do_readonly = B_TRUE; 366 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 367 readonly = B_FALSE; 368 do_readonly = B_TRUE; 369 } 370 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 371 setuid = B_FALSE; 372 do_setuid = B_TRUE; 373 } else { 374 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 375 setuid = B_FALSE; 376 do_setuid = B_TRUE; 377 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 378 setuid = B_TRUE; 379 do_setuid = B_TRUE; 380 } 381 } 382 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 383 exec = B_FALSE; 384 do_exec = B_TRUE; 385 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 386 exec = B_TRUE; 387 do_exec = B_TRUE; 388 } 389 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 390 xattr = B_FALSE; 391 do_xattr = B_TRUE; 392 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 393 xattr = B_TRUE; 394 do_xattr = B_TRUE; 395 } 396 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 397 atime = B_FALSE; 398 do_atime = B_TRUE; 399 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 400 atime = B_TRUE; 401 do_atime = B_TRUE; 402 } 403 404 /* 405 * nbmand is a special property. It can only be changed at 406 * mount time. 407 * 408 * This is weird, but it is documented to only be changeable 409 * at mount time. 410 */ 411 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 412 nbmand = B_FALSE; 413 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 414 nbmand = B_TRUE; 415 } else { 416 char osname[MAXNAMELEN]; 417 418 dmu_objset_name(os, osname); 419 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 420 NULL)) { 421 return (error); 422 } 423 } 424 425 /* 426 * Register property callbacks. 427 * 428 * It would probably be fine to just check for i/o error from 429 * the first prop_register(), but I guess I like to go 430 * overboard... 431 */ 432 ds = dmu_objset_ds(os); 433 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 434 error = error ? error : dsl_prop_register(ds, 435 "xattr", xattr_changed_cb, zfsvfs); 436 error = error ? error : dsl_prop_register(ds, 437 "recordsize", blksz_changed_cb, zfsvfs); 438 error = error ? error : dsl_prop_register(ds, 439 "readonly", readonly_changed_cb, zfsvfs); 440 error = error ? error : dsl_prop_register(ds, 441 "setuid", setuid_changed_cb, zfsvfs); 442 error = error ? error : dsl_prop_register(ds, 443 "exec", exec_changed_cb, zfsvfs); 444 error = error ? error : dsl_prop_register(ds, 445 "snapdir", snapdir_changed_cb, zfsvfs); 446 error = error ? error : dsl_prop_register(ds, 447 "aclmode", acl_mode_changed_cb, zfsvfs); 448 error = error ? error : dsl_prop_register(ds, 449 "aclinherit", acl_inherit_changed_cb, zfsvfs); 450 error = error ? error : dsl_prop_register(ds, 451 "vscan", vscan_changed_cb, zfsvfs); 452 if (error) 453 goto unregister; 454 455 /* 456 * Invoke our callbacks to restore temporary mount options. 457 */ 458 if (do_readonly) 459 readonly_changed_cb(zfsvfs, readonly); 460 if (do_setuid) 461 setuid_changed_cb(zfsvfs, setuid); 462 if (do_exec) 463 exec_changed_cb(zfsvfs, exec); 464 if (do_xattr) 465 xattr_changed_cb(zfsvfs, xattr); 466 if (do_atime) 467 atime_changed_cb(zfsvfs, atime); 468 469 nbmand_changed_cb(zfsvfs, nbmand); 470 471 return (0); 472 473unregister: 474 /* 475 * We may attempt to unregister some callbacks that are not 476 * registered, but this is OK; it will simply return ENOMSG, 477 * which we will ignore. 478 */ 479 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 480 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 481 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 482 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 483 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 484 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 485 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 486 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 487 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 488 zfsvfs); 489 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 490 return (error); 491 492} 493 494static void 495uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid, 496 int64_t delta, dmu_tx_t *tx) 497{ 498 uint64_t used = 0; 499 char buf[32]; 500 int err; 501 uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 502 503 if (delta == 0) 504 return; 505 506 (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid); 507 err = zap_lookup(os, obj, buf, 8, 1, &used); 508 ASSERT(err == 0 || err == ENOENT); 509 /* no underflow/overflow */ 510 ASSERT(delta > 0 || used >= -delta); 511 ASSERT(delta < 0 || used + delta > used); 512 used += delta; 513 if (used == 0) 514 err = zap_remove(os, obj, buf, tx); 515 else 516 err = zap_update(os, obj, buf, 8, 1, &used, tx); 517 ASSERT(err == 0); 518} 519 520static void 521zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype, 522 void *oldbonus, void *newbonus, 523 uint64_t oldused, uint64_t newused, dmu_tx_t *tx) 524{ 525 znode_phys_t *oldznp = oldbonus; 526 znode_phys_t *newznp = newbonus; 527 528 if (bonustype != DMU_OT_ZNODE) 529 return; 530 531 /* We charge 512 for the dnode (if it's allocated). */ 532 if (oldznp->zp_gen != 0) 533 oldused += DNODE_SIZE; 534 if (newznp->zp_gen != 0) 535 newused += DNODE_SIZE; 536 537 if (oldznp->zp_uid == newznp->zp_uid) { 538 uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx); 539 } else { 540 uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx); 541 uidacct(os, B_FALSE, newznp->zp_uid, newused, tx); 542 } 543 544 if (oldznp->zp_gid == newznp->zp_gid) { 545 uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx); 546 } else { 547 uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx); 548 uidacct(os, B_TRUE, newznp->zp_gid, newused, tx); 549 } 550} 551 552static void 553fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 554 char *domainbuf, int buflen, uid_t *ridp) 555{ 556 uint64_t fuid; 557 const char *domain; 558 559 fuid = strtonum(fuidstr, NULL); 560 561 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 562 if (domain) 563 (void) strlcpy(domainbuf, domain, buflen); 564 else 565 domainbuf[0] = '\0'; 566 *ridp = FUID_RID(fuid); 567} 568 569static uint64_t 570zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 571{ 572 switch (type) { 573 case ZFS_PROP_USERUSED: 574 return (DMU_USERUSED_OBJECT); 575 case ZFS_PROP_GROUPUSED: 576 return (DMU_GROUPUSED_OBJECT); 577 case ZFS_PROP_USERQUOTA: 578 return (zfsvfs->z_userquota_obj); 579 case ZFS_PROP_GROUPQUOTA: 580 return (zfsvfs->z_groupquota_obj); 581 } 582 return (0); 583} 584 585int 586zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 587 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 588{ 589 int error; 590 zap_cursor_t zc; 591 zap_attribute_t za; 592 zfs_useracct_t *buf = vbuf; 593 uint64_t obj; 594 595 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 596 return (ENOTSUP); 597 598 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 599 if (obj == 0) { 600 *bufsizep = 0; 601 return (0); 602 } 603 604 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 605 (error = zap_cursor_retrieve(&zc, &za)) == 0; 606 zap_cursor_advance(&zc)) { 607 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 608 *bufsizep) 609 break; 610 611 fuidstr_to_sid(zfsvfs, za.za_name, 612 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 613 614 buf->zu_space = za.za_first_integer; 615 buf++; 616 } 617 if (error == ENOENT) 618 error = 0; 619 620 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 621 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 622 *cookiep = zap_cursor_serialize(&zc); 623 zap_cursor_fini(&zc); 624 return (error); 625} 626 627/* 628 * buf must be big enough (eg, 32 bytes) 629 */ 630static int 631id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 632 char *buf, boolean_t addok) 633{ 634 uint64_t fuid; 635 int domainid = 0; 636 637 if (domain && domain[0]) { 638 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 639 if (domainid == -1) 640 return (ENOENT); 641 } 642 fuid = FUID_ENCODE(domainid, rid); 643 (void) sprintf(buf, "%llx", (longlong_t)fuid); 644 return (0); 645} 646 647int 648zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 649 const char *domain, uint64_t rid, uint64_t *valp) 650{ 651 char buf[32]; 652 int err; 653 uint64_t obj; 654 655 *valp = 0; 656 657 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 658 return (ENOTSUP); 659 660 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 661 if (obj == 0) 662 return (0); 663 664 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 665 if (err) 666 return (err); 667 668 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 669 if (err == ENOENT) 670 err = 0; 671 return (err); 672} 673 674int 675zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 676 const char *domain, uint64_t rid, uint64_t quota) 677{ 678 char buf[32]; 679 int err; 680 dmu_tx_t *tx; 681 uint64_t *objp; 682 boolean_t fuid_dirtied; 683 684 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 685 return (EINVAL); 686 687 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 688 return (ENOTSUP); 689 690 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 691 &zfsvfs->z_groupquota_obj; 692 693 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 694 if (err) 695 return (err); 696 fuid_dirtied = zfsvfs->z_fuid_dirty; 697 698 tx = dmu_tx_create(zfsvfs->z_os); 699 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 700 if (*objp == 0) { 701 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 702 zfs_userquota_prop_prefixes[type]); 703 } 704 if (fuid_dirtied) 705 zfs_fuid_txhold(zfsvfs, tx); 706 err = dmu_tx_assign(tx, TXG_WAIT); 707 if (err) { 708 dmu_tx_abort(tx); 709 return (err); 710 } 711 712 mutex_enter(&zfsvfs->z_lock); 713 if (*objp == 0) { 714 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 715 DMU_OT_NONE, 0, tx); 716 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 717 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 718 } 719 mutex_exit(&zfsvfs->z_lock); 720 721 if (quota == 0) { 722 err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 723 if (err == ENOENT) 724 err = 0; 725 } else { 726 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 727 } 728 ASSERT(err == 0); 729 if (fuid_dirtied) 730 zfs_fuid_sync(zfsvfs, tx); 731 dmu_tx_commit(tx); 732 return (err); 733} 734 735boolean_t 736zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 737{ 738 char buf[32]; 739 uint64_t used, quota, usedobj, quotaobj; 740 int err; 741 742 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 743 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 744 745 if (quotaobj == 0 || zfsvfs->z_replay) 746 return (B_FALSE); 747 748 (void) sprintf(buf, "%llx", (longlong_t)fuid); 749 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 750 if (err != 0) 751 return (B_FALSE); 752 753 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 754 if (err != 0) 755 return (B_FALSE); 756 return (used >= quota); 757} 758 759int 760zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) 761{ 762 objset_t *os; 763 zfsvfs_t *zfsvfs; 764 uint64_t zval; 765 int i, error; 766 767 if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL)) 768 return (error); 769 if (zval) 770 mode |= DS_MODE_READONLY; 771 772 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 773 if (error == EROFS) { 774 mode |= DS_MODE_READONLY; 775 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 776 } 777 if (error) 778 return (error); 779 780 /* 781 * Initialize the zfs-specific filesystem structure. 782 * Should probably make this a kmem cache, shuffle fields, 783 * and just bzero up to z_hold_mtx[]. 784 */ 785 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 786 zfsvfs->z_vfs = NULL; 787 zfsvfs->z_parent = zfsvfs; 788 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 789 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 790 zfsvfs->z_os = os; 791 792 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 793 if (error) { 794 goto out; 795 } else if (zfsvfs->z_version > ZPL_VERSION) { 796 (void) printf("Mismatched versions: File system " 797 "is version %llu on-disk format, which is " 798 "incompatible with this software version %lld!", 799 (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); 800 error = ENOTSUP; 801 goto out; 802 } 803 804 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 805 goto out; 806 zfsvfs->z_norm = (int)zval; 807 808 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 809 goto out; 810 zfsvfs->z_utf8 = (zval != 0); 811 812 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 813 goto out; 814 zfsvfs->z_case = (uint_t)zval; 815 816 /* 817 * Fold case on file systems that are always or sometimes case 818 * insensitive. 819 */ 820 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 821 zfsvfs->z_case == ZFS_CASE_MIXED) 822 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 823 824 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 825 826 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 827 &zfsvfs->z_root); 828 if (error) 829 goto out; 830 ASSERT(zfsvfs->z_root != 0); 831 832 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 833 &zfsvfs->z_unlinkedobj); 834 if (error) 835 goto out; 836 837 error = zap_lookup(os, MASTER_NODE_OBJ, 838 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 839 8, 1, &zfsvfs->z_userquota_obj); 840 if (error && error != ENOENT) 841 goto out; 842 843 error = zap_lookup(os, MASTER_NODE_OBJ, 844 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 845 8, 1, &zfsvfs->z_groupquota_obj); 846 if (error && error != ENOENT) 847 goto out; 848 849 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 850 &zfsvfs->z_fuid_obj); 851 if (error && error != ENOENT) 852 goto out; 853 854 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 855 &zfsvfs->z_shares_dir); 856 if (error && error != ENOENT) 857 goto out; 858 859 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 860 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 861 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 862 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 863 offsetof(znode_t, z_link_node)); 864 rrw_init(&zfsvfs->z_teardown_lock); 865 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 866 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 867 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 868 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 869 870 *zvp = zfsvfs; 871 return (0); 872 873out: 874 dmu_objset_close(os); 875 *zvp = NULL; 876 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 877 return (error); 878} 879 880static int 881zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 882{ 883 int error; 884 885 error = zfs_register_callbacks(zfsvfs->z_vfs); 886 if (error) 887 return (error); 888 889 /* 890 * Set the objset user_ptr to track its zfsvfs. 891 */ 892 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 893 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 894 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 895 896 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 897 if (zil_disable) { 898 zil_destroy(zfsvfs->z_log, B_FALSE); 899 zfsvfs->z_log = NULL; 900 } 901 902 /* 903 * If we are not mounting (ie: online recv), then we don't 904 * have to worry about replaying the log as we blocked all 905 * operations out since we closed the ZIL. 906 */ 907 if (mounting) { 908 boolean_t readonly; 909 910 /* 911 * During replay we remove the read only flag to 912 * allow replays to succeed. 913 */ 914 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 915 if (readonly != 0) 916 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 917 else 918 zfs_unlinked_drain(zfsvfs); 919 920 if (zfsvfs->z_log) { 921 /* 922 * Parse and replay the intent log. 923 * 924 * Because of ziltest, this must be done after 925 * zfs_unlinked_drain(). (Further note: ziltest 926 * doesn't use readonly mounts, where 927 * zfs_unlinked_drain() isn't called.) This is because 928 * ziltest causes spa_sync() to think it's committed, 929 * but actually it is not, so the intent log contains 930 * many txg's worth of changes. 931 * 932 * In particular, if object N is in the unlinked set in 933 * the last txg to actually sync, then it could be 934 * actually freed in a later txg and then reallocated 935 * in a yet later txg. This would write a "create 936 * object N" record to the intent log. Normally, this 937 * would be fine because the spa_sync() would have 938 * written out the fact that object N is free, before 939 * we could write the "create object N" intent log 940 * record. 941 * 942 * But when we are in ziltest mode, we advance the "open 943 * txg" without actually spa_sync()-ing the changes to 944 * disk. So we would see that object N is still 945 * allocated and in the unlinked set, and there is an 946 * intent log record saying to allocate it. 947 */ 948 zfsvfs->z_replay = B_TRUE; 949 zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); 950 zfsvfs->z_replay = B_FALSE; 951 } 952 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 953 } 954 955 return (0); 956} 957 958extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 959 960void 961zfsvfs_free(zfsvfs_t *zfsvfs) 962{ 963 int i; 964 965 /* 966 * This is a barrier to prevent the filesystem from going away in 967 * zfs_znode_move() until we can safely ensure that the filesystem is 968 * not unmounted. We consider the filesystem valid before the barrier 969 * and invalid after the barrier. 970 */ 971 rw_enter(&zfsvfs_lock, RW_READER); 972 rw_exit(&zfsvfs_lock); 973 974 zfs_fuid_destroy(zfsvfs); 975 976 mutex_destroy(&zfsvfs->z_znodes_lock); 977 mutex_destroy(&zfsvfs->z_online_recv_lock); 978 mutex_destroy(&zfsvfs->z_lock); 979 list_destroy(&zfsvfs->z_all_znodes); 980 rrw_destroy(&zfsvfs->z_teardown_lock); 981 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 982 rw_destroy(&zfsvfs->z_fuid_lock); 983 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 984 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 985 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 986} 987 988static void 989zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 990{ 991 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 992 if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { 993 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 994 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 995 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 996 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 997 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 998 } 999} 1000 1001static int 1002zfs_domount(vfs_t *vfsp, char *osname) 1003{ 1004 uint64_t recordsize, fsid_guid; 1005 int error = 0; 1006 zfsvfs_t *zfsvfs; 1007 vnode_t *vp; 1008 1009 ASSERT(vfsp); 1010 ASSERT(osname); 1011 1012 error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs); 1013 if (error) 1014 return (error); 1015 zfsvfs->z_vfs = vfsp; 1016 1017 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1018 NULL)) 1019 goto out; 1020 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1021 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1022 1023 vfsp->vfs_data = zfsvfs; 1024 vfsp->mnt_flag |= MNT_LOCAL; 1025 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 1026 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1027 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1028 1029 1030 /* 1031 * The fsid is 64 bits, composed of an 8-bit fs type, which 1032 * separates our fsid from any other filesystem types, and a 1033 * 56-bit objset unique ID. The objset unique ID is unique to 1034 * all objsets open on this system, provided by unique_create(). 1035 * The 8-bit fs type must be put in the low bits of fsid[1] 1036 * because that's where other Solaris filesystems put it. 1037 */ 1038 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1039 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1040 vfsp->vfs_fsid.val[0] = fsid_guid; 1041 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1042 vfsp->mnt_vfc->vfc_typenum & 0xFF; 1043 1044 /* 1045 * Set features for file system. 1046 */ 1047 zfs_set_fuid_feature(zfsvfs); 1048 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1049 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1050 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1051 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1052 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1053 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1054 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1055 } 1056 1057 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1058 uint64_t pval; 1059 1060 atime_changed_cb(zfsvfs, B_FALSE); 1061 readonly_changed_cb(zfsvfs, B_TRUE); 1062 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1063 goto out; 1064 xattr_changed_cb(zfsvfs, pval); 1065 zfsvfs->z_issnap = B_TRUE; 1066 1067 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 1068 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1069 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 1070 } else { 1071 error = zfsvfs_setup(zfsvfs, B_TRUE); 1072 } 1073 1074 vfs_mountedfrom(vfsp, osname); 1075 /* Grab extra reference. */ 1076 VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0); 1077 VOP_UNLOCK(vp, 0); 1078 1079 if (!zfsvfs->z_issnap) 1080 zfsctl_create(zfsvfs); 1081out: 1082 if (error) { 1083 dmu_objset_close(zfsvfs->z_os); 1084 zfsvfs_free(zfsvfs); 1085 } else { 1086 atomic_add_32(&zfs_active_fs_count, 1); 1087 } 1088 1089 return (error); 1090} 1091 1092void 1093zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1094{ 1095 objset_t *os = zfsvfs->z_os; 1096 struct dsl_dataset *ds; 1097 1098 /* 1099 * Unregister properties. 1100 */ 1101 if (!dmu_objset_is_snapshot(os)) { 1102 ds = dmu_objset_ds(os); 1103 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1104 zfsvfs) == 0); 1105 1106 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 1107 zfsvfs) == 0); 1108 1109 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1110 zfsvfs) == 0); 1111 1112 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1113 zfsvfs) == 0); 1114 1115 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1116 zfsvfs) == 0); 1117 1118 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1119 zfsvfs) == 0); 1120 1121 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1122 zfsvfs) == 0); 1123 1124 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 1125 zfsvfs) == 0); 1126 1127 VERIFY(dsl_prop_unregister(ds, "aclinherit", 1128 acl_inherit_changed_cb, zfsvfs) == 0); 1129 1130 VERIFY(dsl_prop_unregister(ds, "vscan", 1131 vscan_changed_cb, zfsvfs) == 0); 1132 } 1133} 1134 1135/*ARGSUSED*/ 1136static int 1137zfs_mount(vfs_t *vfsp) 1138{ 1139 kthread_t *td = curthread; 1140 vnode_t *mvp = vfsp->mnt_vnodecovered; 1141 cred_t *cr = td->td_ucred; 1142 char *osname; 1143 int error = 0; 1144 int canwrite; 1145 1146 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1147 return (EINVAL); 1148 1149 /* 1150 * If full-owner-access is enabled and delegated administration is 1151 * turned on, we must set nosuid. 1152 */ 1153 if (zfs_super_owner && 1154 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1155 secpolicy_fs_mount_clearopts(cr, vfsp); 1156 } 1157 1158 /* 1159 * Check for mount privilege? 1160 * 1161 * If we don't have privilege then see if 1162 * we have local permission to allow it 1163 */ 1164 error = secpolicy_fs_mount(cr, mvp, vfsp); 1165 if (error) { 1166 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1167 goto out; 1168 1169 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1170 vattr_t vattr; 1171 1172 /* 1173 * Make sure user is the owner of the mount point 1174 * or has sufficient privileges. 1175 */ 1176 1177 vattr.va_mask = AT_UID; 1178 1179 vn_lock(mvp, LK_SHARED | LK_RETRY); 1180 if (VOP_GETATTR(mvp, &vattr, cr)) { 1181 VOP_UNLOCK(mvp, 0); 1182 goto out; 1183 } 1184 1185 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1186 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1187 VOP_UNLOCK(mvp, 0); 1188 goto out; 1189 } 1190 VOP_UNLOCK(mvp, 0); 1191 } 1192 1193 secpolicy_fs_mount_clearopts(cr, vfsp); 1194 } 1195 1196 /* 1197 * Refuse to mount a filesystem if we are in a local zone and the 1198 * dataset is not visible. 1199 */ 1200 if (!INGLOBALZONE(curthread) && 1201 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1202 error = EPERM; 1203 goto out; 1204 } 1205 1206 /* 1207 * When doing a remount, we simply refresh our temporary properties 1208 * according to those options set in the current VFS options. 1209 */ 1210 if (vfsp->vfs_flag & MS_REMOUNT) { 1211 /* refresh mount options */ 1212 zfs_unregister_callbacks(vfsp->vfs_data); 1213 error = zfs_register_callbacks(vfsp); 1214 goto out; 1215 } 1216 1217 DROP_GIANT(); 1218 error = zfs_domount(vfsp, osname); 1219 PICKUP_GIANT(); 1220 1221 /* 1222 * Add an extra VFS_HOLD on our parent vfs so that it can't 1223 * disappear due to a forced unmount. 1224 */ 1225 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1226 VFS_HOLD(mvp->v_vfsp); 1227 1228out: 1229 return (error); 1230} 1231 1232static int 1233zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1234{ 1235 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1236 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1237 1238 statp->f_version = STATFS_VERSION; 1239 1240 ZFS_ENTER(zfsvfs); 1241 1242 dmu_objset_space(zfsvfs->z_os, 1243 &refdbytes, &availbytes, &usedobjs, &availobjs); 1244 1245 /* 1246 * The underlying storage pool actually uses multiple block sizes. 1247 * We report the fragsize as the smallest block size we support, 1248 * and we report our blocksize as the filesystem's maximum blocksize. 1249 */ 1250 statp->f_bsize = SPA_MINBLOCKSIZE; 1251 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1252 1253 /* 1254 * The following report "total" blocks of various kinds in the 1255 * file system, but reported in terms of f_frsize - the 1256 * "fragment" size. 1257 */ 1258 1259 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1260 statp->f_bfree = availbytes / statp->f_bsize; 1261 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1262 1263 /* 1264 * statvfs() should really be called statufs(), because it assumes 1265 * static metadata. ZFS doesn't preallocate files, so the best 1266 * we can do is report the max that could possibly fit in f_files, 1267 * and that minus the number actually used in f_ffree. 1268 * For f_ffree, report the smaller of the number of object available 1269 * and the number of blocks (each object will take at least a block). 1270 */ 1271 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1272 statp->f_files = statp->f_ffree + usedobjs; 1273 1274 /* 1275 * We're a zfs filesystem. 1276 */ 1277 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 1278 1279 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1280 sizeof(statp->f_mntfromname)); 1281 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1282 sizeof(statp->f_mntonname)); 1283 1284 statp->f_namemax = ZFS_MAXNAMELEN; 1285 1286 ZFS_EXIT(zfsvfs); 1287 return (0); 1288} 1289 1290static int 1291zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1292{ 1293 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1294 znode_t *rootzp; 1295 int error; 1296 1297 ZFS_ENTER_NOERROR(zfsvfs); 1298 1299 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1300 1301 ZFS_EXIT(zfsvfs); 1302 1303 if (error == 0) { 1304 *vpp = ZTOV(rootzp); 1305 error = vn_lock(*vpp, flags); 1306 (*vpp)->v_vflag |= VV_ROOT; 1307 } 1308 1309 return (error); 1310} 1311 1312/* 1313 * Teardown the zfsvfs::z_os. 1314 * 1315 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1316 * and 'z_teardown_inactive_lock' held. 1317 */ 1318static int 1319zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1320{ 1321 znode_t *zp; 1322 1323 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1324 1325 if (!unmounting) { 1326 /* 1327 * We purge the parent filesystem's vfsp as the parent 1328 * filesystem and all of its snapshots have their vnode's 1329 * v_vfsp set to the parent's filesystem's vfsp. Note, 1330 * 'z_parent' is self referential for non-snapshots. 1331 */ 1332 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1333#ifdef FREEBSD_NAMECACHE 1334 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1335#endif 1336 } 1337 1338 /* 1339 * Close the zil. NB: Can't close the zil while zfs_inactive 1340 * threads are blocked as zil_close can call zfs_inactive. 1341 */ 1342 if (zfsvfs->z_log) { 1343 zil_close(zfsvfs->z_log); 1344 zfsvfs->z_log = NULL; 1345 } 1346 1347 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1348 1349 /* 1350 * If we are not unmounting (ie: online recv) and someone already 1351 * unmounted this file system while we were doing the switcheroo, 1352 * or a reopen of z_os failed then just bail out now. 1353 */ 1354 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1355 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1356 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1357 return (EIO); 1358 } 1359 1360 /* 1361 * At this point there are no vops active, and any new vops will 1362 * fail with EIO since we have z_teardown_lock for writer (only 1363 * relavent for forced unmount). 1364 * 1365 * Release all holds on dbufs. 1366 */ 1367 mutex_enter(&zfsvfs->z_znodes_lock); 1368 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1369 zp = list_next(&zfsvfs->z_all_znodes, zp)) 1370 if (zp->z_dbuf) { 1371 ASSERT(ZTOV(zp)->v_count >= 0); 1372 zfs_znode_dmu_fini(zp); 1373 } 1374 mutex_exit(&zfsvfs->z_znodes_lock); 1375 1376 /* 1377 * If we are unmounting, set the unmounted flag and let new vops 1378 * unblock. zfs_inactive will have the unmounted behavior, and all 1379 * other vops will fail with EIO. 1380 */ 1381 if (unmounting) { 1382 zfsvfs->z_unmounted = B_TRUE; 1383 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1384 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1385 1386#ifdef __FreeBSD__ 1387 /* 1388 * Some znodes might not be fully reclaimed, wait for them. 1389 */ 1390 mutex_enter(&zfsvfs->z_znodes_lock); 1391 while (list_head(&zfsvfs->z_all_znodes) != NULL) { 1392 msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 1393 "zteardown", 0); 1394 } 1395 mutex_exit(&zfsvfs->z_znodes_lock); 1396#endif 1397 } 1398 1399 /* 1400 * z_os will be NULL if there was an error in attempting to reopen 1401 * zfsvfs, so just return as the properties had already been 1402 * unregistered and cached data had been evicted before. 1403 */ 1404 if (zfsvfs->z_os == NULL) 1405 return (0); 1406 1407 /* 1408 * Unregister properties. 1409 */ 1410 zfs_unregister_callbacks(zfsvfs); 1411 1412 /* 1413 * Evict cached data 1414 */ 1415 if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 1416 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1417 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 1418 } 1419 1420 return (0); 1421} 1422 1423/*ARGSUSED*/ 1424static int 1425zfs_umount(vfs_t *vfsp, int fflag) 1426{ 1427 kthread_t *td = curthread; 1428 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1429 objset_t *os; 1430 cred_t *cr = td->td_ucred; 1431 int ret; 1432 1433 ret = secpolicy_fs_unmount(cr, vfsp); 1434 if (ret) { 1435 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1436 ZFS_DELEG_PERM_MOUNT, cr)) 1437 return (ret); 1438 } 1439 /* 1440 * We purge the parent filesystem's vfsp as the parent filesystem 1441 * and all of its snapshots have their vnode's v_vfsp set to the 1442 * parent's filesystem's vfsp. Note, 'z_parent' is self 1443 * referential for non-snapshots. 1444 */ 1445 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1446 1447 /* 1448 * Unmount any snapshots mounted under .zfs before unmounting the 1449 * dataset itself. 1450 */ 1451 if (zfsvfs->z_ctldir != NULL) { 1452 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1453 return (ret); 1454 ret = vflush(vfsp, 0, 0, td); 1455 ASSERT(ret == EBUSY); 1456 if (!(fflag & MS_FORCE)) { 1457 if (zfsvfs->z_ctldir->v_count > 1) 1458 return (EBUSY); 1459 ASSERT(zfsvfs->z_ctldir->v_count == 1); 1460 } 1461 zfsctl_destroy(zfsvfs); 1462 ASSERT(zfsvfs->z_ctldir == NULL); 1463 } 1464 1465 if (fflag & MS_FORCE) { 1466 /* 1467 * Mark file system as unmounted before calling 1468 * vflush(FORCECLOSE). This way we ensure no future vnops 1469 * will be called and risk operating on DOOMED vnodes. 1470 */ 1471 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1472 zfsvfs->z_unmounted = B_TRUE; 1473 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1474 } 1475 1476 /* 1477 * Flush all the files. 1478 */ 1479 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1480 if (ret != 0) { 1481 if (!zfsvfs->z_issnap) { 1482 zfsctl_create(zfsvfs); 1483 ASSERT(zfsvfs->z_ctldir != NULL); 1484 } 1485 return (ret); 1486 } 1487 1488 if (!(fflag & MS_FORCE)) { 1489 /* 1490 * Check the number of active vnodes in the file system. 1491 * Our count is maintained in the vfs structure, but the 1492 * number is off by 1 to indicate a hold on the vfs 1493 * structure itself. 1494 * 1495 * The '.zfs' directory maintains a reference of its 1496 * own, and any active references underneath are 1497 * reflected in the vnode count. 1498 */ 1499 if (zfsvfs->z_ctldir == NULL) { 1500 if (vfsp->vfs_count > 1) 1501 return (EBUSY); 1502 } else { 1503 if (vfsp->vfs_count > 2 || 1504 zfsvfs->z_ctldir->v_count > 1) 1505 return (EBUSY); 1506 } 1507 } else { 1508 MNT_ILOCK(vfsp); 1509 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1510 MNT_IUNLOCK(vfsp); 1511 } 1512 1513 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1514 os = zfsvfs->z_os; 1515 1516 /* 1517 * z_os will be NULL if there was an error in 1518 * attempting to reopen zfsvfs. 1519 */ 1520 if (os != NULL) { 1521 /* 1522 * Unset the objset user_ptr. 1523 */ 1524 mutex_enter(&os->os->os_user_ptr_lock); 1525 dmu_objset_set_user(os, NULL); 1526 mutex_exit(&os->os->os_user_ptr_lock); 1527 1528 /* 1529 * Finally release the objset 1530 */ 1531 dmu_objset_close(os); 1532 } 1533 1534 /* 1535 * We can now safely destroy the '.zfs' directory node. 1536 */ 1537 if (zfsvfs->z_ctldir != NULL) 1538 zfsctl_destroy(zfsvfs); 1539 if (zfsvfs->z_issnap) { 1540 vnode_t *svp = vfsp->mnt_vnodecovered; 1541 1542 if (svp->v_count >= 2) 1543 VN_RELE(svp); 1544 } 1545 zfs_freevfs(vfsp); 1546 1547 return (0); 1548} 1549 1550static int 1551zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1552{ 1553 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1554 znode_t *zp; 1555 int err; 1556 1557 /* 1558 * zfs_zget() can't operate on virtual entires like .zfs/ or 1559 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1560 * This will make NFS to switch to LOOKUP instead of using VGET. 1561 */ 1562 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 1563 return (EOPNOTSUPP); 1564 1565 ZFS_ENTER(zfsvfs); 1566 err = zfs_zget(zfsvfs, ino, &zp); 1567 if (err == 0 && zp->z_unlinked) { 1568 VN_RELE(ZTOV(zp)); 1569 err = EINVAL; 1570 } 1571 ZFS_EXIT(zfsvfs); 1572 if (err != 0) 1573 *vpp = NULL; 1574 else { 1575 *vpp = ZTOV(zp); 1576 vn_lock(*vpp, flags); 1577 } 1578 return (err); 1579} 1580 1581static int 1582zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1583 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1584{ 1585 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1586 1587 /* 1588 * If this is regular file system vfsp is the same as 1589 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1590 * zfsvfs->z_parent->z_vfs represents parent file system 1591 * which we have to use here, because only this file system 1592 * has mnt_export configured. 1593 */ 1594 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1595 credanonp, numsecflavors, secflavors)); 1596} 1597 1598CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 1599CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 1600 1601static int 1602zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 1603{ 1604 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1605 znode_t *zp; 1606 uint64_t object = 0; 1607 uint64_t fid_gen = 0; 1608 uint64_t gen_mask; 1609 uint64_t zp_gen; 1610 int i, err; 1611 1612 *vpp = NULL; 1613 1614 ZFS_ENTER(zfsvfs); 1615 1616 /* 1617 * On FreeBSD we can get snapshot's mount point or its parent file 1618 * system mount point depending if snapshot is already mounted or not. 1619 */ 1620 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1621 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1622 uint64_t objsetid = 0; 1623 uint64_t setgen = 0; 1624 1625 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1626 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1627 1628 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1629 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1630 1631 ZFS_EXIT(zfsvfs); 1632 1633 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1634 if (err) 1635 return (EINVAL); 1636 ZFS_ENTER(zfsvfs); 1637 } 1638 1639 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1640 zfid_short_t *zfid = (zfid_short_t *)fidp; 1641 1642 for (i = 0; i < sizeof (zfid->zf_object); i++) 1643 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1644 1645 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1646 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1647 } else { 1648 ZFS_EXIT(zfsvfs); 1649 return (EINVAL); 1650 } 1651 1652 /* A zero fid_gen means we are in the .zfs control directories */ 1653 if (fid_gen == 0 && 1654 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1655 *vpp = zfsvfs->z_ctldir; 1656 ASSERT(*vpp != NULL); 1657 if (object == ZFSCTL_INO_SNAPDIR) { 1658 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1659 0, NULL, NULL, NULL, NULL, NULL) == 0); 1660 } else { 1661 VN_HOLD(*vpp); 1662 } 1663 ZFS_EXIT(zfsvfs); 1664 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1665 return (0); 1666 } 1667 1668 gen_mask = -1ULL >> (64 - 8 * i); 1669 1670 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1671 if (err = zfs_zget(zfsvfs, object, &zp)) { 1672 ZFS_EXIT(zfsvfs); 1673 return (err); 1674 } 1675 zp_gen = zp->z_phys->zp_gen & gen_mask; 1676 if (zp_gen == 0) 1677 zp_gen = 1; 1678 if (zp->z_unlinked || zp_gen != fid_gen) { 1679 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1680 VN_RELE(ZTOV(zp)); 1681 ZFS_EXIT(zfsvfs); 1682 return (EINVAL); 1683 } 1684 1685 ZFS_EXIT(zfsvfs); 1686 1687 *vpp = ZTOV(zp); 1688 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1689 vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); 1690 return (0); 1691} 1692 1693/* 1694 * Block out VOPs and close zfsvfs_t::z_os 1695 * 1696 * Note, if successful, then we return with the 'z_teardown_lock' and 1697 * 'z_teardown_inactive_lock' write held. 1698 */ 1699int 1700zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) 1701{ 1702 int error; 1703 1704 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1705 return (error); 1706 1707 *modep = zfsvfs->z_os->os_mode; 1708 if (name) 1709 dmu_objset_name(zfsvfs->z_os, name); 1710 dmu_objset_close(zfsvfs->z_os); 1711 1712 return (0); 1713} 1714 1715/* 1716 * Reopen zfsvfs_t::z_os and release VOPs. 1717 */ 1718int 1719zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1720{ 1721 int err; 1722 1723 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1724 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1725 1726 err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1727 if (err) { 1728 zfsvfs->z_os = NULL; 1729 } else { 1730 znode_t *zp; 1731 1732 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1733 1734 /* 1735 * Attempt to re-establish all the active znodes with 1736 * their dbufs. If a zfs_rezget() fails, then we'll let 1737 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1738 * when they try to use their znode. 1739 */ 1740 mutex_enter(&zfsvfs->z_znodes_lock); 1741 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1742 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1743 (void) zfs_rezget(zp); 1744 } 1745 mutex_exit(&zfsvfs->z_znodes_lock); 1746 1747 } 1748 1749 /* release the VOPs */ 1750 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1751 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1752 1753 if (err) { 1754 /* 1755 * Since we couldn't reopen zfsvfs::z_os, force 1756 * unmount this file system. 1757 */ 1758 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1759 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1760 } 1761 return (err); 1762} 1763 1764static void 1765zfs_freevfs(vfs_t *vfsp) 1766{ 1767 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1768 1769 /* 1770 * If this is a snapshot, we have an extra VFS_HOLD on our parent 1771 * from zfs_mount(). Release it here. 1772 */ 1773 if (zfsvfs->z_issnap) 1774 VFS_RELE(zfsvfs->z_parent->z_vfs); 1775 1776 zfsvfs_free(zfsvfs); 1777 1778 atomic_add_32(&zfs_active_fs_count, -1); 1779} 1780 1781#ifdef __i386__ 1782static int desiredvnodes_backup; 1783#endif 1784 1785static void 1786zfs_vnodes_adjust(void) 1787{ 1788#ifdef __i386__ 1789 int newdesiredvnodes; 1790 1791 desiredvnodes_backup = desiredvnodes; 1792 1793 /* 1794 * We calculate newdesiredvnodes the same way it is done in 1795 * vntblinit(). If it is equal to desiredvnodes, it means that 1796 * it wasn't tuned by the administrator and we can tune it down. 1797 */ 1798 newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 1799 vm_kmem_size / (5 * (sizeof(struct vm_object) + 1800 sizeof(struct vnode)))); 1801 if (newdesiredvnodes == desiredvnodes) 1802 desiredvnodes = (3 * newdesiredvnodes) / 4; 1803#endif 1804} 1805 1806static void 1807zfs_vnodes_adjust_back(void) 1808{ 1809 1810#ifdef __i386__ 1811 desiredvnodes = desiredvnodes_backup; 1812#endif 1813} 1814 1815void 1816zfs_init(void) 1817{ 1818 1819 printf("ZFS filesystem version " ZPL_VERSION_STRING "\n"); 1820 1821 /* 1822 * Initialize znode cache, vnode ops, etc... 1823 */ 1824 zfs_znode_init(); 1825 1826 /* 1827 * Initialize .zfs directory structures 1828 */ 1829 zfsctl_init(); 1830 1831 /* 1832 * Reduce number of vnode. Originally number of vnodes is calculated 1833 * with UFS inode in mind. We reduce it here, because it's too big for 1834 * ZFS/i386. 1835 */ 1836 zfs_vnodes_adjust(); 1837 1838 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 1839} 1840 1841void 1842zfs_fini(void) 1843{ 1844 zfsctl_fini(); 1845 zfs_znode_fini(); 1846 zfs_vnodes_adjust_back(); 1847} 1848 1849int 1850zfs_busy(void) 1851{ 1852 return (zfs_active_fs_count != 0); 1853} 1854 1855int 1856zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 1857{ 1858 int error; 1859 objset_t *os = zfsvfs->z_os; 1860 dmu_tx_t *tx; 1861 1862 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1863 return (EINVAL); 1864 1865 if (newvers < zfsvfs->z_version) 1866 return (EINVAL); 1867 1868 tx = dmu_tx_create(os); 1869 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 1870 error = dmu_tx_assign(tx, TXG_WAIT); 1871 if (error) { 1872 dmu_tx_abort(tx); 1873 return (error); 1874 } 1875 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1876 8, 1, &newvers, tx); 1877 1878 if (error) { 1879 dmu_tx_commit(tx); 1880 return (error); 1881 } 1882 1883 spa_history_internal_log(LOG_DS_UPGRADE, 1884 dmu_objset_spa(os), tx, CRED(), 1885 "oldver=%llu newver=%llu dataset = %llu", 1886 zfsvfs->z_version, newvers, dmu_objset_id(os)); 1887 1888 dmu_tx_commit(tx); 1889 1890 zfsvfs->z_version = newvers; 1891 1892 if (zfsvfs->z_version >= ZPL_VERSION_FUID) 1893 zfs_set_fuid_feature(zfsvfs); 1894 1895 return (0); 1896} 1897/* 1898 * Read a property stored within the master node. 1899 */ 1900int 1901zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1902{ 1903 const char *pname; 1904 int error = ENOENT; 1905 1906 /* 1907 * Look up the file system's value for the property. For the 1908 * version property, we look up a slightly different string. 1909 */ 1910 if (prop == ZFS_PROP_VERSION) 1911 pname = ZPL_VERSION_STR; 1912 else 1913 pname = zfs_prop_to_name(prop); 1914 1915 if (os != NULL) 1916 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1917 1918 if (error == ENOENT) { 1919 /* No value set, use the default value */ 1920 switch (prop) { 1921 case ZFS_PROP_VERSION: 1922 *value = ZPL_VERSION; 1923 break; 1924 case ZFS_PROP_NORMALIZE: 1925 case ZFS_PROP_UTF8ONLY: 1926 *value = 0; 1927 break; 1928 case ZFS_PROP_CASE: 1929 *value = ZFS_CASE_SENSITIVE; 1930 break; 1931 default: 1932 return (error); 1933 } 1934 error = 0; 1935 } 1936 return (error); 1937} 1938