/* zfs_vfsops.c revision 211932 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#include <sys/types.h> 27#include <sys/param.h> 28#include <sys/systm.h> 29#include <sys/kernel.h> 30#include <sys/sysmacros.h> 31#include <sys/kmem.h> 32#include <sys/acl.h> 33#include <sys/vnode.h> 34#include <sys/vfs.h> 35#include <sys/mntent.h> 36#include <sys/mount.h> 37#include <sys/cmn_err.h> 38#include <sys/zfs_znode.h> 39#include <sys/zfs_dir.h> 40#include <sys/zil.h> 41#include <sys/fs/zfs.h> 42#include <sys/dmu.h> 43#include <sys/dsl_prop.h> 44#include <sys/dsl_dataset.h> 45#include <sys/dsl_deleg.h> 46#include <sys/spa.h> 47#include <sys/zap.h> 48#include <sys/varargs.h> 49#include <sys/policy.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/zfs_fuid.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56#include <sys/dmu_objset.h> 57#include <sys/spa_boot.h> 58 59struct mtx zfs_debug_mtx; 60MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 61 62SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 63 64int 
zfs_super_owner = 0; 65SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 66 "File system owner can perform privileged operation on his file systems"); 67 68int zfs_debug_level = 0; 69TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 70SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 71 "Debug level"); 72 73SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 74static int zfs_version_acl = ZFS_ACL_VERSION; 75SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 76 "ZFS_ACL_VERSION"); 77static int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 78SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 79 &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 80static int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 81SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 82 &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 83static int zfs_version_spa = SPA_VERSION; 84SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 85 "SPA_VERSION"); 86static int zfs_version_zpl = ZPL_VERSION; 87SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 88 "ZPL_VERSION"); 89 90static int zfs_mount(vfs_t *vfsp); 91static int zfs_umount(vfs_t *vfsp, int fflag); 92static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 93static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 94static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 95static int zfs_sync(vfs_t *vfsp, int waitfor); 96static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 97 struct ucred **credanonp, int *numsecflavors, int **secflavors); 98static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 99static void zfs_objset_close(zfsvfs_t *zfsvfs); 100static void zfs_freevfs(vfs_t *vfsp); 101 102static struct vfsops zfs_vfsops = { 103 .vfs_mount = zfs_mount, 104 
.vfs_unmount = zfs_umount, 105 .vfs_root = zfs_root, 106 .vfs_statfs = zfs_statfs, 107 .vfs_vget = zfs_vget, 108 .vfs_sync = zfs_sync, 109 .vfs_checkexp = zfs_checkexp, 110 .vfs_fhtovp = zfs_fhtovp, 111}; 112 113VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 114 115/* 116 * We need to keep a count of active fs's. 117 * This is necessary to prevent our module 118 * from being unloaded after a umount -f 119 */ 120static uint32_t zfs_active_fs_count = 0; 121 122/*ARGSUSED*/ 123static int 124zfs_sync(vfs_t *vfsp, int waitfor) 125{ 126 127 /* 128 * Data integrity is job one. We don't want a compromised kernel 129 * writing to the storage pool, so we never sync during panic. 130 */ 131 if (panicstr) 132 return (0); 133 134 if (vfsp != NULL) { 135 /* 136 * Sync a specific filesystem. 137 */ 138 zfsvfs_t *zfsvfs = vfsp->vfs_data; 139 dsl_pool_t *dp; 140 int error; 141 142 error = vfs_stdsync(vfsp, waitfor); 143 if (error != 0) 144 return (error); 145 146 ZFS_ENTER(zfsvfs); 147 dp = dmu_objset_pool(zfsvfs->z_os); 148 149 /* 150 * If the system is shutting down, then skip any 151 * filesystems which may exist on a suspended pool. 152 */ 153 if (sys_shutdown && spa_suspended(dp->dp_spa)) { 154 ZFS_EXIT(zfsvfs); 155 return (0); 156 } 157 158 if (zfsvfs->z_log != NULL) 159 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 160 else 161 txg_wait_synced(dp, 0); 162 ZFS_EXIT(zfsvfs); 163 } else { 164 /* 165 * Sync all ZFS filesystems. This is what happens when you 166 * run sync(1M). Unlike other filesystems, ZFS honors the 167 * request by waiting for all pools to commit all dirty data. 
168 */ 169 spa_sync_allpools(); 170 } 171 172 return (0); 173} 174 175static void 176atime_changed_cb(void *arg, uint64_t newval) 177{ 178 zfsvfs_t *zfsvfs = arg; 179 180 if (newval == TRUE) { 181 zfsvfs->z_atime = TRUE; 182 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 183 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 184 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 185 } else { 186 zfsvfs->z_atime = FALSE; 187 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 188 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 189 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 190 } 191} 192 193static void 194xattr_changed_cb(void *arg, uint64_t newval) 195{ 196 zfsvfs_t *zfsvfs = arg; 197 198 if (newval == TRUE) { 199 /* XXX locking on vfs_flag? */ 200#ifdef TODO 201 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 202#endif 203 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 204 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 205 } else { 206 /* XXX locking on vfs_flag? */ 207#ifdef TODO 208 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 209#endif 210 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 211 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 212 } 213} 214 215static void 216blksz_changed_cb(void *arg, uint64_t newval) 217{ 218 zfsvfs_t *zfsvfs = arg; 219 220 if (newval < SPA_MINBLOCKSIZE || 221 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 222 newval = SPA_MAXBLOCKSIZE; 223 224 zfsvfs->z_max_blksz = newval; 225 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 226} 227 228static void 229readonly_changed_cb(void *arg, uint64_t newval) 230{ 231 zfsvfs_t *zfsvfs = arg; 232 233 if (newval) { 234 /* XXX locking on vfs_flag? */ 235 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 236 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 237 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 238 } else { 239 /* XXX locking on vfs_flag? 
*/ 240 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 241 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 242 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 243 } 244} 245 246static void 247setuid_changed_cb(void *arg, uint64_t newval) 248{ 249 zfsvfs_t *zfsvfs = arg; 250 251 if (newval == FALSE) { 252 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 253 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 254 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 255 } else { 256 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 257 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 258 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 259 } 260} 261 262static void 263exec_changed_cb(void *arg, uint64_t newval) 264{ 265 zfsvfs_t *zfsvfs = arg; 266 267 if (newval == FALSE) { 268 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 269 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 270 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 271 } else { 272 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 273 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 274 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 275 } 276} 277 278/* 279 * The nbmand mount option can be changed at mount time. 
280 * We can't allow it to be toggled on live file systems or incorrect 281 * behavior may be seen from cifs clients 282 * 283 * This property isn't registered via dsl_prop_register(), but this callback 284 * will be called when a file system is first mounted 285 */ 286static void 287nbmand_changed_cb(void *arg, uint64_t newval) 288{ 289 zfsvfs_t *zfsvfs = arg; 290 if (newval == FALSE) { 291 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 292 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 293 } else { 294 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 295 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 296 } 297} 298 299static void 300snapdir_changed_cb(void *arg, uint64_t newval) 301{ 302 zfsvfs_t *zfsvfs = arg; 303 304 zfsvfs->z_show_ctldir = newval; 305} 306 307static void 308vscan_changed_cb(void *arg, uint64_t newval) 309{ 310 zfsvfs_t *zfsvfs = arg; 311 312 zfsvfs->z_vscan = newval; 313} 314 315static void 316acl_mode_changed_cb(void *arg, uint64_t newval) 317{ 318 zfsvfs_t *zfsvfs = arg; 319 320 zfsvfs->z_acl_mode = newval; 321} 322 323static void 324acl_inherit_changed_cb(void *arg, uint64_t newval) 325{ 326 zfsvfs_t *zfsvfs = arg; 327 328 zfsvfs->z_acl_inherit = newval; 329} 330 331static int 332zfs_register_callbacks(vfs_t *vfsp) 333{ 334 struct dsl_dataset *ds = NULL; 335 objset_t *os = NULL; 336 zfsvfs_t *zfsvfs = NULL; 337 uint64_t nbmand; 338 int readonly, do_readonly = FALSE; 339 int setuid, do_setuid = FALSE; 340 int exec, do_exec = FALSE; 341 int xattr, do_xattr = FALSE; 342 int atime, do_atime = FALSE; 343 int error = 0; 344 345 ASSERT(vfsp); 346 zfsvfs = vfsp->vfs_data; 347 ASSERT(zfsvfs); 348 os = zfsvfs->z_os; 349 350 /* 351 * This function can be called for a snapshot when we update snapshot's 352 * mount point, which isn't really supported. 353 */ 354 if (dmu_objset_is_snapshot(os)) 355 return (EOPNOTSUPP); 356 357 /* 358 * The act of registering our callbacks will destroy any mount 359 * options we may have. 
In order to enable temporary overrides 360 * of mount options, we stash away the current values and 361 * restore them after we register the callbacks. 362 */ 363 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 364 readonly = B_TRUE; 365 do_readonly = B_TRUE; 366 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 367 readonly = B_FALSE; 368 do_readonly = B_TRUE; 369 } 370 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 371 setuid = B_FALSE; 372 do_setuid = B_TRUE; 373 } else { 374 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 375 setuid = B_FALSE; 376 do_setuid = B_TRUE; 377 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 378 setuid = B_TRUE; 379 do_setuid = B_TRUE; 380 } 381 } 382 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 383 exec = B_FALSE; 384 do_exec = B_TRUE; 385 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 386 exec = B_TRUE; 387 do_exec = B_TRUE; 388 } 389 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 390 xattr = B_FALSE; 391 do_xattr = B_TRUE; 392 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 393 xattr = B_TRUE; 394 do_xattr = B_TRUE; 395 } 396 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 397 atime = B_FALSE; 398 do_atime = B_TRUE; 399 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 400 atime = B_TRUE; 401 do_atime = B_TRUE; 402 } 403 404 /* 405 * nbmand is a special property. It can only be changed at 406 * mount time. 407 * 408 * This is weird, but it is documented to only be changeable 409 * at mount time. 410 */ 411 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 412 nbmand = B_FALSE; 413 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 414 nbmand = B_TRUE; 415 } else { 416 char osname[MAXNAMELEN]; 417 418 dmu_objset_name(os, osname); 419 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 420 NULL)) { 421 return (error); 422 } 423 } 424 425 /* 426 * Register property callbacks. 
427 * 428 * It would probably be fine to just check for i/o error from 429 * the first prop_register(), but I guess I like to go 430 * overboard... 431 */ 432 ds = dmu_objset_ds(os); 433 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 434 error = error ? error : dsl_prop_register(ds, 435 "xattr", xattr_changed_cb, zfsvfs); 436 error = error ? error : dsl_prop_register(ds, 437 "recordsize", blksz_changed_cb, zfsvfs); 438 error = error ? error : dsl_prop_register(ds, 439 "readonly", readonly_changed_cb, zfsvfs); 440 error = error ? error : dsl_prop_register(ds, 441 "setuid", setuid_changed_cb, zfsvfs); 442 error = error ? error : dsl_prop_register(ds, 443 "exec", exec_changed_cb, zfsvfs); 444 error = error ? error : dsl_prop_register(ds, 445 "snapdir", snapdir_changed_cb, zfsvfs); 446 error = error ? error : dsl_prop_register(ds, 447 "aclmode", acl_mode_changed_cb, zfsvfs); 448 error = error ? error : dsl_prop_register(ds, 449 "aclinherit", acl_inherit_changed_cb, zfsvfs); 450 error = error ? error : dsl_prop_register(ds, 451 "vscan", vscan_changed_cb, zfsvfs); 452 if (error) 453 goto unregister; 454 455 /* 456 * Invoke our callbacks to restore temporary mount options. 457 */ 458 if (do_readonly) 459 readonly_changed_cb(zfsvfs, readonly); 460 if (do_setuid) 461 setuid_changed_cb(zfsvfs, setuid); 462 if (do_exec) 463 exec_changed_cb(zfsvfs, exec); 464 if (do_xattr) 465 xattr_changed_cb(zfsvfs, xattr); 466 if (do_atime) 467 atime_changed_cb(zfsvfs, atime); 468 469 nbmand_changed_cb(zfsvfs, nbmand); 470 471 return (0); 472 473unregister: 474 /* 475 * We may attempt to unregister some callbacks that are not 476 * registered, but this is OK; it will simply return ENOMSG, 477 * which we will ignore. 
478 */ 479 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 480 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 481 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 482 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 483 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 484 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 485 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 486 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 487 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 488 zfsvfs); 489 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 490 return (error); 491 492} 493 494static void 495uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid, 496 int64_t delta, dmu_tx_t *tx) 497{ 498 uint64_t used = 0; 499 char buf[32]; 500 int err; 501 uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 502 503 if (delta == 0) 504 return; 505 506 (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid); 507 err = zap_lookup(os, obj, buf, 8, 1, &used); 508 ASSERT(err == 0 || err == ENOENT); 509 /* no underflow/overflow */ 510 ASSERT(delta > 0 || used >= -delta); 511 ASSERT(delta < 0 || used + delta > used); 512 used += delta; 513 if (used == 0) 514 err = zap_remove(os, obj, buf, tx); 515 else 516 err = zap_update(os, obj, buf, 8, 1, &used, tx); 517 ASSERT(err == 0); 518} 519 520static void 521zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype, 522 void *oldbonus, void *newbonus, 523 uint64_t oldused, uint64_t newused, dmu_tx_t *tx) 524{ 525 znode_phys_t *oldznp = oldbonus; 526 znode_phys_t *newznp = newbonus; 527 528 if (bonustype != DMU_OT_ZNODE) 529 return; 530 531 /* We charge 512 for the dnode (if it's allocated). 
*/ 532 if (oldznp->zp_gen != 0) 533 oldused += DNODE_SIZE; 534 if (newznp->zp_gen != 0) 535 newused += DNODE_SIZE; 536 537 if (oldznp->zp_uid == newznp->zp_uid) { 538 uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx); 539 } else { 540 uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx); 541 uidacct(os, B_FALSE, newznp->zp_uid, newused, tx); 542 } 543 544 if (oldznp->zp_gid == newznp->zp_gid) { 545 uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx); 546 } else { 547 uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx); 548 uidacct(os, B_TRUE, newznp->zp_gid, newused, tx); 549 } 550} 551 552static void 553fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 554 char *domainbuf, int buflen, uid_t *ridp) 555{ 556 uint64_t fuid; 557 const char *domain; 558 559 fuid = strtonum(fuidstr, NULL); 560 561 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 562 if (domain) 563 (void) strlcpy(domainbuf, domain, buflen); 564 else 565 domainbuf[0] = '\0'; 566 *ridp = FUID_RID(fuid); 567} 568 569static uint64_t 570zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 571{ 572 switch (type) { 573 case ZFS_PROP_USERUSED: 574 return (DMU_USERUSED_OBJECT); 575 case ZFS_PROP_GROUPUSED: 576 return (DMU_GROUPUSED_OBJECT); 577 case ZFS_PROP_USERQUOTA: 578 return (zfsvfs->z_userquota_obj); 579 case ZFS_PROP_GROUPQUOTA: 580 return (zfsvfs->z_groupquota_obj); 581 } 582 return (0); 583} 584 585int 586zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 587 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 588{ 589 int error; 590 zap_cursor_t zc; 591 zap_attribute_t za; 592 zfs_useracct_t *buf = vbuf; 593 uint64_t obj; 594 595 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 596 return (ENOTSUP); 597 598 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 599 if (obj == 0) { 600 *bufsizep = 0; 601 return (0); 602 } 603 604 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 605 (error = zap_cursor_retrieve(&zc, &za)) == 0; 606 
zap_cursor_advance(&zc)) { 607 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 608 *bufsizep) 609 break; 610 611 fuidstr_to_sid(zfsvfs, za.za_name, 612 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 613 614 buf->zu_space = za.za_first_integer; 615 buf++; 616 } 617 if (error == ENOENT) 618 error = 0; 619 620 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 621 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 622 *cookiep = zap_cursor_serialize(&zc); 623 zap_cursor_fini(&zc); 624 return (error); 625} 626 627/* 628 * buf must be big enough (eg, 32 bytes) 629 */ 630static int 631id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 632 char *buf, boolean_t addok) 633{ 634 uint64_t fuid; 635 int domainid = 0; 636 637 if (domain && domain[0]) { 638 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 639 if (domainid == -1) 640 return (ENOENT); 641 } 642 fuid = FUID_ENCODE(domainid, rid); 643 (void) sprintf(buf, "%llx", (longlong_t)fuid); 644 return (0); 645} 646 647int 648zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 649 const char *domain, uint64_t rid, uint64_t *valp) 650{ 651 char buf[32]; 652 int err; 653 uint64_t obj; 654 655 *valp = 0; 656 657 if (!dmu_objset_userspace_present(zfsvfs->z_os)) 658 return (ENOTSUP); 659 660 obj = zfs_userquota_prop_to_obj(zfsvfs, type); 661 if (obj == 0) 662 return (0); 663 664 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 665 if (err) 666 return (err); 667 668 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 669 if (err == ENOENT) 670 err = 0; 671 return (err); 672} 673 674int 675zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 676 const char *domain, uint64_t rid, uint64_t quota) 677{ 678 char buf[32]; 679 int err; 680 dmu_tx_t *tx; 681 uint64_t *objp; 682 boolean_t fuid_dirtied; 683 684 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 685 return (EINVAL); 686 687 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 688 
return (ENOTSUP); 689 690 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 691 &zfsvfs->z_groupquota_obj; 692 693 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 694 if (err) 695 return (err); 696 fuid_dirtied = zfsvfs->z_fuid_dirty; 697 698 tx = dmu_tx_create(zfsvfs->z_os); 699 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 700 if (*objp == 0) { 701 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 702 zfs_userquota_prop_prefixes[type]); 703 } 704 if (fuid_dirtied) 705 zfs_fuid_txhold(zfsvfs, tx); 706 err = dmu_tx_assign(tx, TXG_WAIT); 707 if (err) { 708 dmu_tx_abort(tx); 709 return (err); 710 } 711 712 mutex_enter(&zfsvfs->z_lock); 713 if (*objp == 0) { 714 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 715 DMU_OT_NONE, 0, tx); 716 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 717 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 718 } 719 mutex_exit(&zfsvfs->z_lock); 720 721 if (quota == 0) { 722 err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 723 if (err == ENOENT) 724 err = 0; 725 } else { 726 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 727 } 728 ASSERT(err == 0); 729 if (fuid_dirtied) 730 zfs_fuid_sync(zfsvfs, tx); 731 dmu_tx_commit(tx); 732 return (err); 733} 734 735boolean_t 736zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 737{ 738 char buf[32]; 739 uint64_t used, quota, usedobj, quotaobj; 740 int err; 741 742 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 743 quotaobj = isgroup ? 
zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 744 745 if (quotaobj == 0 || zfsvfs->z_replay) 746 return (B_FALSE); 747 748 (void) sprintf(buf, "%llx", (longlong_t)fuid); 749 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 750 if (err != 0) 751 return (B_FALSE); 752 753 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 754 if (err != 0) 755 return (B_FALSE); 756 return (used >= quota); 757} 758 759int 760zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) 761{ 762 objset_t *os; 763 zfsvfs_t *zfsvfs; 764 uint64_t zval; 765 int i, error; 766 767 if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL)) 768 return (error); 769 if (zval) 770 mode |= DS_MODE_READONLY; 771 772 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 773 if (error == EROFS) { 774 mode |= DS_MODE_READONLY; 775 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); 776 } 777 if (error) 778 return (error); 779 780 /* 781 * Initialize the zfs-specific filesystem structure. 782 * Should probably make this a kmem cache, shuffle fields, 783 * and just bzero up to z_hold_mtx[]. 
784 */ 785 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 786 zfsvfs->z_vfs = NULL; 787 zfsvfs->z_parent = zfsvfs; 788 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 789 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 790 zfsvfs->z_os = os; 791 792 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 793 if (error) { 794 goto out; 795 } else if (zfsvfs->z_version > ZPL_VERSION) { 796 (void) printf("Mismatched versions: File system " 797 "is version %llu on-disk format, which is " 798 "incompatible with this software version %lld!", 799 (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); 800 error = ENOTSUP; 801 goto out; 802 } 803 804 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 805 goto out; 806 zfsvfs->z_norm = (int)zval; 807 808 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 809 goto out; 810 zfsvfs->z_utf8 = (zval != 0); 811 812 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 813 goto out; 814 zfsvfs->z_case = (uint_t)zval; 815 816 /* 817 * Fold case on file systems that are always or sometimes case 818 * insensitive. 
819 */ 820 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 821 zfsvfs->z_case == ZFS_CASE_MIXED) 822 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 823 824 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 825 826 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 827 &zfsvfs->z_root); 828 if (error) 829 goto out; 830 ASSERT(zfsvfs->z_root != 0); 831 832 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 833 &zfsvfs->z_unlinkedobj); 834 if (error) 835 goto out; 836 837 error = zap_lookup(os, MASTER_NODE_OBJ, 838 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 839 8, 1, &zfsvfs->z_userquota_obj); 840 if (error && error != ENOENT) 841 goto out; 842 843 error = zap_lookup(os, MASTER_NODE_OBJ, 844 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 845 8, 1, &zfsvfs->z_groupquota_obj); 846 if (error && error != ENOENT) 847 goto out; 848 849 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 850 &zfsvfs->z_fuid_obj); 851 if (error && error != ENOENT) 852 goto out; 853 854 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 855 &zfsvfs->z_shares_dir); 856 if (error && error != ENOENT) 857 goto out; 858 859 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 860 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 861 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 862 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 863 offsetof(znode_t, z_link_node)); 864 rrw_init(&zfsvfs->z_teardown_lock); 865 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 866 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 867 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 868 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 869 870 *zvp = zfsvfs; 871 return (0); 872 873out: 874 dmu_objset_close(os); 875 *zvp = NULL; 876 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 877 return (error); 878} 879 880static int 881zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 882{ 883 int error; 884 885 
error = zfs_register_callbacks(zfsvfs->z_vfs); 886 if (error) 887 return (error); 888 889 /* 890 * Set the objset user_ptr to track its zfsvfs. 891 */ 892 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 893 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 894 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 895 896 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 897 if (zil_disable) { 898 zil_destroy(zfsvfs->z_log, B_FALSE); 899 zfsvfs->z_log = NULL; 900 } 901 902 /* 903 * If we are not mounting (ie: online recv), then we don't 904 * have to worry about replaying the log as we blocked all 905 * operations out since we closed the ZIL. 906 */ 907 if (mounting) { 908 boolean_t readonly; 909 910 /* 911 * During replay we remove the read only flag to 912 * allow replays to succeed. 913 */ 914 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 915 if (readonly != 0) 916 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 917 else 918 zfs_unlinked_drain(zfsvfs); 919 920 if (zfsvfs->z_log) { 921 /* 922 * Parse and replay the intent log. 923 * 924 * Because of ziltest, this must be done after 925 * zfs_unlinked_drain(). (Further note: ziltest 926 * doesn't use readonly mounts, where 927 * zfs_unlinked_drain() isn't called.) This is because 928 * ziltest causes spa_sync() to think it's committed, 929 * but actually it is not, so the intent log contains 930 * many txg's worth of changes. 931 * 932 * In particular, if object N is in the unlinked set in 933 * the last txg to actually sync, then it could be 934 * actually freed in a later txg and then reallocated 935 * in a yet later txg. This would write a "create 936 * object N" record to the intent log. Normally, this 937 * would be fine because the spa_sync() would have 938 * written out the fact that object N is free, before 939 * we could write the "create object N" intent log 940 * record. 941 * 942 * But when we are in ziltest mode, we advance the "open 943 * txg" without actually spa_sync()-ing the changes to 944 * disk. 
So we would see that object N is still 945 * allocated and in the unlinked set, and there is an 946 * intent log record saying to allocate it. 947 */ 948 zfsvfs->z_replay = B_TRUE; 949 zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); 950 zfsvfs->z_replay = B_FALSE; 951 } 952 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 953 } 954 955 return (0); 956} 957 958extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 959 960void 961zfsvfs_free(zfsvfs_t *zfsvfs) 962{ 963 int i; 964 965 /* 966 * This is a barrier to prevent the filesystem from going away in 967 * zfs_znode_move() until we can safely ensure that the filesystem is 968 * not unmounted. We consider the filesystem valid before the barrier 969 * and invalid after the barrier. 970 */ 971 rw_enter(&zfsvfs_lock, RW_READER); 972 rw_exit(&zfsvfs_lock); 973 974 zfs_fuid_destroy(zfsvfs); 975 976 mutex_destroy(&zfsvfs->z_znodes_lock); 977 mutex_destroy(&zfsvfs->z_online_recv_lock); 978 mutex_destroy(&zfsvfs->z_lock); 979 list_destroy(&zfsvfs->z_all_znodes); 980 rrw_destroy(&zfsvfs->z_teardown_lock); 981 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 982 rw_destroy(&zfsvfs->z_fuid_lock); 983 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 984 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 985 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 986} 987 988static void 989zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 990{ 991 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 992 if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { 993 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 994 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 995 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 996 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 997 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 998 } 999} 1000 1001static int 1002zfs_domount(vfs_t *vfsp, char *osname) 1003{ 1004 uint64_t recordsize, fsid_guid; 1005 int error = 0; 1006 zfsvfs_t *zfsvfs; 1007 vnode_t *vp; 1008 1009 ASSERT(vfsp); 1010 ASSERT(osname); 1011 1012 
error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs); 1013 if (error) 1014 return (error); 1015 zfsvfs->z_vfs = vfsp; 1016 1017 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1018 NULL)) 1019 goto out; 1020 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1021 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1022 1023 vfsp->vfs_data = zfsvfs; 1024 vfsp->mnt_flag |= MNT_LOCAL; 1025 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 1026 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1027 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1028 1029 1030 /* 1031 * The fsid is 64 bits, composed of an 8-bit fs type, which 1032 * separates our fsid from any other filesystem types, and a 1033 * 56-bit objset unique ID. The objset unique ID is unique to 1034 * all objsets open on this system, provided by unique_create(). 1035 * The 8-bit fs type must be put in the low bits of fsid[1] 1036 * because that's where other Solaris filesystems put it. 1037 */ 1038 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1039 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1040 vfsp->vfs_fsid.val[0] = fsid_guid; 1041 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1042 vfsp->mnt_vfc->vfc_typenum & 0xFF; 1043 1044 /* 1045 * Set features for file system. 
1046 */ 1047 zfs_set_fuid_feature(zfsvfs); 1048 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1049 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1050 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1051 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1052 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1053 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1054 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1055 } 1056 1057 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1058 uint64_t pval; 1059 1060 atime_changed_cb(zfsvfs, B_FALSE); 1061 readonly_changed_cb(zfsvfs, B_TRUE); 1062 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1063 goto out; 1064 xattr_changed_cb(zfsvfs, pval); 1065 zfsvfs->z_issnap = B_TRUE; 1066 1067 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 1068 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1069 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 1070 } else { 1071 error = zfsvfs_setup(zfsvfs, B_TRUE); 1072 } 1073 1074 vfs_mountedfrom(vfsp, osname); 1075 /* Grab extra reference. */ 1076 VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0); 1077 VOP_UNLOCK(vp, 0); 1078 1079 if (!zfsvfs->z_issnap) 1080 zfsctl_create(zfsvfs); 1081out: 1082 if (error) { 1083 dmu_objset_close(zfsvfs->z_os); 1084 zfsvfs_free(zfsvfs); 1085 } else { 1086 atomic_add_32(&zfs_active_fs_count, 1); 1087 } 1088 1089 return (error); 1090} 1091 1092void 1093zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1094{ 1095 objset_t *os = zfsvfs->z_os; 1096 struct dsl_dataset *ds; 1097 1098 /* 1099 * Unregister properties. 
1100 */ 1101 if (!dmu_objset_is_snapshot(os)) { 1102 ds = dmu_objset_ds(os); 1103 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1104 zfsvfs) == 0); 1105 1106 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 1107 zfsvfs) == 0); 1108 1109 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1110 zfsvfs) == 0); 1111 1112 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1113 zfsvfs) == 0); 1114 1115 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1116 zfsvfs) == 0); 1117 1118 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1119 zfsvfs) == 0); 1120 1121 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1122 zfsvfs) == 0); 1123 1124 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 1125 zfsvfs) == 0); 1126 1127 VERIFY(dsl_prop_unregister(ds, "aclinherit", 1128 acl_inherit_changed_cb, zfsvfs) == 0); 1129 1130 VERIFY(dsl_prop_unregister(ds, "vscan", 1131 vscan_changed_cb, zfsvfs) == 0); 1132 } 1133} 1134 1135/*ARGSUSED*/ 1136static int 1137zfs_mount(vfs_t *vfsp) 1138{ 1139 kthread_t *td = curthread; 1140 vnode_t *mvp = vfsp->mnt_vnodecovered; 1141 cred_t *cr = td->td_ucred; 1142 char *osname; 1143 int error = 0; 1144 int canwrite; 1145 1146 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1147 return (EINVAL); 1148 1149 /* 1150 * If full-owner-access is enabled and delegated administration is 1151 * turned on, we must set nosuid. 1152 */ 1153 if (zfs_super_owner && 1154 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1155 secpolicy_fs_mount_clearopts(cr, vfsp); 1156 } 1157 1158 /* 1159 * Check for mount privilege? 
1160 * 1161 * If we don't have privilege then see if 1162 * we have local permission to allow it 1163 */ 1164 error = secpolicy_fs_mount(cr, mvp, vfsp); 1165 if (error) { 1166 error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 1167 if (error != 0) 1168 goto out; 1169 1170 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1171 vattr_t vattr; 1172 1173 /* 1174 * Make sure user is the owner of the mount point 1175 * or has sufficient privileges. 1176 */ 1177 1178 vattr.va_mask = AT_UID; 1179 1180 vn_lock(mvp, LK_SHARED | LK_RETRY); 1181 if (error = VOP_GETATTR(mvp, &vattr, cr)) { 1182 VOP_UNLOCK(mvp, 0); 1183 goto out; 1184 } 1185 1186 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1187 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1188 VOP_UNLOCK(mvp, 0); 1189 goto out; 1190 } 1191 VOP_UNLOCK(mvp, 0); 1192 } 1193 1194 secpolicy_fs_mount_clearopts(cr, vfsp); 1195 } 1196 1197 /* 1198 * Refuse to mount a filesystem if we are in a local zone and the 1199 * dataset is not visible. 1200 */ 1201 if (!INGLOBALZONE(curthread) && 1202 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1203 error = EPERM; 1204 goto out; 1205 } 1206 1207 /* 1208 * When doing a remount, we simply refresh our temporary properties 1209 * according to those options set in the current VFS options. 1210 */ 1211 if (vfsp->vfs_flag & MS_REMOUNT) { 1212 /* refresh mount options */ 1213 zfs_unregister_callbacks(vfsp->vfs_data); 1214 error = zfs_register_callbacks(vfsp); 1215 goto out; 1216 } 1217 1218 DROP_GIANT(); 1219 error = zfs_domount(vfsp, osname); 1220 PICKUP_GIANT(); 1221 1222 /* 1223 * Add an extra VFS_HOLD on our parent vfs so that it can't 1224 * disappear due to a forced unmount. 1225 */ 1226 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1227 VFS_HOLD(mvp->v_vfsp); 1228 1229 /* 1230 * Add an extra VFS_HOLD on our parent vfs so that it can't 1231 * disappear due to a forced unmount. 
1232 */ 1233 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1234 VFS_HOLD(mvp->v_vfsp); 1235 1236out: 1237 return (error); 1238} 1239 1240static int 1241zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1242{ 1243 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1244 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1245 1246 statp->f_version = STATFS_VERSION; 1247 1248 ZFS_ENTER(zfsvfs); 1249 1250 dmu_objset_space(zfsvfs->z_os, 1251 &refdbytes, &availbytes, &usedobjs, &availobjs); 1252 1253 /* 1254 * The underlying storage pool actually uses multiple block sizes. 1255 * We report the fragsize as the smallest block size we support, 1256 * and we report our blocksize as the filesystem's maximum blocksize. 1257 */ 1258 statp->f_bsize = SPA_MINBLOCKSIZE; 1259 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1260 1261 /* 1262 * The following report "total" blocks of various kinds in the 1263 * file system, but reported in terms of f_frsize - the 1264 * "fragment" size. 1265 */ 1266 1267 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1268 statp->f_bfree = availbytes / statp->f_bsize; 1269 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1270 1271 /* 1272 * statvfs() should really be called statufs(), because it assumes 1273 * static metadata. ZFS doesn't preallocate files, so the best 1274 * we can do is report the max that could possibly fit in f_files, 1275 * and that minus the number actually used in f_ffree. 1276 * For f_ffree, report the smaller of the number of object available 1277 * and the number of blocks (each object will take at least a block). 1278 */ 1279 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1280 statp->f_files = statp->f_ffree + usedobjs; 1281 1282 /* 1283 * We're a zfs filesystem. 
1284 */ 1285 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 1286 1287 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1288 sizeof(statp->f_mntfromname)); 1289 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1290 sizeof(statp->f_mntonname)); 1291 1292 statp->f_namemax = ZFS_MAXNAMELEN; 1293 1294 ZFS_EXIT(zfsvfs); 1295 return (0); 1296} 1297 1298static int 1299zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1300{ 1301 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1302 znode_t *rootzp; 1303 int error; 1304 1305 ZFS_ENTER_NOERROR(zfsvfs); 1306 1307 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1308 1309 ZFS_EXIT(zfsvfs); 1310 1311 if (error == 0) { 1312 *vpp = ZTOV(rootzp); 1313 error = vn_lock(*vpp, flags); 1314 (*vpp)->v_vflag |= VV_ROOT; 1315 } 1316 1317 return (error); 1318} 1319 1320/* 1321 * Teardown the zfsvfs::z_os. 1322 * 1323 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1324 * and 'z_teardown_inactive_lock' held. 1325 */ 1326static int 1327zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1328{ 1329 znode_t *zp; 1330 1331 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1332 1333 if (!unmounting) { 1334 /* 1335 * We purge the parent filesystem's vfsp as the parent 1336 * filesystem and all of its snapshots have their vnode's 1337 * v_vfsp set to the parent's filesystem's vfsp. Note, 1338 * 'z_parent' is self referential for non-snapshots. 1339 */ 1340 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1341#ifdef FREEBSD_NAMECACHE 1342 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1343#endif 1344 } 1345 1346 /* 1347 * Close the zil. NB: Can't close the zil while zfs_inactive 1348 * threads are blocked as zil_close can call zfs_inactive. 
1349 */ 1350 if (zfsvfs->z_log) { 1351 zil_close(zfsvfs->z_log); 1352 zfsvfs->z_log = NULL; 1353 } 1354 1355 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1356 1357 /* 1358 * If we are not unmounting (ie: online recv) and someone already 1359 * unmounted this file system while we were doing the switcheroo, 1360 * or a reopen of z_os failed then just bail out now. 1361 */ 1362 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1363 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1364 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1365 return (EIO); 1366 } 1367 1368 /* 1369 * At this point there are no vops active, and any new vops will 1370 * fail with EIO since we have z_teardown_lock for writer (only 1371 * relavent for forced unmount). 1372 * 1373 * Release all holds on dbufs. 1374 */ 1375 mutex_enter(&zfsvfs->z_znodes_lock); 1376 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1377 zp = list_next(&zfsvfs->z_all_znodes, zp)) 1378 if (zp->z_dbuf) { 1379 ASSERT(ZTOV(zp)->v_count >= 0); 1380 zfs_znode_dmu_fini(zp); 1381 } 1382 mutex_exit(&zfsvfs->z_znodes_lock); 1383 1384 /* 1385 * If we are unmounting, set the unmounted flag and let new vops 1386 * unblock. zfs_inactive will have the unmounted behavior, and all 1387 * other vops will fail with EIO. 1388 */ 1389 if (unmounting) { 1390 zfsvfs->z_unmounted = B_TRUE; 1391 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1392 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1393 1394#ifdef __FreeBSD__ 1395 /* 1396 * Some znodes might not be fully reclaimed, wait for them. 
1397 */ 1398 mutex_enter(&zfsvfs->z_znodes_lock); 1399 while (list_head(&zfsvfs->z_all_znodes) != NULL) { 1400 msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 1401 "zteardown", 0); 1402 } 1403 mutex_exit(&zfsvfs->z_znodes_lock); 1404#endif 1405 } 1406 1407 /* 1408 * z_os will be NULL if there was an error in attempting to reopen 1409 * zfsvfs, so just return as the properties had already been 1410 * unregistered and cached data had been evicted before. 1411 */ 1412 if (zfsvfs->z_os == NULL) 1413 return (0); 1414 1415 /* 1416 * Unregister properties. 1417 */ 1418 zfs_unregister_callbacks(zfsvfs); 1419 1420 /* 1421 * Evict cached data 1422 */ 1423 if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 1424 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1425 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 1426 } 1427 1428 return (0); 1429} 1430 1431/*ARGSUSED*/ 1432static int 1433zfs_umount(vfs_t *vfsp, int fflag) 1434{ 1435 kthread_t *td = curthread; 1436 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1437 objset_t *os; 1438 cred_t *cr = td->td_ucred; 1439 int ret; 1440 1441 ret = secpolicy_fs_unmount(cr, vfsp); 1442 if (ret) { 1443 ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1444 ZFS_DELEG_PERM_MOUNT, cr); 1445 if (ret) 1446 return (ret); 1447 } 1448 /* 1449 * We purge the parent filesystem's vfsp as the parent filesystem 1450 * and all of its snapshots have their vnode's v_vfsp set to the 1451 * parent's filesystem's vfsp. Note, 'z_parent' is self 1452 * referential for non-snapshots. 1453 */ 1454 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1455 1456 /* 1457 * Unmount any snapshots mounted under .zfs before unmounting the 1458 * dataset itself. 
1459 */ 1460 if (zfsvfs->z_ctldir != NULL) { 1461 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1462 return (ret); 1463 ret = vflush(vfsp, 0, 0, td); 1464 ASSERT(ret == EBUSY); 1465 if (!(fflag & MS_FORCE)) { 1466 if (zfsvfs->z_ctldir->v_count > 1) 1467 return (EBUSY); 1468 ASSERT(zfsvfs->z_ctldir->v_count == 1); 1469 } 1470 zfsctl_destroy(zfsvfs); 1471 ASSERT(zfsvfs->z_ctldir == NULL); 1472 } 1473 1474 if (fflag & MS_FORCE) { 1475 /* 1476 * Mark file system as unmounted before calling 1477 * vflush(FORCECLOSE). This way we ensure no future vnops 1478 * will be called and risk operating on DOOMED vnodes. 1479 */ 1480 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1481 zfsvfs->z_unmounted = B_TRUE; 1482 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1483 } 1484 1485 /* 1486 * Flush all the files. 1487 */ 1488 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1489 if (ret != 0) { 1490 if (!zfsvfs->z_issnap) { 1491 zfsctl_create(zfsvfs); 1492 ASSERT(zfsvfs->z_ctldir != NULL); 1493 } 1494 return (ret); 1495 } 1496 1497 if (!(fflag & MS_FORCE)) { 1498 /* 1499 * Check the number of active vnodes in the file system. 1500 * Our count is maintained in the vfs structure, but the 1501 * number is off by 1 to indicate a hold on the vfs 1502 * structure itself. 1503 * 1504 * The '.zfs' directory maintains a reference of its 1505 * own, and any active references underneath are 1506 * reflected in the vnode count. 1507 */ 1508 if (zfsvfs->z_ctldir == NULL) { 1509 if (vfsp->vfs_count > 1) 1510 return (EBUSY); 1511 } else { 1512 if (vfsp->vfs_count > 2 || 1513 zfsvfs->z_ctldir->v_count > 1) 1514 return (EBUSY); 1515 } 1516 } else { 1517 MNT_ILOCK(vfsp); 1518 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1519 MNT_IUNLOCK(vfsp); 1520 } 1521 1522 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1523 os = zfsvfs->z_os; 1524 1525 /* 1526 * z_os will be NULL if there was an error in 1527 * attempting to reopen zfsvfs. 
1528 */ 1529 if (os != NULL) { 1530 /* 1531 * Unset the objset user_ptr. 1532 */ 1533 mutex_enter(&os->os->os_user_ptr_lock); 1534 dmu_objset_set_user(os, NULL); 1535 mutex_exit(&os->os->os_user_ptr_lock); 1536 1537 /* 1538 * Finally release the objset 1539 */ 1540 dmu_objset_close(os); 1541 } 1542 1543 /* 1544 * We can now safely destroy the '.zfs' directory node. 1545 */ 1546 if (zfsvfs->z_ctldir != NULL) 1547 zfsctl_destroy(zfsvfs); 1548 if (zfsvfs->z_issnap) { 1549 vnode_t *svp = vfsp->mnt_vnodecovered; 1550 1551 if (svp->v_count >= 2) 1552 VN_RELE(svp); 1553 } 1554 zfs_freevfs(vfsp); 1555 1556 return (0); 1557} 1558 1559static int 1560zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1561{ 1562 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1563 znode_t *zp; 1564 int err; 1565 1566 /* 1567 * zfs_zget() can't operate on virtual entires like .zfs/ or 1568 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1569 * This will make NFS to switch to LOOKUP instead of using VGET. 1570 */ 1571 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 1572 return (EOPNOTSUPP); 1573 1574 ZFS_ENTER(zfsvfs); 1575 err = zfs_zget(zfsvfs, ino, &zp); 1576 if (err == 0 && zp->z_unlinked) { 1577 VN_RELE(ZTOV(zp)); 1578 err = EINVAL; 1579 } 1580 ZFS_EXIT(zfsvfs); 1581 if (err != 0) 1582 *vpp = NULL; 1583 else { 1584 *vpp = ZTOV(zp); 1585 vn_lock(*vpp, flags); 1586 } 1587 return (err); 1588} 1589 1590static int 1591zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1592 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1593{ 1594 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1595 1596 /* 1597 * If this is regular file system vfsp is the same as 1598 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1599 * zfsvfs->z_parent->z_vfs represents parent file system 1600 * which we have to use here, because only this file system 1601 * has mnt_export configured. 
1602 */ 1603 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1604 credanonp, numsecflavors, secflavors)); 1605} 1606 1607CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 1608CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 1609 1610static int 1611zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 1612{ 1613 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1614 znode_t *zp; 1615 uint64_t object = 0; 1616 uint64_t fid_gen = 0; 1617 uint64_t gen_mask; 1618 uint64_t zp_gen; 1619 int i, err; 1620 1621 *vpp = NULL; 1622 1623 ZFS_ENTER(zfsvfs); 1624 1625 /* 1626 * On FreeBSD we can get snapshot's mount point or its parent file 1627 * system mount point depending if snapshot is already mounted or not. 1628 */ 1629 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1630 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1631 uint64_t objsetid = 0; 1632 uint64_t setgen = 0; 1633 1634 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1635 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1636 1637 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1638 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1639 1640 ZFS_EXIT(zfsvfs); 1641 1642 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1643 if (err) 1644 return (EINVAL); 1645 ZFS_ENTER(zfsvfs); 1646 } 1647 1648 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1649 zfid_short_t *zfid = (zfid_short_t *)fidp; 1650 1651 for (i = 0; i < sizeof (zfid->zf_object); i++) 1652 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1653 1654 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1655 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1656 } else { 1657 ZFS_EXIT(zfsvfs); 1658 return (EINVAL); 1659 } 1660 1661 /* A zero fid_gen means we are in the .zfs control directories */ 1662 if (fid_gen == 0 && 1663 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1664 *vpp = zfsvfs->z_ctldir; 1665 ASSERT(*vpp != NULL); 1666 if (object == ZFSCTL_INO_SNAPDIR) { 1667 VERIFY(zfsctl_root_lookup(*vpp, 
"snapshot", vpp, NULL, 1668 0, NULL, NULL, NULL, NULL, NULL) == 0); 1669 } else { 1670 VN_HOLD(*vpp); 1671 } 1672 ZFS_EXIT(zfsvfs); 1673 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1674 return (0); 1675 } 1676 1677 gen_mask = -1ULL >> (64 - 8 * i); 1678 1679 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1680 if (err = zfs_zget(zfsvfs, object, &zp)) { 1681 ZFS_EXIT(zfsvfs); 1682 return (err); 1683 } 1684 zp_gen = zp->z_phys->zp_gen & gen_mask; 1685 if (zp_gen == 0) 1686 zp_gen = 1; 1687 if (zp->z_unlinked || zp_gen != fid_gen) { 1688 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1689 VN_RELE(ZTOV(zp)); 1690 ZFS_EXIT(zfsvfs); 1691 return (EINVAL); 1692 } 1693 1694 ZFS_EXIT(zfsvfs); 1695 1696 *vpp = ZTOV(zp); 1697 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1698 vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); 1699 return (0); 1700} 1701 1702/* 1703 * Block out VOPs and close zfsvfs_t::z_os 1704 * 1705 * Note, if successful, then we return with the 'z_teardown_lock' and 1706 * 'z_teardown_inactive_lock' write held. 1707 */ 1708int 1709zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) 1710{ 1711 int error; 1712 1713 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1714 return (error); 1715 1716 *modep = zfsvfs->z_os->os_mode; 1717 if (name) 1718 dmu_objset_name(zfsvfs->z_os, name); 1719 dmu_objset_close(zfsvfs->z_os); 1720 1721 return (0); 1722} 1723 1724/* 1725 * Reopen zfsvfs_t::z_os and release VOPs. 1726 */ 1727int 1728zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1729{ 1730 int err; 1731 1732 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1733 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1734 1735 err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1736 if (err) { 1737 zfsvfs->z_os = NULL; 1738 } else { 1739 znode_t *zp; 1740 1741 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1742 1743 /* 1744 * Attempt to re-establish all the active znodes with 1745 * their dbufs. 
If a zfs_rezget() fails, then we'll let 1746 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1747 * when they try to use their znode. 1748 */ 1749 mutex_enter(&zfsvfs->z_znodes_lock); 1750 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1751 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1752 (void) zfs_rezget(zp); 1753 } 1754 mutex_exit(&zfsvfs->z_znodes_lock); 1755 1756 } 1757 1758 /* release the VOPs */ 1759 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1760 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1761 1762 if (err) { 1763 /* 1764 * Since we couldn't reopen zfsvfs::z_os, force 1765 * unmount this file system. 1766 */ 1767 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1768 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1769 } 1770 return (err); 1771} 1772 1773static void 1774zfs_freevfs(vfs_t *vfsp) 1775{ 1776 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1777 1778 /* 1779 * If this is a snapshot, we have an extra VFS_HOLD on our parent 1780 * from zfs_mount(). Release it here. 1781 */ 1782 if (zfsvfs->z_issnap) 1783 VFS_RELE(zfsvfs->z_parent->z_vfs); 1784 1785 zfsvfs_free(zfsvfs); 1786 1787 atomic_add_32(&zfs_active_fs_count, -1); 1788} 1789 1790#ifdef __i386__ 1791static int desiredvnodes_backup; 1792#endif 1793 1794static void 1795zfs_vnodes_adjust(void) 1796{ 1797#ifdef __i386__ 1798 int newdesiredvnodes; 1799 1800 desiredvnodes_backup = desiredvnodes; 1801 1802 /* 1803 * We calculate newdesiredvnodes the same way it is done in 1804 * vntblinit(). If it is equal to desiredvnodes, it means that 1805 * it wasn't tuned by the administrator and we can tune it down. 
1806 */ 1807 newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 1808 vm_kmem_size / (5 * (sizeof(struct vm_object) + 1809 sizeof(struct vnode)))); 1810 if (newdesiredvnodes == desiredvnodes) 1811 desiredvnodes = (3 * newdesiredvnodes) / 4; 1812#endif 1813} 1814 1815static void 1816zfs_vnodes_adjust_back(void) 1817{ 1818 1819#ifdef __i386__ 1820 desiredvnodes = desiredvnodes_backup; 1821#endif 1822} 1823 1824void 1825zfs_init(void) 1826{ 1827 1828 printf("ZFS filesystem version " ZPL_VERSION_STRING "\n"); 1829 1830 /* 1831 * Initialize znode cache, vnode ops, etc... 1832 */ 1833 zfs_znode_init(); 1834 1835 /* 1836 * Initialize .zfs directory structures 1837 */ 1838 zfsctl_init(); 1839 1840 /* 1841 * Reduce number of vnode. Originally number of vnodes is calculated 1842 * with UFS inode in mind. We reduce it here, because it's too big for 1843 * ZFS/i386. 1844 */ 1845 zfs_vnodes_adjust(); 1846 1847 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 1848} 1849 1850void 1851zfs_fini(void) 1852{ 1853 zfsctl_fini(); 1854 zfs_znode_fini(); 1855 zfs_vnodes_adjust_back(); 1856} 1857 1858int 1859zfs_busy(void) 1860{ 1861 return (zfs_active_fs_count != 0); 1862} 1863 1864int 1865zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 1866{ 1867 int error; 1868 objset_t *os = zfsvfs->z_os; 1869 dmu_tx_t *tx; 1870 1871 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1872 return (EINVAL); 1873 1874 if (newvers < zfsvfs->z_version) 1875 return (EINVAL); 1876 1877 tx = dmu_tx_create(os); 1878 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 1879 error = dmu_tx_assign(tx, TXG_WAIT); 1880 if (error) { 1881 dmu_tx_abort(tx); 1882 return (error); 1883 } 1884 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1885 8, 1, &newvers, tx); 1886 1887 if (error) { 1888 dmu_tx_commit(tx); 1889 return (error); 1890 } 1891 1892 spa_history_internal_log(LOG_DS_UPGRADE, 1893 dmu_objset_spa(os), tx, CRED(), 1894 "oldver=%llu newver=%llu dataset = %llu", 
1895 zfsvfs->z_version, newvers, dmu_objset_id(os)); 1896 1897 dmu_tx_commit(tx); 1898 1899 zfsvfs->z_version = newvers; 1900 1901 if (zfsvfs->z_version >= ZPL_VERSION_FUID) 1902 zfs_set_fuid_feature(zfsvfs); 1903 1904 return (0); 1905} 1906/* 1907 * Read a property stored within the master node. 1908 */ 1909int 1910zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1911{ 1912 const char *pname; 1913 int error = ENOENT; 1914 1915 /* 1916 * Look up the file system's value for the property. For the 1917 * version property, we look up a slightly different string. 1918 */ 1919 if (prop == ZFS_PROP_VERSION) 1920 pname = ZPL_VERSION_STR; 1921 else 1922 pname = zfs_prop_to_name(prop); 1923 1924 if (os != NULL) 1925 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1926 1927 if (error == ENOENT) { 1928 /* No value set, use the default value */ 1929 switch (prop) { 1930 case ZFS_PROP_VERSION: 1931 *value = ZPL_VERSION; 1932 break; 1933 case ZFS_PROP_NORMALIZE: 1934 case ZFS_PROP_UTF8ONLY: 1935 *value = 0; 1936 break; 1937 case ZFS_PROP_CASE: 1938 *value = ZFS_CASE_SENSITIVE; 1939 break; 1940 default: 1941 return (error); 1942 } 1943 error = 0; 1944 } 1945 return (error); 1946} 1947