zfs_vfsops.c revision 249643
11590Srgrimes/* 21590Srgrimes * CDDL HEADER START 31590Srgrimes * 41590Srgrimes * The contents of this file are subject to the terms of the 51590Srgrimes * Common Development and Distribution License (the "License"). 61590Srgrimes * You may not use this file except in compliance with the License. 71590Srgrimes * 81590Srgrimes * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91590Srgrimes * or http://www.opensolaris.org/os/licensing. 101590Srgrimes * See the License for the specific language governing permissions 111590Srgrimes * and limitations under the License. 121590Srgrimes * 131590Srgrimes * When distributing Covered Code, include this CDDL HEADER in each 141590Srgrimes * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151590Srgrimes * If applicable, add the following below this CDDL HEADER, with the 161590Srgrimes * fields enclosed by brackets "[]" replaced with your own identifying 171590Srgrimes * information: Portions Copyright [yyyy] [name of copyright owner] 181590Srgrimes * 191590Srgrimes * CDDL HEADER END 201590Srgrimes */ 211590Srgrimes/* 221590Srgrimes * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 231590Srgrimes * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 241590Srgrimes * All rights reserved. 251590Srgrimes * Copyright (c) 2013 by Delphix. All rights reserved. 261590Srgrimes */ 271590Srgrimes 281590Srgrimes/* Portions Copyright 2010 Robert Milkowski */ 29279219Sken 30279219Sken#include <sys/types.h> 31279219Sken#include <sys/param.h> 32279219Sken#include <sys/systm.h> 33279219Sken#include <sys/kernel.h> 34279219Sken#include <sys/sysmacros.h> 35279219Sken#include <sys/kmem.h> 36279219Sken#include <sys/acl.h> 37279219Sken#include <sys/vnode.h> 38279219Sken#include <sys/vfs.h> 39279219Sken#include <sys/mntent.h> 40279219Sken#include <sys/mount.h> 41279219Sken#include <sys/cmn_err.h> 42279219Sken#include <sys/zfs_znode.h> 43279219Sken#include <sys/zfs_dir.h> 44279219Sken#include <sys/zil.h> 45279219Sken#include <sys/fs/zfs.h> 46279219Sken#include <sys/dmu.h> 47279219Sken#include <sys/dsl_prop.h> 48279219Sken#include <sys/dsl_dataset.h> 49279219Sken#include <sys/dsl_deleg.h> 50279219Sken#include <sys/spa.h> 51279219Sken#include <sys/zap.h> 52279219Sken#include <sys/sa.h> 53279219Sken#include <sys/sa_impl.h> 54279219Sken#include <sys/varargs.h> 55279219Sken#include <sys/policy.h> 56279219Sken#include <sys/atomic.h> 57279219Sken#include <sys/zfs_ioctl.h> 58279219Sken#include <sys/zfs_ctldir.h> 59279219Sken#include <sys/zfs_fuid.h> 601590Srgrimes#include <sys/sunddi.h> 611590Srgrimes#include <sys/dnlc.h> 6227752Scharnier#include <sys/dmu_objset.h> 631590Srgrimes#include <sys/spa_boot.h> 641590Srgrimes#include <sys/jail.h> 651590Srgrimes#include "zfs_comutil.h" 661590Srgrimes 671590Srgrimesstruct mtx zfs_debug_mtx; 6827752ScharnierMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 6923693Speter 7027752ScharnierSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 711590Srgrimes 721590Srgrimesint zfs_super_owner; 7394505ScharnierSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 7494505Scharnier "File system owner can perform privileged operation on his file systems"); 7594505Scharnier 761590Srgrimesint zfs_debug_level; 771590SrgrimesTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 781590SrgrimesSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 791590Srgrimes "Debug level"); 801590Srgrimes 811590SrgrimesSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 821590Srgrimesstatic int zfs_version_acl = ZFS_ACL_VERSION; 83279219SkenSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 84279219Sken "ZFS_ACL_VERSION"); 8523693Speterstatic int zfs_version_spa = SPA_VERSION; 8623693SpeterSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 8727752Scharnier "SPA_VERSION"); 881590Srgrimesstatic int zfs_version_zpl = ZPL_VERSION; 8923693SpeterSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 901590Srgrimes "ZPL_VERSION"); 911590Srgrimes 9223693Speterstatic int zfs_mount(vfs_t *vfsp); 93279219Skenstatic int zfs_umount(vfs_t *vfsp, int fflag); 94279219Skenstatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 95279219Skenstatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 96279219Skenstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 971590Srgrimesstatic int zfs_sync(vfs_t *vfsp, int waitfor); 98279219Skenstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 99279219Sken struct ucred **credanonp, int *numsecflavors, int **secflavors); 100279219Skenstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 101279219Skenstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 102279219Skenstatic void zfs_freevfs(vfs_t *vfsp); 103279219Sken 1047913Sjoergstatic struct vfsops zfs_vfsops = { 1057913Sjoerg .vfs_mount = zfs_mount, 1067913Sjoerg .vfs_unmount = zfs_umount, 1077929Sjoerg .vfs_root = zfs_root, 1087929Sjoerg .vfs_statfs = zfs_statfs, 1099541Sjoerg .vfs_vget = zfs_vget, 11039260Sgibbs .vfs_sync = zfs_sync, 111279219Sken .vfs_checkexp = zfs_checkexp, 1127913Sjoerg .vfs_fhtovp = zfs_fhtovp, 11339260Sgibbs}; 11439260Sgibbs 11539260SgibbsVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 11639260Sgibbs 11739260Sgibbs/* 11839260Sgibbs * We need to keep a count of active fs's. 119279219Sken * This is necessary to prevent our module 120279219Sken * from being unloaded after a umount -f 121279219Sken */ 122280231Skenstatic uint32_t zfs_active_fs_count = 0; 12339260Sgibbs 124279219Sken/*ARGSUSED*/ 125279219Skenstatic int 126279219Skenzfs_sync(vfs_t *vfsp, int waitfor) 127279219Sken{ 128279219Sken 129279219Sken /* 130227174Sed * Data integrity is job one. We don't want a compromised kernel 131152396Sdwmalone * writing to the storage pool, so we never sync during panic. 132228619Sdim */ 1331590Srgrimes if (panicstr) 1347913Sjoerg return (0); 1351590Srgrimes 13694505Scharnier if (vfsp != NULL) { 13794505Scharnier /* 1389541Sjoerg * Sync a specific filesystem. 1399541Sjoerg */ 1409541Sjoerg zfsvfs_t *zfsvfs = vfsp->vfs_data; 14194505Scharnier dsl_pool_t *dp; 14294505Scharnier int error; 14394505Scharnier 144279219Sken error = vfs_stdsync(vfsp, waitfor); 14594505Scharnier if (error != 0) 14694505Scharnier return (error); 147279219Sken 14841913Smjacob ZFS_ENTER(zfsvfs); 149279219Sken dp = dmu_objset_pool(zfsvfs->z_os); 15039260Sgibbs 1517913Sjoerg /* 1527929Sjoerg * If the system is shutting down, then skip any 15394505Scharnier * filesystems which may exist on a suspended pool. 15494505Scharnier */ 15594505Scharnier if (sys_shutdown && spa_suspended(dp->dp_spa)) { 15694505Scharnier ZFS_EXIT(zfsvfs); 15794505Scharnier return (0); 15894505Scharnier } 15939260Sgibbs 16094505Scharnier if (zfsvfs->z_log != NULL) 16194505Scharnier zil_commit(zfsvfs->z_log, 0); 16294505Scharnier 16341913Smjacob ZFS_EXIT(zfsvfs); 16441913Smjacob } else { 16594505Scharnier /* 16646928Smjacob * Sync all ZFS filesystems. This is what happens when you 16746928Smjacob * run sync(1M). Unlike other filesystems, ZFS honors the 16894505Scharnier * request by waiting for all pools to commit all dirty data. 16994505Scharnier */ 170279219Sken spa_sync_allpools(); 171279219Sken } 172279219Sken 173279219Sken return (0); 174279219Sken} 175279219Sken 17694505Scharnier#ifndef __FreeBSD__ 1771590Srgrimesstatic int 1781590Srgrimeszfs_create_unique_device(dev_t *dev) 179279219Sken{ 180227174Sed major_t new_major; 181227174Sed 182227174Sed do { 183227174Sed ASSERT3U(zfs_minor, <=, MAXMIN32); 184279219Sken minor_t start = zfs_minor; 185279219Sken do { 186279219Sken mutex_enter(&zfs_dev_mtx); 187279219Sken if (zfs_minor >= MAXMIN32) { 188279219Sken /* 189279219Sken * If we're still using the real major 190279219Sken * keep out of /dev/zfs and /dev/zvol minor 191279219Sken * number space. If we're using a getudev()'ed 192279219Sken * major number, we can use all of its minors. 193279219Sken */ 194279219Sken if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 195279219Sken zfs_minor = ZFS_MIN_MINOR; 196279219Sken else 197279219Sken zfs_minor = 0; 198279219Sken } else { 199279219Sken zfs_minor++; 200279219Sken } 201279219Sken *dev = makedevice(zfs_major, zfs_minor); 202279219Sken mutex_exit(&zfs_dev_mtx); 203227174Sed } while (vfs_devismounted(*dev) && zfs_minor != start); 204227174Sed if (zfs_minor == start) { 205227174Sed /* 2061590Srgrimes * We are using all ~262,000 minor numbers for the 2071590Srgrimes * current major number. Create a new major number. 208152396Sdwmalone */ 2091590Srgrimes if ((new_major = getudev()) == (major_t)-1) { 210227174Sed cmn_err(CE_WARN, 2111590Srgrimes "zfs_mount: Can't get unique major " 2121590Srgrimes "device number."); 2131590Srgrimes return (-1); 214152396Sdwmalone } 2151590Srgrimes mutex_enter(&zfs_dev_mtx); 216279261Sken zfs_major = new_major; 217279261Sken zfs_minor = 0; 2181590Srgrimes 2191590Srgrimes mutex_exit(&zfs_dev_mtx); 2201590Srgrimes } else { 22124360Simp break; 2221590Srgrimes } 2231590Srgrimes /* CONSTANTCONDITION */ 2241590Srgrimes } while (1); 2251590Srgrimes 2261590Srgrimes return (0); 2271590Srgrimes} 228279219Sken#endif /* !__FreeBSD__ */ 229279219Sken 2301590Srgrimesstatic void 231279219Skenatime_changed_cb(void *arg, uint64_t newval) 2321590Srgrimes{ 2331590Srgrimes zfsvfs_t *zfsvfs = arg; 2341590Srgrimes 2351590Srgrimes if (newval == TRUE) { 236279219Sken zfsvfs->z_atime = TRUE; 2371590Srgrimes zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 2381590Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 2391590Srgrimes vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 2401590Srgrimes } else { 2411590Srgrimes zfsvfs->z_atime = FALSE; 24227752Scharnier zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 2431590Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 2441590Srgrimes vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 2451590Srgrimes } 2467913Sjoerg} 2477913Sjoerg 2489541Sjoergstatic void 2499541Sjoergxattr_changed_cb(void *arg, uint64_t newval) 2509541Sjoerg{ 251279219Sken zfsvfs_t *zfsvfs = arg; 252279219Sken 253279219Sken if (newval == TRUE) { 254279219Sken /* XXX locking on vfs_flag? */ 255279219Sken#ifdef TODO 2561590Srgrimes zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 25727752Scharnier#endif 2581590Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2591590Srgrimes vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2601590Srgrimes } else { 2617929Sjoerg /* XXX locking on vfs_flag? */ 26241925Smjacob#ifdef TODO 2637929Sjoerg zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 264279219Sken#endif 2657929Sjoerg vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 26627752Scharnier vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2677929Sjoerg } 2687929Sjoerg} 2697929Sjoerg 2707929Sjoergstatic void 2717929Sjoergblksz_changed_cb(void *arg, uint64_t newval) 2727929Sjoerg{ 27339260Sgibbs zfsvfs_t *zfsvfs = arg; 27441925Smjacob 27539260Sgibbs if (newval < SPA_MINBLOCKSIZE || 27639260Sgibbs newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 27739260Sgibbs newval = SPA_MAXBLOCKSIZE; 27839260Sgibbs 27939260Sgibbs zfsvfs->z_max_blksz = newval; 28039260Sgibbs zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 281279219Sken} 282152396Sdwmalone 2837929Sjoergstatic void 284152396Sdwmalonereadonly_changed_cb(void *arg, uint64_t newval) 285152396Sdwmalone{ 286152396Sdwmalone zfsvfs_t *zfsvfs = arg; 287279219Sken 288279219Sken if (newval) { 289279219Sken /* XXX locking on vfs_flag? */ 290279219Sken zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 291279219Sken vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 29227752Scharnier vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 2931590Srgrimes } else { 2941590Srgrimes /* XXX locking on vfs_flag? */ 2951590Srgrimes zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 29641913Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 29741945Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 29841945Smjacob } 29994505Scharnier} 30041945Smjacob 30141945Smjacobstatic void 30241945Smjacobsetuid_changed_cb(void *arg, uint64_t newval) 30341945Smjacob{ 30441945Smjacob zfsvfs_t *zfsvfs = arg; 30541945Smjacob 30642010Smjacob if (newval == FALSE) { 30742010Smjacob zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 30842010Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 30942010Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 31041945Smjacob } else { 31141945Smjacob zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 31241945Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 31341945Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 31441945Smjacob } 31541945Smjacob} 31641945Smjacob 31741945Smjacobstatic void 31841945Smjacobexec_changed_cb(void *arg, uint64_t newval) 31941945Smjacob{ 32042010Smjacob zfsvfs_t *zfsvfs = arg; 32142010Smjacob 32242010Smjacob if (newval == FALSE) { 32342010Smjacob zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 32441945Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 32541945Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 32641945Smjacob } else { 32741945Smjacob zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 32841945Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 32941945Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 33041945Smjacob } 33141945Smjacob} 33241945Smjacob 33341945Smjacob/* 33441945Smjacob * The nbmand mount option can be changed at mount time. 33541913Smjacob * We can't allow it to be toggled on live file systems or incorrect 33641913Smjacob * behavior may be seen from cifs clients 33741925Smjacob * 33841925Smjacob * This property isn't registered via dsl_prop_register(), but this callback 33941925Smjacob * will be called when a file system is first mounted 34041913Smjacob */ 34141945Smjacobstatic void 34241913Smjacobnbmand_changed_cb(void *arg, uint64_t newval) 34341925Smjacob{ 34441925Smjacob zfsvfs_t *zfsvfs = arg; 34541913Smjacob if (newval == FALSE) { 34641925Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 34741913Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 34841913Smjacob } else { 34941925Smjacob vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 35041925Smjacob vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 35141925Smjacob } 35241913Smjacob} 35341925Smjacob 35441913Smjacobstatic void 35541925Smjacobsnapdir_changed_cb(void *arg, uint64_t newval) 35646928Smjacob{ 35746928Smjacob zfsvfs_t *zfsvfs = arg; 35846928Smjacob 35946928Smjacob zfsvfs->z_show_ctldir = newval; 36046928Smjacob} 36146928Smjacob 36246928Smjacobstatic void 36346928Smjacobvscan_changed_cb(void *arg, uint64_t newval) 36446928Smjacob{ 36546928Smjacob zfsvfs_t *zfsvfs = arg; 36646928Smjacob 36746928Smjacob zfsvfs->z_vscan = newval; 36846928Smjacob} 36946928Smjacob 37046928Smjacobstatic void 37146928Smjacobacl_mode_changed_cb(void *arg, uint64_t newval) 37246928Smjacob{ 37346928Smjacob zfsvfs_t *zfsvfs = arg; 37446928Smjacob 37546928Smjacob zfsvfs->z_acl_mode = newval; 37646928Smjacob} 37746928Smjacob 37846928Smjacobstatic void 37946928Smjacobacl_inherit_changed_cb(void *arg, uint64_t newval) 380279219Sken{ 381279219Sken zfsvfs_t *zfsvfs = arg; 382279219Sken 383279219Sken zfsvfs->z_acl_inherit = newval; 384279219Sken} 385279219Sken 386279219Skenstatic int 387279219Skenzfs_register_callbacks(vfs_t *vfsp) 388280231Sken{ 389280231Sken struct dsl_dataset *ds = NULL; 390280231Sken objset_t *os = NULL; 391280231Sken zfsvfs_t *zfsvfs = NULL; 392279219Sken uint64_t nbmand; 393280231Sken boolean_t readonly = B_FALSE; 394280231Sken boolean_t do_readonly = B_FALSE; 395280231Sken boolean_t setuid = B_FALSE; 396280231Sken boolean_t do_setuid = B_FALSE; 397280231Sken boolean_t exec = B_FALSE; 398279219Sken boolean_t do_exec = B_FALSE; 399279219Sken#ifdef illumos 400279219Sken boolean_t devices = B_FALSE; 401279219Sken boolean_t do_devices = B_FALSE; 402279219Sken#endif 403279219Sken boolean_t xattr = B_FALSE; 404279219Sken boolean_t do_xattr = B_FALSE; 405279219Sken boolean_t atime = B_FALSE; 406279219Sken boolean_t do_atime = B_FALSE; 407279219Sken int error = 0; 408279219Sken 409279219Sken ASSERT(vfsp); 410279219Sken zfsvfs = vfsp->vfs_data; 411279219Sken ASSERT(zfsvfs); 412279219Sken os = zfsvfs->z_os; 413279219Sken 414279219Sken /* 415279219Sken * This function can be called for a snapshot when we update snapshot's 416279219Sken * mount point, which isn't really supported. 417279219Sken */ 418279219Sken if (dmu_objset_is_snapshot(os)) 419279219Sken return (EOPNOTSUPP); 420279219Sken 42141913Smjacob /* 42241913Smjacob * The act of registering our callbacks will destroy any mount 42341913Smjacob * options we may have. In order to enable temporary overrides 4241590Srgrimes * of mount options, we stash away the current values and 42527752Scharnier * restore them after we register the callbacks. 4261590Srgrimes */ 4271590Srgrimes if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 42827752Scharnier !spa_writeable(dmu_objset_spa(os))) { 4291590Srgrimes readonly = B_TRUE; 4301590Srgrimes do_readonly = B_TRUE; 43141925Smjacob } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 4321590Srgrimes readonly = B_FALSE; 4331590Srgrimes do_readonly = B_TRUE; 4341590Srgrimes } 435227174Sed if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 4361590Srgrimes setuid = B_FALSE; 437152396Sdwmalone do_setuid = B_TRUE; 438152396Sdwmalone } else { 439152396Sdwmalone if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 4401590Srgrimes setuid = B_FALSE; 4417913Sjoerg do_setuid = B_TRUE; 44294505Scharnier } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 4431590Srgrimes setuid = B_TRUE; 4441590Srgrimes do_setuid = B_TRUE; 4451590Srgrimes } 4461590Srgrimes } 4471590Srgrimes if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 448227174Sed exec = B_FALSE; 449152396Sdwmalone do_exec = B_TRUE; 4501590Srgrimes } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 451227174Sed exec = B_TRUE; 4521590Srgrimes do_exec = B_TRUE; 4531590Srgrimes } 4541590Srgrimes if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4551590Srgrimes xattr = B_FALSE; 4561590Srgrimes do_xattr = B_TRUE; 4571590Srgrimes } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4581590Srgrimes xattr = B_TRUE; 4591590Srgrimes do_xattr = B_TRUE; 4601590Srgrimes } 4611590Srgrimes if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 4627913Sjoerg atime = B_FALSE; 4637913Sjoerg do_atime = B_TRUE; 4647913Sjoerg } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 46592764Sphk atime = B_TRUE; 46692764Sphk do_atime = B_TRUE; 46792764Sphk } 46892764Sphk 46992764Sphk /* 4707913Sjoerg * nbmand is a special property. It can only be changed at 4711590Srgrimes * mount time. 4721590Srgrimes * 4731590Srgrimes * This is weird, but it is documented to only be changeable 4741590Srgrimes * at mount time. 4751590Srgrimes */ 476227174Sed if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 477152396Sdwmalone nbmand = B_FALSE; 4781590Srgrimes } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 479152396Sdwmalone nbmand = B_TRUE; 480152396Sdwmalone } else { 4811590Srgrimes char osname[MAXNAMELEN]; 4821590Srgrimes 4831590Srgrimes dmu_objset_name(os, osname); 4841590Srgrimes if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 4851590Srgrimes NULL)) { 48611608Sbde return (error); 48711608Sbde } 4881590Srgrimes } 4891590Srgrimes 4901590Srgrimes /* 49127752Scharnier * Register property callbacks. 4921590Srgrimes * 4931590Srgrimes * It would probably be fine to just check for i/o error from 4941590Srgrimes * the first prop_register(), but I guess I like to go 4951590Srgrimes * overboard... 4961590Srgrimes */ 4971590Srgrimes ds = dmu_objset_ds(os); 4981590Srgrimes dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 4991590Srgrimes error = dsl_prop_register(ds, 5001590Srgrimes zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 5011590Srgrimes error = error ? error : dsl_prop_register(ds, 5021590Srgrimes zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 5031590Srgrimes error = error ? error : dsl_prop_register(ds, 5041590Srgrimes zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 5051590Srgrimes error = error ? error : dsl_prop_register(ds, 506227174Sed zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 507152396Sdwmalone#ifdef illumos 5081590Srgrimes error = error ? error : dsl_prop_register(ds, 50994505Scharnier zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs); 5101590Srgrimes#endif 5111590Srgrimes error = error ? error : dsl_prop_register(ds, 5121590Srgrimes zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 513227174Sed error = error ? error : dsl_prop_register(ds, 51439260Sgibbs zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 51539260Sgibbs error = error ? error : dsl_prop_register(ds, 51639260Sgibbs zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 51739260Sgibbs error = error ? error : dsl_prop_register(ds, 51839260Sgibbs zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 51939260Sgibbs error = error ? error : dsl_prop_register(ds, 52039260Sgibbs zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 52139260Sgibbs zfsvfs); 52239260Sgibbs error = error ? error : dsl_prop_register(ds, 52339260Sgibbs zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); 52439260Sgibbs dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 52539260Sgibbs if (error) 526227174Sed goto unregister; 5277929Sjoerg 5287913Sjoerg /* 5297913Sjoerg * Invoke our callbacks to restore temporary mount options. 530279219Sken */ 5317913Sjoerg if (do_readonly) 532279219Sken readonly_changed_cb(zfsvfs, readonly); 5337929Sjoerg if (do_setuid) 53444397Smjacob setuid_changed_cb(zfsvfs, setuid); 535279219Sken if (do_exec) 53644397Smjacob exec_changed_cb(zfsvfs, exec); 5377913Sjoerg if (do_xattr) 5387913Sjoerg xattr_changed_cb(zfsvfs, xattr); 539227174Sed if (do_atime) 5407913Sjoerg atime_changed_cb(zfsvfs, atime); 5417913Sjoerg 5427913Sjoerg nbmand_changed_cb(zfsvfs, nbmand); 5437913Sjoerg 5447913Sjoerg return (0); 5457913Sjoerg 54639260Sgibbsunregister: 5477913Sjoerg /* 5487913Sjoerg * We may attempt to unregister some callbacks that are not 5497913Sjoerg * registered, but this is OK; it will simply return ENOMSG, 5507913Sjoerg * which we will ignore. 551227174Sed */ 55239260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME), 55339260Sgibbs atime_changed_cb, zfsvfs); 55439260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR), 555227174Sed xattr_changed_cb, zfsvfs); 5567913Sjoerg (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), 55739260Sgibbs blksz_changed_cb, zfsvfs); 55839260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY), 55939260Sgibbs readonly_changed_cb, zfsvfs); 56039260Sgibbs#ifdef illumos 56139260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), 56239260Sgibbs devices_changed_cb, zfsvfs); 56339260Sgibbs#endif 56439260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID), 56539260Sgibbs setuid_changed_cb, zfsvfs); 56639260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC), 56744618Smjacob exec_changed_cb, zfsvfs); 56839260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), 56939260Sgibbs snapdir_changed_cb, zfsvfs); 57039260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE), 57139260Sgibbs acl_mode_changed_cb, zfsvfs); 57239260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), 573227174Sed acl_inherit_changed_cb, zfsvfs); 57439260Sgibbs (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), 57539260Sgibbs vscan_changed_cb, zfsvfs); 576227174Sed return (error); 57739260Sgibbs} 57839260Sgibbs 57939260Sgibbsstatic int 58039260Sgibbszfs_space_delta_cb(dmu_object_type_t bonustype, void *data, 58139260Sgibbs uint64_t *userp, uint64_t *groupp) 58239260Sgibbs{ 58339260Sgibbs /* 58439260Sgibbs * Is it a valid type of object to track? 58539260Sgibbs */ 586279219Sken if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 587279219Sken return (SET_ERROR(ENOENT)); 588279219Sken 589279219Sken /* 590279219Sken * If we have a NULL data pointer 591279219Sken * then assume the id's aren't changing and 592279219Sken * return EEXIST to the dmu to let it know to 593279219Sken * use the same ids 594279219Sken */ 595279219Sken if (data == NULL) 596279219Sken return (SET_ERROR(EEXIST)); 597279219Sken 598279219Sken if (bonustype == DMU_OT_ZNODE) { 599279219Sken znode_phys_t *znp = data; 600279219Sken *userp = znp->zp_uid; 601279219Sken *groupp = znp->zp_gid; 602279219Sken } else { 603279219Sken int hdrsize; 604279219Sken sa_hdr_phys_t *sap = data; 605279219Sken sa_hdr_phys_t sa = *sap; 606279219Sken boolean_t swap = B_FALSE; 607279219Sken 608279219Sken ASSERT(bonustype == DMU_OT_SA); 609279219Sken 610279219Sken if (sa.sa_magic == 0) { 611279219Sken /* 612279219Sken * This should only happen for newly created 613279219Sken * files that haven't had the znode data filled 614279219Sken * in yet. 615279219Sken */ 616279219Sken *userp = 0; 617279219Sken *groupp = 0; 618227174Sed return (0); 6197913Sjoerg } 6207913Sjoerg if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { 62144644Smjacob sa.sa_magic = SA_MAGIC; 62239260Sgibbs sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); 62344644Smjacob swap = B_TRUE; 62439260Sgibbs } else { 62544644Smjacob VERIFY3U(sa.sa_magic, ==, SA_MAGIC); 62644644Smjacob } 62744644Smjacob 62844644Smjacob hdrsize = sa_hdrsize(&sa); 62939260Sgibbs VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); 630279219Sken *userp = *((uint64_t *)((uintptr_t)data + hdrsize + 63139260Sgibbs SA_UID_OFFSET)); 632279219Sken *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + 63339260Sgibbs SA_GID_OFFSET)); 634279219Sken if (swap) { 63539260Sgibbs *userp = BSWAP_64(*userp); 636279219Sken *groupp = BSWAP_64(*groupp); 63739260Sgibbs } 638279219Sken } 63943629Smjacob return (0); 64043629Smjacob} 64169248Skris 64243629Smjacobstatic void 643279219Skenfuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 644279219Sken char *domainbuf, int buflen, uid_t *ridp) 645279219Sken{ 646279219Sken uint64_t fuid; 647279219Sken const char *domain; 648279219Sken 649279219Sken fuid = strtonum(fuidstr, NULL); 650279219Sken 651279219Sken domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 652279219Sken if (domain) 653279219Sken (void) strlcpy(domainbuf, domain, buflen); 654279219Sken else 655279219Sken domainbuf[0] = '\0'; 656279219Sken *ridp = FUID_RID(fuid); 657279219Sken} 658279219Sken 659279219Skenstatic uint64_t 660279219Skenzfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 661279219Sken{ 662279219Sken switch (type) { 663279219Sken case ZFS_PROP_USERUSED: 664279219Sken return (DMU_USERUSED_OBJECT); 665279219Sken case ZFS_PROP_GROUPUSED: 666279219Sken return (DMU_GROUPUSED_OBJECT); 667279219Sken case ZFS_PROP_USERQUOTA: 668279219Sken return (zfsvfs->z_userquota_obj); 669279219Sken case ZFS_PROP_GROUPQUOTA: 670279219Sken return (zfsvfs->z_groupquota_obj); 671279219Sken } 672279219Sken return (0); 673279219Sken} 674279219Sken 675279219Skenint 676279219Skenzfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 677279219Sken uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 678279219Sken{ 679279219Sken int error; 680279219Sken zap_cursor_t zc; 681279219Sken zap_attribute_t za; 682279219Sken zfs_useracct_t *buf = vbuf; 68343629Smjacob uint64_t obj; 684279219Sken 685279219Sken if (!dmu_objset_userspace_present(zfsvfs->z_os)) 686279219Sken return (SET_ERROR(ENOTSUP)); 687279219Sken 68843629Smjacob obj = zfs_userquota_prop_to_obj(zfsvfs, type); 689279219Sken if (obj == 0) { 690279219Sken *bufsizep = 0; 691279219Sken return (0); 692279219Sken } 693279219Sken 694279219Sken for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 695279219Sken (error = zap_cursor_retrieve(&zc, &za)) == 0; 69643629Smjacob zap_cursor_advance(&zc)) { 697279219Sken if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 698279219Sken *bufsizep) 699279219Sken break; 700279219Sken 701279219Sken fuidstr_to_sid(zfsvfs, za.za_name, 70243629Smjacob buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 703279219Sken 704279219Sken buf->zu_space = za.za_first_integer; 705279219Sken buf++; 706279219Sken } 707279219Sken if (error == ENOENT) 708279219Sken error = 0; 709279219Sken 71043629Smjacob ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 711279219Sken *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 712279219Sken *cookiep = zap_cursor_serialize(&zc); 713279219Sken zap_cursor_fini(&zc); 714279219Sken return (error); 715279219Sken} 716279219Sken 71743629Smjacob/* 718279219Sken * buf must be big enough (eg, 32 bytes) 719279219Sken */ 720279219Skenstatic int 721279219Skenid_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 722279219Sken char *buf, boolean_t addok) 72343629Smjacob{ 724279219Sken uint64_t fuid; 72543629Smjacob int domainid = 0; 726279219Sken 727279219Sken if (domain && domain[0]) { 728279219Sken domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 729279219Sken if (domainid == -1) 730279219Sken return (SET_ERROR(ENOENT)); 731279219Sken } 732279219Sken fuid = FUID_ENCODE(domainid, rid); 733279219Sken (void) sprintf(buf, "%llx", (longlong_t)fuid); 734279219Sken return (0); 735279219Sken} 736279219Sken 737279219Skenint 738279219Skenzfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 739279219Sken const char *domain, uint64_t rid, uint64_t *valp) 740279219Sken{ 741279219Sken char buf[32]; 742279219Sken int err; 743279219Sken uint64_t obj; 744279219Sken 745279219Sken *valp = 0; 746279219Sken 747279219Sken if (!dmu_objset_userspace_present(zfsvfs->z_os)) 748279219Sken return (SET_ERROR(ENOTSUP)); 749279219Sken 750279219Sken obj = zfs_userquota_prop_to_obj(zfsvfs, type); 751279219Sken if (obj == 0) 752279219Sken return (0); 753279219Sken 754279219Sken err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 755279219Sken if (err) 756279219Sken return (err); 757279219Sken 758279219Sken err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 759279219Sken if (err == ENOENT) 760279219Sken err = 0; 761279219Sken return (err); 762279219Sken} 763279219Sken 764279219Skenint 765279219Skenzfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 766279219Sken const char *domain, uint64_t rid, uint64_t quota) 767279219Sken{ 768279219Sken char buf[32]; 769279219Sken int err; 770279219Sken dmu_tx_t *tx; 771279219Sken uint64_t *objp; 772279219Sken boolean_t fuid_dirtied; 773279219Sken 774279219Sken if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 775279219Sken return (SET_ERROR(EINVAL)); 776279219Sken 777279219Sken if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 778279219Sken return (SET_ERROR(ENOTSUP)); 779279219Sken 780279219Sken objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 781279219Sken &zfsvfs->z_groupquota_obj; 782279219Sken 783279219Sken err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 784279219Sken if (err) 785279219Sken return (err); 786279219Sken fuid_dirtied = zfsvfs->z_fuid_dirty; 787279219Sken 788279219Sken tx = dmu_tx_create(zfsvfs->z_os); 789279219Sken dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 790279219Sken if (*objp == 0) { 791279219Sken dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 792279219Sken zfs_userquota_prop_prefixes[type]); 793279219Sken } 794279219Sken if (fuid_dirtied) 795279219Sken zfs_fuid_txhold(zfsvfs, tx); 796279219Sken err = dmu_tx_assign(tx, TXG_WAIT); 797279219Sken if (err) { 798279219Sken dmu_tx_abort(tx); 799279219Sken return (err); 800279219Sken } 801279219Sken 802279219Sken mutex_enter(&zfsvfs->z_lock); 803279219Sken if (*objp == 0) { 804279219Sken *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 805279219Sken DMU_OT_NONE, 0, tx); 806279219Sken VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 807279219Sken zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 808279219Sken } 809279219Sken mutex_exit(&zfsvfs->z_lock); 810279219Sken 811279219Sken if (quota == 0) { 812279219Sken err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 813279219Sken if (err == ENOENT) 814279219Sken err = 0; 815279219Sken } else { 816279219Sken err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 817279219Sken } 818279219Sken ASSERT(err == 0); 819279219Sken if (fuid_dirtied) 820279219Sken zfs_fuid_sync(zfsvfs, tx); 821279219Sken dmu_tx_commit(tx); 822279219Sken return (err); 823279219Sken} 824279219Sken 825279219Skenboolean_t 826279219Skenzfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 827279219Sken{ 828279219Sken char buf[32]; 829279219Sken uint64_t used, quota, usedobj, quotaobj; 830279219Sken int err; 831279219Sken 832279219Sken usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 833279219Sken quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 834279219Sken 835279219Sken if (quotaobj == 0 || zfsvfs->z_replay) 836279219Sken return (B_FALSE); 837279219Sken 838279219Sken (void) sprintf(buf, "%llx", (longlong_t)fuid); 839279219Sken err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 840279219Sken if (err != 0) 841279219Sken return (B_FALSE); 842279219Sken 843279219Sken err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 844279219Sken if (err != 0) 845279219Sken return (B_FALSE); 846279219Sken return (used >= quota); 847279219Sken} 848279219Sken 849279219Skenboolean_t 850279219Skenzfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) 85143629Smjacob{ 852279219Sken uint64_t fuid; 853279219Sken uint64_t quotaobj; 85443629Smjacob 855279219Sken quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 85643629Smjacob 857279219Sken fuid = isgroup ? zp->z_gid : zp->z_uid; 858279219Sken 859279219Sken if (quotaobj == 0 || zfsvfs->z_replay) 860279219Sken return (B_FALSE); 861279219Sken 862279219Sken return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); 863279219Sken} 864279219Sken 865279219Skenint 866279219Skenzfsvfs_create(const char *osname, zfsvfs_t **zfvp) 867279219Sken{ 868279219Sken objset_t *os; 869279219Sken zfsvfs_t *zfsvfs; 870279219Sken uint64_t zval; 871279219Sken int i, error; 872279219Sken uint64_t sa_obj; 873279219Sken 874279219Sken zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 875279219Sken 876279219Sken /* 877279219Sken * We claim to always be readonly so we can open snapshots; 878279219Sken * other ZPL code will prevent us from writing to snapshots. 879279219Sken */ 880279219Sken error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); 881279219Sken if (error) { 882279219Sken kmem_free(zfsvfs, sizeof (zfsvfs_t)); 883279219Sken return (error); 884279219Sken } 885279219Sken 886279219Sken /* 887279219Sken * Initialize the zfs-specific filesystem structure. 888279219Sken * Should probably make this a kmem cache, shuffle fields, 889279219Sken * and just bzero up to z_hold_mtx[]. 890279219Sken */ 891279219Sken zfsvfs->z_vfs = NULL; 892279219Sken zfsvfs->z_parent = zfsvfs; 893279219Sken zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 894279219Sken zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 895279219Sken zfsvfs->z_os = os; 896279219Sken 897279219Sken error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 898279219Sken if (error) { 899279219Sken goto out; 900279219Sken } else if (zfsvfs->z_version > 901279219Sken zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 902279219Sken (void) printf("Can't mount a version %lld file system " 903279219Sken "on a version %lld pool\n. Pool must be upgraded to mount " 904279219Sken "this file system.", (u_longlong_t)zfsvfs->z_version, 905279219Sken (u_longlong_t)spa_version(dmu_objset_spa(os))); 906279219Sken error = SET_ERROR(ENOTSUP); 907279219Sken goto out; 908279219Sken } 909279219Sken if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 910279219Sken goto out; 911279219Sken zfsvfs->z_norm = (int)zval; 912279219Sken 913279219Sken if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 914279219Sken goto out; 915279219Sken zfsvfs->z_utf8 = (zval != 0); 916279219Sken 917279219Sken if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 918279219Sken goto out; 919279219Sken zfsvfs->z_case = (uint_t)zval; 920279219Sken 921279219Sken /* 922279219Sken * Fold case on file systems that are always or sometimes case 923279219Sken * insensitive. 924279219Sken */ 925279219Sken if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 926279219Sken zfsvfs->z_case == ZFS_CASE_MIXED) 927279219Sken zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 928279219Sken 929279219Sken zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 930279219Sken zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 931279219Sken 932279219Sken if (zfsvfs->z_use_sa) { 933279219Sken /* should either have both of these objects or none */ 934279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 935279219Sken &sa_obj); 936279219Sken if (error) 937279219Sken return (error); 938279219Sken } else { 939279219Sken /* 940279219Sken * Pre SA versions file systems should never touch 941279219Sken * either the attribute registration or layout objects. 942279219Sken */ 943279219Sken sa_obj = 0; 944279219Sken } 945279219Sken 946279219Sken error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 947279219Sken &zfsvfs->z_attr_table); 948279219Sken if (error) 949279219Sken goto out; 950279219Sken 951279219Sken if (zfsvfs->z_version >= ZPL_VERSION_SA) 952279219Sken sa_register_update_callback(os, zfs_sa_upgrade); 953279219Sken 954279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 955279219Sken &zfsvfs->z_root); 956279219Sken if (error) 957279219Sken goto out; 958279219Sken ASSERT(zfsvfs->z_root != 0); 959279219Sken 960279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 961279219Sken &zfsvfs->z_unlinkedobj); 962279219Sken if (error) 963279219Sken goto out; 964279219Sken 965279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, 966279219Sken zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 967279219Sken 8, 1, &zfsvfs->z_userquota_obj); 968279219Sken if (error && error != ENOENT) 969279219Sken goto out; 970279219Sken 971279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, 972279219Sken zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 973279219Sken 8, 1, &zfsvfs->z_groupquota_obj); 974279219Sken if (error && error != ENOENT) 975279219Sken goto out; 976279219Sken 977279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 978279219Sken &zfsvfs->z_fuid_obj); 979279219Sken if (error && error != ENOENT) 980279219Sken goto out; 981279219Sken 982279219Sken error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 983279219Sken &zfsvfs->z_shares_dir); 984279219Sken if (error && error != ENOENT) 985279219Sken goto out; 986279219Sken 987279219Sken mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 988279219Sken mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 989279219Sken list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 990279219Sken offsetof(znode_t, z_link_node)); 991279219Sken rrw_init(&zfsvfs->z_teardown_lock, B_FALSE); 992279219Sken rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 993279219Sken rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 994279219Sken for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 995279219Sken mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 996279219Sken 997279219Sken *zfvp = zfsvfs; 998279219Sken return (0); 999279219Sken 1000279219Skenout: 1001279219Sken dmu_objset_disown(os, zfsvfs); 1002279219Sken *zfvp = NULL; 1003279219Sken kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1004279219Sken return (error); 1005279219Sken} 1006279219Sken 1007279219Skenstatic int 1008279219Skenzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1009279219Sken{ 1010279219Sken int error; 1011279219Sken 1012279219Sken error = zfs_register_callbacks(zfsvfs->z_vfs); 1013279219Sken if (error) 1014279219Sken return (error); 1015279219Sken 1016279219Sken /* 1017279219Sken * Set the objset user_ptr to track its zfsvfs. 1018279219Sken */ 1019279219Sken mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1020279219Sken dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1021279219Sken mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1022279219Sken 1023279219Sken zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 1024279219Sken 1025279219Sken /* 1026279219Sken * If we are not mounting (ie: online recv), then we don't 1027279219Sken * have to worry about replaying the log as we blocked all 1028279219Sken * operations out since we closed the ZIL. 1029279219Sken */ 1030279219Sken if (mounting) { 1031279219Sken boolean_t readonly; 1032279219Sken 1033279219Sken /* 1034279219Sken * During replay we remove the read only flag to 1035279219Sken * allow replays to succeed. 1036279219Sken */ 1037279219Sken readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1038279219Sken if (readonly != 0) 1039279219Sken zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1040279219Sken else 1041279219Sken zfs_unlinked_drain(zfsvfs); 1042279219Sken 1043279219Sken /* 1044279219Sken * Parse and replay the intent log. 1045279219Sken * 1046279219Sken * Because of ziltest, this must be done after 1047279219Sken * zfs_unlinked_drain(). (Further note: ziltest 1048279219Sken * doesn't use readonly mounts, where 1049279219Sken * zfs_unlinked_drain() isn't called.) This is because 1050279219Sken * ziltest causes spa_sync() to think it's committed, 1051279219Sken * but actually it is not, so the intent log contains 1052279219Sken * many txg's worth of changes. 1053279219Sken * 1054279219Sken * In particular, if object N is in the unlinked set in 1055279219Sken * the last txg to actually sync, then it could be 1056279219Sken * actually freed in a later txg and then reallocated 1057279219Sken * in a yet later txg. This would write a "create 1058279219Sken * object N" record to the intent log. Normally, this 1059279219Sken * would be fine because the spa_sync() would have 1060279219Sken * written out the fact that object N is free, before 1061279219Sken * we could write the "create object N" intent log 1062279219Sken * record. 1063279219Sken * 1064279219Sken * But when we are in ziltest mode, we advance the "open 1065279219Sken * txg" without actually spa_sync()-ing the changes to 1066279219Sken * disk. So we would see that object N is still 1067279219Sken * allocated and in the unlinked set, and there is an 1068279219Sken * intent log record saying to allocate it. 1069279219Sken */ 1070279219Sken if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1071279219Sken if (zil_replay_disable) { 1072279219Sken zil_destroy(zfsvfs->z_log, B_FALSE); 1073279219Sken } else { 1074279219Sken zfsvfs->z_replay = B_TRUE; 1075279219Sken zil_replay(zfsvfs->z_os, zfsvfs, 1076279219Sken zfs_replay_vector); 1077279219Sken zfsvfs->z_replay = B_FALSE; 1078279219Sken } 1079279219Sken } 1080279219Sken zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 1081279219Sken } 1082279219Sken 1083279219Sken return (0); 1084279219Sken} 1085279219Sken 1086279219Skenextern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 1087279219Sken 1088279219Skenvoid 1089279219Skenzfsvfs_free(zfsvfs_t *zfsvfs) 1090279219Sken{ 1091279219Sken int i; 1092279219Sken 1093279219Sken /* 1094279219Sken * This is a barrier to prevent the filesystem from going away in 1095279219Sken * zfs_znode_move() until we can safely ensure that the filesystem is 1096279219Sken * not unmounted. We consider the filesystem valid before the barrier 1097279219Sken * and invalid after the barrier. 1098279219Sken */ 1099279219Sken rw_enter(&zfsvfs_lock, RW_READER); 1100279219Sken rw_exit(&zfsvfs_lock); 1101279219Sken 1102279219Sken zfs_fuid_destroy(zfsvfs); 1103279219Sken 1104279219Sken mutex_destroy(&zfsvfs->z_znodes_lock); 1105279219Sken mutex_destroy(&zfsvfs->z_lock); 1106279219Sken list_destroy(&zfsvfs->z_all_znodes); 1107279219Sken rrw_destroy(&zfsvfs->z_teardown_lock); 1108279219Sken rw_destroy(&zfsvfs->z_teardown_inactive_lock); 1109279219Sken rw_destroy(&zfsvfs->z_fuid_lock); 1110279219Sken for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1111279219Sken mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1112279219Sken kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1113279219Sken} 1114279219Sken 1115279219Skenstatic void 1116279219Skenzfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1117279219Sken{ 1118279219Sken zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1119279219Sken if (zfsvfs->z_vfs) { 1120279219Sken if (zfsvfs->z_use_fuids) { 1121279219Sken vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 1122279219Sken vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 1123279219Sken vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 1124279219Sken vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 1125279219Sken vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 1126279219Sken vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 1127279219Sken } else { 1128279219Sken vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 1129279219Sken vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 1130279219Sken vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 1131279219Sken vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 1132279219Sken vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 1133279219Sken vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 1134279219Sken } 1135279219Sken } 1136279219Sken zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1137279219Sken} 1138279219Sken 1139279219Skenstatic int 1140279219Skenzfs_domount(vfs_t *vfsp, char *osname) 1141279219Sken{ 1142279219Sken uint64_t recordsize, fsid_guid; 1143279219Sken int error = 0; 1144279219Sken zfsvfs_t *zfsvfs; 1145279219Sken vnode_t *vp; 1146279219Sken 1147279219Sken ASSERT(vfsp); 1148279219Sken ASSERT(osname); 1149279219Sken 1150279219Sken error = zfsvfs_create(osname, &zfsvfs); 1151279219Sken if (error) 1152279219Sken return (error); 1153279219Sken zfsvfs->z_vfs = vfsp; 1154279219Sken 1155279219Sken#ifdef illumos 1156279219Sken /* Initialize the generic filesystem structure. */ 1157279219Sken vfsp->vfs_bcount = 0; 1158279219Sken vfsp->vfs_data = NULL; 1159279219Sken 1160279219Sken if (zfs_create_unique_device(&mount_dev) == -1) { 1161279219Sken error = SET_ERROR(ENODEV); 1162279219Sken goto out; 1163279219Sken } 1164279219Sken ASSERT(vfs_devismounted(mount_dev) == 0); 1165279219Sken#endif 1166279219Sken 1167279219Sken if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1168279219Sken NULL)) 1169279219Sken goto out; 1170279219Sken zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1171279219Sken zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1172279219Sken 1173279219Sken vfsp->vfs_data = zfsvfs; 1174279219Sken vfsp->mnt_flag |= MNT_LOCAL; 1175279219Sken vfsp->mnt_kern_flag |= MNTK_MPSAFE; 1176279219Sken vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1177279219Sken vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 117843629Smjacob vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1179279219Sken 1180279219Sken /* 118143629Smjacob * The fsid is 64 bits, composed of an 8-bit fs type, which 1182279219Sken * separates our fsid from any other filesystem types, and a 1183279219Sken * 56-bit objset unique ID. The objset unique ID is unique to 1184279219Sken * all objsets open on this system, provided by unique_create(). 1185279219Sken * The 8-bit fs type must be put in the low bits of fsid[1] 1186279219Sken * because that's where other Solaris filesystems put it. 1187279219Sken */ 1188279219Sken fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1189279219Sken ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1190279219Sken vfsp->vfs_fsid.val[0] = fsid_guid; 1191279219Sken vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1192279219Sken vfsp->mnt_vfc->vfc_typenum & 0xFF; 1193279219Sken 1194279219Sken /* 1195279219Sken * Set features for file system. 1196279219Sken */ 1197279219Sken zfs_set_fuid_feature(zfsvfs); 1198279219Sken if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1199279219Sken vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1200279219Sken vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1201279219Sken vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1202279219Sken } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1203279219Sken vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1204279219Sken vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1205279219Sken } 1206279219Sken vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); 1207279219Sken 1208279219Sken if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1209279219Sken uint64_t pval; 1210279219Sken 1211279219Sken atime_changed_cb(zfsvfs, B_FALSE); 1212279219Sken readonly_changed_cb(zfsvfs, B_TRUE); 1213279219Sken if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1214279219Sken goto out; 1215279219Sken xattr_changed_cb(zfsvfs, pval); 1216279219Sken zfsvfs->z_issnap = B_TRUE; 121743629Smjacob zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 121843629Smjacob 121943629Smjacob mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 122043629Smjacob dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1221279219Sken mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1222279219Sken } else { 1223279219Sken error = zfsvfs_setup(zfsvfs, B_TRUE); 1224279219Sken } 1225279219Sken 1226279219Sken vfs_mountedfrom(vfsp, osname); 1227279219Sken /* Grab extra reference. */ 1228279219Sken VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0); 1229279219Sken VOP_UNLOCK(vp, 0); 1230279219Sken 1231279219Sken if (!zfsvfs->z_issnap) 1232279219Sken zfsctl_create(zfsvfs); 1233279219Skenout: 1234279219Sken if (error) { 1235279219Sken dmu_objset_disown(zfsvfs->z_os, zfsvfs); 1236279219Sken zfsvfs_free(zfsvfs); 1237279219Sken } else { 1238279219Sken atomic_add_32(&zfs_active_fs_count, 1); 1239279219Sken } 1240279219Sken 1241279219Sken return (error); 1242279219Sken} 1243279219Sken 1244279219Skenvoid 1245279219Skenzfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1246279219Sken{ 1247279219Sken objset_t *os = zfsvfs->z_os; 1248279219Sken struct dsl_dataset *ds; 1249279219Sken 1250279219Sken /* 1251279219Sken * Unregister properties. 1252279219Sken */ 1253279219Sken if (!dmu_objset_is_snapshot(os)) { 1254279219Sken ds = dmu_objset_ds(os); 1255279219Sken VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1256279219Sken zfsvfs) == 0); 1257279219Sken 1258279219Sken VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 1259279219Sken zfsvfs) == 0); 1260279219Sken 1261279219Sken VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1262279219Sken zfsvfs) == 0); 1263279219Sken 1264279219Sken VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1265279219Sken zfsvfs) == 0); 1266279219Sken 1267279219Sken VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1268279219Sken zfsvfs) == 0); 1269279219Sken 1270279219Sken VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1271279219Sken zfsvfs) == 0); 1272279219Sken 1273279219Sken VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1274279219Sken zfsvfs) == 0); 1275279219Sken 1276279219Sken VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 1277279219Sken zfsvfs) == 0); 1278279219Sken 1279279219Sken VERIFY(dsl_prop_unregister(ds, "aclinherit", 1280279219Sken acl_inherit_changed_cb, zfsvfs) == 0); 1281279219Sken 1282279219Sken VERIFY(dsl_prop_unregister(ds, "vscan", 1283279219Sken vscan_changed_cb, zfsvfs) == 0); 1284279219Sken } 1285279219Sken} 1286279219Sken 1287279219Sken#ifdef SECLABEL 1288279219Sken/* 1289279219Sken * Convert a decimal digit string to a uint64_t integer. 1290279219Sken */ 1291279219Skenstatic int 1292279219Skenstr_to_uint64(char *str, uint64_t *objnum) 1293279219Sken{ 1294279219Sken uint64_t num = 0; 1295279219Sken 1296279219Sken while (*str) { 1297279219Sken if (*str < '0' || *str > '9') 1298279219Sken return (SET_ERROR(EINVAL)); 1299279219Sken 1300279219Sken num = num*10 + *str++ - '0'; 1301279219Sken } 1302279219Sken 1303279219Sken *objnum = num; 1304279219Sken return (0); 1305279219Sken} 1306279219Sken 1307279219Sken/* 1308279219Sken * The boot path passed from the boot loader is in the form of 1309279219Sken * "rootpool-name/root-filesystem-object-number'. Convert this 1310279219Sken * string to a dataset name: "rootpool-name/root-filesystem-name". 1311279219Sken */ 1312279219Skenstatic int 1313279219Skenzfs_parse_bootfs(char *bpath, char *outpath) 1314279219Sken{ 1315279219Sken char *slashp; 1316279219Sken uint64_t objnum; 1317279219Sken int error; 1318279219Sken 1319279219Sken if (*bpath == 0 || *bpath == '/') 1320279219Sken return (SET_ERROR(EINVAL)); 1321279219Sken 1322279219Sken (void) strcpy(outpath, bpath); 1323279219Sken 1324279219Sken slashp = strchr(bpath, '/'); 1325279219Sken 1326279219Sken /* if no '/', just return the pool name */ 1327279219Sken if (slashp == NULL) { 13287913Sjoerg return (0); 13297913Sjoerg } 1330279219Sken 1331279219Sken /* if not a number, just return the root dataset name */ 1332279219Sken if (str_to_uint64(slashp+1, &objnum)) { 1333279219Sken return (0); 1334279219Sken } 1335279219Sken 1336279219Sken *slashp = '\0'; 1337279219Sken error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 1338279219Sken *slashp = '/'; 1339279219Sken 1340279219Sken return (error); 1341279219Sken} 1342279219Sken 1343279219Sken/* 1344279219Sken * zfs_check_global_label: 1345279261Sken * Check that the hex label string is appropriate for the dataset 1346279261Sken * being mounted into the global_zone proper. 1347279261Sken * 1348279261Sken * Return an error if the hex label string is not default or 1349279261Sken * admin_low/admin_high. For admin_low labels, the corresponding 1350279261Sken * dataset must be readonly. 1351279219Sken */ 1352279219Skenint 1353279219Skenzfs_check_global_label(const char *dsname, const char *hexsl) 1354279219Sken{ 1355279219Sken if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 1356279219Sken return (0); 1357279261Sken if (strcasecmp(hexsl, ADMIN_HIGH) == 0) 1358279261Sken return (0); 1359279261Sken if (strcasecmp(hexsl, ADMIN_LOW) == 0) { 1360279261Sken /* must be readonly */ 1361279261Sken uint64_t rdonly; 1362279261Sken 1363279219Sken if (dsl_prop_get_integer(dsname, 1364279219Sken zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) 1365279219Sken return (SET_ERROR(EACCES)); 1366279219Sken return (rdonly ? 0 : EACCES); 1367279219Sken } 1368279219Sken return (SET_ERROR(EACCES)); 1369279219Sken} 1370279219Sken 1371279219Sken/* 1372279219Sken * zfs_mount_label_policy: 1373279219Sken * Determine whether the mount is allowed according to MAC check. 1374279261Sken * by comparing (where appropriate) label of the dataset against 1375279261Sken * the label of the zone being mounted into. If the dataset has 1376279261Sken * no label, create one. 1377279261Sken * 1378279261Sken * Returns: 1379279219Sken * 0 : access allowed 1380279219Sken * >0 : error code, such as EACCES 1381279219Sken */ 1382279261Skenstatic int 1383279261Skenzfs_mount_label_policy(vfs_t *vfsp, char *osname) 1384279219Sken{ 1385279219Sken int error, retv; 1386279219Sken zone_t *mntzone = NULL; 1387279219Sken ts_label_t *mnt_tsl; 1388279219Sken bslabel_t *mnt_sl; 1389279219Sken bslabel_t ds_sl; 1390279219Sken char ds_hexsl[MAXNAMELEN]; 1391279219Sken 1392279219Sken retv = EACCES; /* assume the worst */ 1393279219Sken 1394279219Sken /* 1395279261Sken * Start by getting the dataset label if it exists. 1396279261Sken */ 1397279261Sken error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1398279219Sken 1, sizeof (ds_hexsl), &ds_hexsl, NULL); 1399279219Sken if (error) 1400279219Sken return (SET_ERROR(EACCES)); 1401279219Sken 1402279219Sken /* 1403279219Sken * If labeling is NOT enabled, then disallow the mount of datasets 1404279219Sken * which have a non-default label already. No other label checks 1405279219Sken * are needed. 1406279219Sken */ 1407279219Sken if (!is_system_labeled()) { 1408279219Sken if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 1409279219Sken return (0); 1410279219Sken return (SET_ERROR(EACCES)); 1411279219Sken } 1412279219Sken 1413279219Sken /* 1414279219Sken * Get the label of the mountpoint. If mounting into the global 1415279219Sken * zone (i.e. mountpoint is not within an active zone and the 1416280230Sken * zoned property is off), the label must be default or 1417280230Sken * admin_low/admin_high only; no other checks are needed. 1418279219Sken */ 1419279219Sken mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); 1420279219Sken if (mntzone->zone_id == GLOBAL_ZONEID) { 1421279219Sken uint64_t zoned; 1422279219Sken 1423279219Sken zone_rele(mntzone); 1424279219Sken 1425279219Sken if (dsl_prop_get_integer(osname, 1426279219Sken zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) 1427279219Sken return (SET_ERROR(EACCES)); 1428279219Sken if (!zoned) 1429279219Sken return (zfs_check_global_label(osname, ds_hexsl)); 1430279219Sken else 1431279219Sken /* 1432279219Sken * This is the case of a zone dataset being mounted 1433279219Sken * initially, before the zone has been fully created; 1434279219Sken * allow this mount into global zone. 1435279219Sken */ 1436279219Sken return (0); 1437279219Sken } 1438279219Sken 1439279219Sken mnt_tsl = mntzone->zone_slabel; 1440279219Sken ASSERT(mnt_tsl != NULL); 1441279219Sken label_hold(mnt_tsl); 1442279219Sken mnt_sl = label2bslabel(mnt_tsl); 1443279219Sken 1444279219Sken if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { 1445279219Sken /* 1446279219Sken * The dataset doesn't have a real label, so fabricate one. 1447279219Sken */ 1448279219Sken char *str = NULL; 1449279219Sken 1450279219Sken if (l_to_str_internal(mnt_sl, &str) == 0 && 1451279219Sken dsl_prop_set_string(osname, 1452279219Sken zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1453279219Sken ZPROP_SRC_LOCAL, str) == 0) 1454279219Sken retv = 0; 1455279219Sken if (str != NULL) 1456279219Sken kmem_free(str, strlen(str) + 1); 1457279219Sken } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { 1458279219Sken /* 1459279219Sken * Now compare labels to complete the MAC check. If the 1460279219Sken * labels are equal then allow access. If the mountpoint 1461279219Sken * label dominates the dataset label, allow readonly access. 1462279219Sken * Otherwise, access is denied. 1463279219Sken */ 1464279219Sken if (blequal(mnt_sl, &ds_sl)) 1465279219Sken retv = 0; 1466279219Sken else if (bldominates(mnt_sl, &ds_sl)) { 1467279219Sken vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1468279219Sken retv = 0; 1469279219Sken } 1470279219Sken } 1471279219Sken 1472279219Sken label_rele(mnt_tsl); 1473279219Sken zone_rele(mntzone); 1474279219Sken return (retv); 1475279219Sken} 1476279219Sken#endif /* SECLABEL */ 1477279219Sken 1478279219Sken#ifdef OPENSOLARIS_MOUNTROOT 1479279219Skenstatic int 1480279219Skenzfs_mountroot(vfs_t *vfsp, enum whymountroot why) 1481279219Sken{ 1482279219Sken int error = 0; 1483279219Sken static int zfsrootdone = 0; 1484279219Sken zfsvfs_t *zfsvfs = NULL; 1485279219Sken znode_t *zp = NULL; 1486279219Sken vnode_t *vp = NULL; 1487279219Sken char *zfs_bootfs; 1488279219Sken char *zfs_devid; 1489279219Sken 1490279219Sken ASSERT(vfsp); 1491279219Sken 1492279219Sken /* 1493279219Sken * The filesystem that we mount as root is defined in the 1494279219Sken * boot property "zfs-bootfs" with a format of 1495279219Sken * "poolname/root-dataset-objnum". 1496279219Sken */ 1497279219Sken if (why == ROOT_INIT) { 1498279219Sken if (zfsrootdone++) 1499279219Sken return (SET_ERROR(EBUSY)); 1500279219Sken /* 1501279219Sken * the process of doing a spa_load will require the 1502279219Sken * clock to be set before we could (for example) do 1503279219Sken * something better by looking at the timestamp on 1504279219Sken * an uberblock, so just set it to -1. 1505279219Sken */ 1506279219Sken clkset(-1); 1507279219Sken 1508279219Sken if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { 1509279219Sken cmn_err(CE_NOTE, "spa_get_bootfs: can not get " 1510279219Sken "bootfs name"); 1511279219Sken return (SET_ERROR(EINVAL)); 1512279219Sken } 1513279219Sken zfs_devid = spa_get_bootprop("diskdevid"); 1514279219Sken error = spa_import_rootpool(rootfs.bo_name, zfs_devid); 1515279219Sken if (zfs_devid) 1516279219Sken spa_free_bootprop(zfs_devid); 1517279219Sken if (error) { 1518279219Sken spa_free_bootprop(zfs_bootfs); 1519279219Sken cmn_err(CE_NOTE, "spa_import_rootpool: error %d", 1520279219Sken error); 1521279219Sken return (error); 1522279219Sken } 1523279219Sken if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { 1524279219Sken spa_free_bootprop(zfs_bootfs); 1525279219Sken cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", 1526279219Sken error); 1527279219Sken return (error); 1528279219Sken } 1529279219Sken 1530279219Sken spa_free_bootprop(zfs_bootfs); 1531279219Sken 1532279219Sken if (error = vfs_lock(vfsp)) 1533279219Sken return (error); 1534279219Sken 1535279219Sken if (error = zfs_domount(vfsp, rootfs.bo_name)) { 1536279219Sken cmn_err(CE_NOTE, "zfs_domount: error %d", error); 1537279219Sken goto out; 1538279219Sken } 1539279219Sken 1540279219Sken zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 1541279219Sken ASSERT(zfsvfs); 1542279219Sken if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { 1543279219Sken cmn_err(CE_NOTE, "zfs_zget: error %d", error); 1544279219Sken goto out; 1545279219Sken } 1546279219Sken 1547279219Sken vp = ZTOV(zp); 1548279219Sken mutex_enter(&vp->v_lock); 1549279219Sken vp->v_flag |= VROOT; 1550279219Sken mutex_exit(&vp->v_lock); 1551279219Sken rootvp = vp; 1552279219Sken 1553279219Sken /* 1554279219Sken * Leave rootvp held. The root file system is never unmounted. 1555279219Sken */ 1556279219Sken 1557279219Sken vfs_add((struct vnode *)0, vfsp, 1558279219Sken (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 1559279219Skenout: 1560279219Sken vfs_unlock(vfsp); 1561279219Sken return (error); 1562279219Sken } else if (why == ROOT_REMOUNT) { 1563279219Sken readonly_changed_cb(vfsp->vfs_data, B_FALSE); 1564279219Sken vfsp->vfs_flag |= VFS_REMOUNT; 1565279219Sken 1566279219Sken /* refresh mount options */ 1567279219Sken zfs_unregister_callbacks(vfsp->vfs_data); 1568279219Sken return (zfs_register_callbacks(vfsp)); 1569279219Sken 1570279219Sken } else if (why == ROOT_UNMOUNT) { 1571279219Sken zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 1572279219Sken (void) zfs_sync(vfsp, 0, 0); 1573279219Sken return (0); 1574279219Sken } 1575279219Sken 1576279219Sken /* 1577279219Sken * if "why" is equal to anything else other than ROOT_INIT, 1578279219Sken * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 1579279219Sken */ 1580279219Sken return (SET_ERROR(ENOTSUP)); 1581279219Sken} 1582279219Sken#endif /* OPENSOLARIS_MOUNTROOT */ 1583279219Sken 1584279219Skenstatic int 1585279219Skengetpoolname(const char *osname, char *poolname) 1586279219Sken{ 1587279219Sken char *p; 1588279219Sken 1589227174Sed p = strchr(osname, '/'); 15909541Sjoerg if (p == NULL) { 15919541Sjoerg if (strlen(osname) >= MAXNAMELEN) 15929541Sjoerg return (ENAMETOOLONG); 15939541Sjoerg (void) strcpy(poolname, osname); 15949541Sjoerg } else { 15959541Sjoerg if (p - osname >= MAXNAMELEN) 15969541Sjoerg return (ENAMETOOLONG); 15979541Sjoerg (void) strncpy(poolname, osname, p - osname); 15989541Sjoerg poolname[p - osname] = '\0'; 1599 } 1600 return (0); 1601} 1602 1603/*ARGSUSED*/ 1604static int 1605zfs_mount(vfs_t *vfsp) 1606{ 1607 kthread_t *td = curthread; 1608 vnode_t *mvp = vfsp->mnt_vnodecovered; 1609 cred_t *cr = td->td_ucred; 1610 char *osname; 1611 int error = 0; 1612 int canwrite; 1613 1614#ifdef illumos 1615 if (mvp->v_type != VDIR) 1616 return (SET_ERROR(ENOTDIR)); 1617 1618 mutex_enter(&mvp->v_lock); 1619 if ((uap->flags & MS_REMOUNT) == 0 && 1620 (uap->flags & MS_OVERLAY) == 0 && 1621 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 1622 mutex_exit(&mvp->v_lock); 1623 return (SET_ERROR(EBUSY)); 1624 } 1625 mutex_exit(&mvp->v_lock); 1626 1627 /* 1628 * ZFS does not support passing unparsed data in via MS_DATA. 1629 * Users should use the MS_OPTIONSTR interface; this means 1630 * that all option parsing is already done and the options struct 1631 * can be interrogated. 1632 */ 1633 if ((uap->flags & MS_DATA) && uap->datalen > 0) 1634#else 1635 if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS)) 1636 return (SET_ERROR(EPERM)); 1637 1638 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1639 return (SET_ERROR(EINVAL)); 1640#endif /* ! illumos */ 1641 1642 /* 1643 * If full-owner-access is enabled and delegated administration is 1644 * turned on, we must set nosuid. 1645 */ 1646 if (zfs_super_owner && 1647 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1648 secpolicy_fs_mount_clearopts(cr, vfsp); 1649 } 1650 1651 /* 1652 * Check for mount privilege? 1653 * 1654 * If we don't have privilege then see if 1655 * we have local permission to allow it 1656 */ 1657 error = secpolicy_fs_mount(cr, mvp, vfsp); 1658 if (error) { 1659 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1660 goto out; 1661 1662 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1663 vattr_t vattr; 1664 1665 /* 1666 * Make sure user is the owner of the mount point 1667 * or has sufficient privileges. 1668 */ 1669 1670 vattr.va_mask = AT_UID; 1671 1672 vn_lock(mvp, LK_SHARED | LK_RETRY); 1673 if (VOP_GETATTR(mvp, &vattr, cr)) { 1674 VOP_UNLOCK(mvp, 0); 1675 goto out; 1676 } 1677 1678 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1679 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1680 VOP_UNLOCK(mvp, 0); 1681 goto out; 1682 } 1683 VOP_UNLOCK(mvp, 0); 1684 } 1685 1686 secpolicy_fs_mount_clearopts(cr, vfsp); 1687 } 1688 1689 /* 1690 * Refuse to mount a filesystem if we are in a local zone and the 1691 * dataset is not visible. 1692 */ 1693 if (!INGLOBALZONE(curthread) && 1694 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1695 error = SET_ERROR(EPERM); 1696 goto out; 1697 } 1698 1699#ifdef SECLABEL 1700 error = zfs_mount_label_policy(vfsp, osname); 1701 if (error) 1702 goto out; 1703#endif 1704 1705 vfsp->vfs_flag |= MNT_NFS4ACLS; 1706 1707 /* 1708 * When doing a remount, we simply refresh our temporary properties 1709 * according to those options set in the current VFS options. 1710 */ 1711 if (vfsp->vfs_flag & MS_REMOUNT) { 1712 /* refresh mount options */ 1713 zfs_unregister_callbacks(vfsp->vfs_data); 1714 error = zfs_register_callbacks(vfsp); 1715 goto out; 1716 } 1717 1718 /* Initial root mount: try hard to import the requested root pool. */ 1719 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 1720 (vfsp->vfs_flag & MNT_UPDATE) == 0) { 1721 char pname[MAXNAMELEN]; 1722 1723 error = getpoolname(osname, pname); 1724 if (error == 0) 1725 error = spa_import_rootpool(pname); 1726 if (error) 1727 goto out; 1728 } 1729 DROP_GIANT(); 1730 error = zfs_domount(vfsp, osname); 1731 PICKUP_GIANT(); 1732 1733#ifdef sun 1734 /* 1735 * Add an extra VFS_HOLD on our parent vfs so that it can't 1736 * disappear due to a forced unmount. 1737 */ 1738 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1739 VFS_HOLD(mvp->v_vfsp); 1740#endif /* sun */ 1741 1742out: 1743 return (error); 1744} 1745 1746static int 1747zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1748{ 1749 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1750 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1751 1752 statp->f_version = STATFS_VERSION; 1753 1754 ZFS_ENTER(zfsvfs); 1755 1756 dmu_objset_space(zfsvfs->z_os, 1757 &refdbytes, &availbytes, &usedobjs, &availobjs); 1758 1759 /* 1760 * The underlying storage pool actually uses multiple block sizes. 1761 * We report the fragsize as the smallest block size we support, 1762 * and we report our blocksize as the filesystem's maximum blocksize. 1763 */ 1764 statp->f_bsize = SPA_MINBLOCKSIZE; 1765 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1766 1767 /* 1768 * The following report "total" blocks of various kinds in the 1769 * file system, but reported in terms of f_frsize - the 1770 * "fragment" size. 1771 */ 1772 1773 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1774 statp->f_bfree = availbytes / statp->f_bsize; 1775 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1776 1777 /* 1778 * statvfs() should really be called statufs(), because it assumes 1779 * static metadata. ZFS doesn't preallocate files, so the best 1780 * we can do is report the max that could possibly fit in f_files, 1781 * and that minus the number actually used in f_ffree. 1782 * For f_ffree, report the smaller of the number of object available 1783 * and the number of blocks (each object will take at least a block). 1784 */ 1785 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1786 statp->f_files = statp->f_ffree + usedobjs; 1787 1788 /* 1789 * We're a zfs filesystem. 1790 */ 1791 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 1792 1793 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1794 sizeof(statp->f_mntfromname)); 1795 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1796 sizeof(statp->f_mntonname)); 1797 1798 statp->f_namemax = ZFS_MAXNAMELEN; 1799 1800 ZFS_EXIT(zfsvfs); 1801 return (0); 1802} 1803 1804int 1805zfs_vnode_lock(vnode_t *vp, int flags) 1806{ 1807 int error; 1808 1809 ASSERT(vp != NULL); 1810 1811 error = vn_lock(vp, flags); 1812 return (error); 1813} 1814 1815static int 1816zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1817{ 1818 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1819 znode_t *rootzp; 1820 int error; 1821 1822 ZFS_ENTER_NOERROR(zfsvfs); 1823 1824 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1825 if (error == 0) 1826 *vpp = ZTOV(rootzp); 1827 1828 ZFS_EXIT(zfsvfs); 1829 1830 if (error == 0) { 1831 error = zfs_vnode_lock(*vpp, flags); 1832 if (error == 0) 1833 (*vpp)->v_vflag |= VV_ROOT; 1834 } 1835 if (error != 0) 1836 *vpp = NULL; 1837 1838 return (error); 1839} 1840 1841/* 1842 * Teardown the zfsvfs::z_os. 1843 * 1844 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1845 * and 'z_teardown_inactive_lock' held. 1846 */ 1847static int 1848zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1849{ 1850 znode_t *zp; 1851 1852 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1853 1854 if (!unmounting) { 1855 /* 1856 * We purge the parent filesystem's vfsp as the parent 1857 * filesystem and all of its snapshots have their vnode's 1858 * v_vfsp set to the parent's filesystem's vfsp. Note, 1859 * 'z_parent' is self referential for non-snapshots. 1860 */ 1861 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1862#ifdef FREEBSD_NAMECACHE 1863 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1864#endif 1865 } 1866 1867 /* 1868 * Close the zil. NB: Can't close the zil while zfs_inactive 1869 * threads are blocked as zil_close can call zfs_inactive. 1870 */ 1871 if (zfsvfs->z_log) { 1872 zil_close(zfsvfs->z_log); 1873 zfsvfs->z_log = NULL; 1874 } 1875 1876 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1877 1878 /* 1879 * If we are not unmounting (ie: online recv) and someone already 1880 * unmounted this file system while we were doing the switcheroo, 1881 * or a reopen of z_os failed then just bail out now. 1882 */ 1883 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1884 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1885 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1886 return (SET_ERROR(EIO)); 1887 } 1888 1889 /* 1890 * At this point there are no vops active, and any new vops will 1891 * fail with EIO since we have z_teardown_lock for writer (only 1892 * relavent for forced unmount). 1893 * 1894 * Release all holds on dbufs. 1895 */ 1896 mutex_enter(&zfsvfs->z_znodes_lock); 1897 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1898 zp = list_next(&zfsvfs->z_all_znodes, zp)) 1899 if (zp->z_sa_hdl) { 1900 ASSERT(ZTOV(zp)->v_count >= 0); 1901 zfs_znode_dmu_fini(zp); 1902 } 1903 mutex_exit(&zfsvfs->z_znodes_lock); 1904 1905 /* 1906 * If we are unmounting, set the unmounted flag and let new vops 1907 * unblock. zfs_inactive will have the unmounted behavior, and all 1908 * other vops will fail with EIO. 1909 */ 1910 if (unmounting) { 1911 zfsvfs->z_unmounted = B_TRUE; 1912 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1913 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1914 } 1915 1916 /* 1917 * z_os will be NULL if there was an error in attempting to reopen 1918 * zfsvfs, so just return as the properties had already been 1919 * unregistered and cached data had been evicted before. 1920 */ 1921 if (zfsvfs->z_os == NULL) 1922 return (0); 1923 1924 /* 1925 * Unregister properties. 1926 */ 1927 zfs_unregister_callbacks(zfsvfs); 1928 1929 /* 1930 * Evict cached data 1931 */ 1932 if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) && 1933 !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 1934 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1935 dmu_objset_evict_dbufs(zfsvfs->z_os); 1936 1937 return (0); 1938} 1939 1940/*ARGSUSED*/ 1941static int 1942zfs_umount(vfs_t *vfsp, int fflag) 1943{ 1944 kthread_t *td = curthread; 1945 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1946 objset_t *os; 1947 cred_t *cr = td->td_ucred; 1948 int ret; 1949 1950 ret = secpolicy_fs_unmount(cr, vfsp); 1951 if (ret) { 1952 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1953 ZFS_DELEG_PERM_MOUNT, cr)) 1954 return (ret); 1955 } 1956 1957 /* 1958 * We purge the parent filesystem's vfsp as the parent filesystem 1959 * and all of its snapshots have their vnode's v_vfsp set to the 1960 * parent's filesystem's vfsp. Note, 'z_parent' is self 1961 * referential for non-snapshots. 1962 */ 1963 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1964 1965 /* 1966 * Unmount any snapshots mounted under .zfs before unmounting the 1967 * dataset itself. 1968 */ 1969 if (zfsvfs->z_ctldir != NULL) { 1970 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1971 return (ret); 1972 ret = vflush(vfsp, 0, 0, td); 1973 ASSERT(ret == EBUSY); 1974 if (!(fflag & MS_FORCE)) { 1975 if (zfsvfs->z_ctldir->v_count > 1) 1976 return (EBUSY); 1977 ASSERT(zfsvfs->z_ctldir->v_count == 1); 1978 } 1979 zfsctl_destroy(zfsvfs); 1980 ASSERT(zfsvfs->z_ctldir == NULL); 1981 } 1982 1983 if (fflag & MS_FORCE) { 1984 /* 1985 * Mark file system as unmounted before calling 1986 * vflush(FORCECLOSE). This way we ensure no future vnops 1987 * will be called and risk operating on DOOMED vnodes. 1988 */ 1989 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1990 zfsvfs->z_unmounted = B_TRUE; 1991 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1992 } 1993 1994 /* 1995 * Flush all the files. 1996 */ 1997 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1998 if (ret != 0) { 1999 if (!zfsvfs->z_issnap) { 2000 zfsctl_create(zfsvfs); 2001 ASSERT(zfsvfs->z_ctldir != NULL); 2002 } 2003 return (ret); 2004 } 2005 2006 if (!(fflag & MS_FORCE)) { 2007 /* 2008 * Check the number of active vnodes in the file system. 2009 * Our count is maintained in the vfs structure, but the 2010 * number is off by 1 to indicate a hold on the vfs 2011 * structure itself. 2012 * 2013 * The '.zfs' directory maintains a reference of its 2014 * own, and any active references underneath are 2015 * reflected in the vnode count. 2016 */ 2017 if (zfsvfs->z_ctldir == NULL) { 2018 if (vfsp->vfs_count > 1) 2019 return (SET_ERROR(EBUSY)); 2020 } else { 2021 if (vfsp->vfs_count > 2 || 2022 zfsvfs->z_ctldir->v_count > 1) 2023 return (SET_ERROR(EBUSY)); 2024 } 2025 } 2026 2027 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 2028 os = zfsvfs->z_os; 2029 2030 /* 2031 * z_os will be NULL if there was an error in 2032 * attempting to reopen zfsvfs. 2033 */ 2034 if (os != NULL) { 2035 /* 2036 * Unset the objset user_ptr. 2037 */ 2038 mutex_enter(&os->os_user_ptr_lock); 2039 dmu_objset_set_user(os, NULL); 2040 mutex_exit(&os->os_user_ptr_lock); 2041 2042 /* 2043 * Finally release the objset 2044 */ 2045 dmu_objset_disown(os, zfsvfs); 2046 } 2047 2048 /* 2049 * We can now safely destroy the '.zfs' directory node. 2050 */ 2051 if (zfsvfs->z_ctldir != NULL) 2052 zfsctl_destroy(zfsvfs); 2053 if (zfsvfs->z_issnap) { 2054 vnode_t *svp = vfsp->mnt_vnodecovered; 2055 2056 if (svp->v_count >= 2) 2057 VN_RELE(svp); 2058 } 2059 zfs_freevfs(vfsp); 2060 2061 return (0); 2062} 2063 2064static int 2065zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 2066{ 2067 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2068 znode_t *zp; 2069 int err; 2070 2071 /* 2072 * zfs_zget() can't operate on virtual entries like .zfs/ or 2073 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 2074 * This will make NFS to switch to LOOKUP instead of using VGET. 2075 */ 2076 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || 2077 (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) 2078 return (EOPNOTSUPP); 2079 2080 ZFS_ENTER(zfsvfs); 2081 err = zfs_zget(zfsvfs, ino, &zp); 2082 if (err == 0 && zp->z_unlinked) { 2083 VN_RELE(ZTOV(zp)); 2084 err = EINVAL; 2085 } 2086 if (err == 0) 2087 *vpp = ZTOV(zp); 2088 ZFS_EXIT(zfsvfs); 2089 if (err == 0) 2090 err = zfs_vnode_lock(*vpp, flags); 2091 if (err != 0) 2092 *vpp = NULL; 2093 else 2094 (*vpp)->v_hash = ino; 2095 return (err); 2096} 2097 2098static int 2099zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 2100 struct ucred **credanonp, int *numsecflavors, int **secflavors) 2101{ 2102 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2103 2104 /* 2105 * If this is regular file system vfsp is the same as 2106 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 2107 * zfsvfs->z_parent->z_vfs represents parent file system 2108 * which we have to use here, because only this file system 2109 * has mnt_export configured. 2110 */ 2111 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 2112 credanonp, numsecflavors, secflavors)); 2113} 2114 2115CTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 2116CTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 2117 2118static int 2119zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 2120{ 2121 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2122 znode_t *zp; 2123 uint64_t object = 0; 2124 uint64_t fid_gen = 0; 2125 uint64_t gen_mask; 2126 uint64_t zp_gen; 2127 int i, err; 2128 2129 *vpp = NULL; 2130 2131 ZFS_ENTER(zfsvfs); 2132 2133 /* 2134 * On FreeBSD we can get snapshot's mount point or its parent file 2135 * system mount point depending if snapshot is already mounted or not. 2136 */ 2137 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 2138 zfid_long_t *zlfid = (zfid_long_t *)fidp; 2139 uint64_t objsetid = 0; 2140 uint64_t setgen = 0; 2141 2142 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 2143 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 2144 2145 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 2146 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 2147 2148 ZFS_EXIT(zfsvfs); 2149 2150 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 2151 if (err) 2152 return (SET_ERROR(EINVAL)); 2153 ZFS_ENTER(zfsvfs); 2154 } 2155 2156 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 2157 zfid_short_t *zfid = (zfid_short_t *)fidp; 2158 2159 for (i = 0; i < sizeof (zfid->zf_object); i++) 2160 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 2161 2162 for (i = 0; i < sizeof (zfid->zf_gen); i++) 2163 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 2164 } else { 2165 ZFS_EXIT(zfsvfs); 2166 return (SET_ERROR(EINVAL)); 2167 } 2168 2169 /* 2170 * A zero fid_gen means we are in .zfs or the .zfs/snapshot 2171 * directory tree. If the object == zfsvfs->z_shares_dir, then 2172 * we are in the .zfs/shares directory tree. 2173 */ 2174 if ((fid_gen == 0 && 2175 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 2176 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 2177 *vpp = zfsvfs->z_ctldir; 2178 ASSERT(*vpp != NULL); 2179 if (object == ZFSCTL_INO_SNAPDIR) { 2180 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 2181 0, NULL, NULL, NULL, NULL, NULL) == 0); 2182 } else if (object == zfsvfs->z_shares_dir) { 2183 VERIFY(zfsctl_root_lookup(*vpp, "shares", vpp, NULL, 2184 0, NULL, NULL, NULL, NULL, NULL) == 0); 2185 } else { 2186 VN_HOLD(*vpp); 2187 } 2188 ZFS_EXIT(zfsvfs); 2189 err = zfs_vnode_lock(*vpp, flags); 2190 if (err != 0) 2191 *vpp = NULL; 2192 return (err); 2193 } 2194 2195 gen_mask = -1ULL >> (64 - 8 * i); 2196 2197 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 2198 if (err = zfs_zget(zfsvfs, object, &zp)) { 2199 ZFS_EXIT(zfsvfs); 2200 return (err); 2201 } 2202 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 2203 sizeof (uint64_t)); 2204 zp_gen = zp_gen & gen_mask; 2205 if (zp_gen == 0) 2206 zp_gen = 1; 2207 if (zp->z_unlinked || zp_gen != fid_gen) { 2208 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 2209 VN_RELE(ZTOV(zp)); 2210 ZFS_EXIT(zfsvfs); 2211 return (SET_ERROR(EINVAL)); 2212 } 2213 2214 *vpp = ZTOV(zp); 2215 ZFS_EXIT(zfsvfs); 2216 err = zfs_vnode_lock(*vpp, flags | LK_RETRY); 2217 if (err == 0) 2218 vnode_create_vobject(*vpp, zp->z_size, curthread); 2219 else 2220 *vpp = NULL; 2221 return (err); 2222} 2223 2224/* 2225 * Block out VOPs and close zfsvfs_t::z_os 2226 * 2227 * Note, if successful, then we return with the 'z_teardown_lock' and 2228 * 'z_teardown_inactive_lock' write held. 2229 */ 2230int 2231zfs_suspend_fs(zfsvfs_t *zfsvfs) 2232{ 2233 int error; 2234 2235 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 2236 return (error); 2237 dmu_objset_disown(zfsvfs->z_os, zfsvfs); 2238 2239 return (0); 2240} 2241 2242/* 2243 * Reopen zfsvfs_t::z_os and release VOPs. 2244 */ 2245int 2246zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) 2247{ 2248 int err; 2249 2250 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 2251 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 2252 2253 err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, 2254 &zfsvfs->z_os); 2255 if (err) { 2256 zfsvfs->z_os = NULL; 2257 } else { 2258 znode_t *zp; 2259 uint64_t sa_obj = 0; 2260 2261 /* 2262 * Make sure version hasn't changed 2263 */ 2264 2265 err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION, 2266 &zfsvfs->z_version); 2267 2268 if (err) 2269 goto bail; 2270 2271 err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, 2272 ZFS_SA_ATTRS, 8, 1, &sa_obj); 2273 2274 if (err && zfsvfs->z_version >= ZPL_VERSION_SA) 2275 goto bail; 2276 2277 if ((err = sa_setup(zfsvfs->z_os, sa_obj, 2278 zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) 2279 goto bail; 2280 2281 if (zfsvfs->z_version >= ZPL_VERSION_SA) 2282 sa_register_update_callback(zfsvfs->z_os, 2283 zfs_sa_upgrade); 2284 2285 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 2286 2287 zfs_set_fuid_feature(zfsvfs); 2288 2289 /* 2290 * Attempt to re-establish all the active znodes with 2291 * their dbufs. If a zfs_rezget() fails, then we'll let 2292 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 2293 * when they try to use their znode. 2294 */ 2295 mutex_enter(&zfsvfs->z_znodes_lock); 2296 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 2297 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 2298 (void) zfs_rezget(zp); 2299 } 2300 mutex_exit(&zfsvfs->z_znodes_lock); 2301 } 2302 2303bail: 2304 /* release the VOPs */ 2305 rw_exit(&zfsvfs->z_teardown_inactive_lock); 2306 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 2307 2308 if (err) { 2309 /* 2310 * Since we couldn't reopen zfsvfs::z_os, or 2311 * setup the sa framework force unmount this file system. 2312 */ 2313 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 2314 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 2315 } 2316 return (err); 2317} 2318 2319static void 2320zfs_freevfs(vfs_t *vfsp) 2321{ 2322 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2323 2324#ifdef sun 2325 /* 2326 * If this is a snapshot, we have an extra VFS_HOLD on our parent 2327 * from zfs_mount(). Release it here. If we came through 2328 * zfs_mountroot() instead, we didn't grab an extra hold, so 2329 * skip the VFS_RELE for rootvfs. 2330 */ 2331 if (zfsvfs->z_issnap && (vfsp != rootvfs)) 2332 VFS_RELE(zfsvfs->z_parent->z_vfs); 2333#endif /* sun */ 2334 2335 zfsvfs_free(zfsvfs); 2336 2337 atomic_add_32(&zfs_active_fs_count, -1); 2338} 2339 2340#ifdef __i386__ 2341static int desiredvnodes_backup; 2342#endif 2343 2344static void 2345zfs_vnodes_adjust(void) 2346{ 2347#ifdef __i386__ 2348 int newdesiredvnodes; 2349 2350 desiredvnodes_backup = desiredvnodes; 2351 2352 /* 2353 * We calculate newdesiredvnodes the same way it is done in 2354 * vntblinit(). If it is equal to desiredvnodes, it means that 2355 * it wasn't tuned by the administrator and we can tune it down. 2356 */ 2357 newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 2358 vm_kmem_size / (5 * (sizeof(struct vm_object) + 2359 sizeof(struct vnode)))); 2360 if (newdesiredvnodes == desiredvnodes) 2361 desiredvnodes = (3 * newdesiredvnodes) / 4; 2362#endif 2363} 2364 2365static void 2366zfs_vnodes_adjust_back(void) 2367{ 2368 2369#ifdef __i386__ 2370 desiredvnodes = desiredvnodes_backup; 2371#endif 2372} 2373 2374void 2375zfs_init(void) 2376{ 2377 2378 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 2379 2380 /* 2381 * Initialize .zfs directory structures 2382 */ 2383 zfsctl_init(); 2384 2385 /* 2386 * Initialize znode cache, vnode ops, etc... 2387 */ 2388 zfs_znode_init(); 2389 2390 /* 2391 * Reduce number of vnodes. Originally number of vnodes is calculated 2392 * with UFS inode in mind. We reduce it here, because it's too big for 2393 * ZFS/i386. 2394 */ 2395 zfs_vnodes_adjust(); 2396 2397 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 2398} 2399 2400void 2401zfs_fini(void) 2402{ 2403 zfsctl_fini(); 2404 zfs_znode_fini(); 2405 zfs_vnodes_adjust_back(); 2406} 2407 2408int 2409zfs_busy(void) 2410{ 2411 return (zfs_active_fs_count != 0); 2412} 2413 2414int 2415zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2416{ 2417 int error; 2418 objset_t *os = zfsvfs->z_os; 2419 dmu_tx_t *tx; 2420 2421 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2422 return (SET_ERROR(EINVAL)); 2423 2424 if (newvers < zfsvfs->z_version) 2425 return (SET_ERROR(EINVAL)); 2426 2427 if (zfs_spa_version_map(newvers) > 2428 spa_version(dmu_objset_spa(zfsvfs->z_os))) 2429 return (SET_ERROR(ENOTSUP)); 2430 2431 tx = dmu_tx_create(os); 2432 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2433 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2434 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2435 ZFS_SA_ATTRS); 2436 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2437 } 2438 error = dmu_tx_assign(tx, TXG_WAIT); 2439 if (error) { 2440 dmu_tx_abort(tx); 2441 return (error); 2442 } 2443 2444 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2445 8, 1, &newvers, tx); 2446 2447 if (error) { 2448 dmu_tx_commit(tx); 2449 return (error); 2450 } 2451 2452 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2453 uint64_t sa_obj; 2454 2455 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2456 SPA_VERSION_SA); 2457 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2458 DMU_OT_NONE, 0, tx); 2459 2460 error = zap_add(os, MASTER_NODE_OBJ, 2461 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2462 ASSERT0(error); 2463 2464 VERIFY(0 == sa_set_sa_object(os, sa_obj)); 2465 sa_register_update_callback(os, zfs_sa_upgrade); 2466 } 2467 2468 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, 2469 "from %llu to %llu", zfsvfs->z_version, newvers); 2470 2471 dmu_tx_commit(tx); 2472 2473 zfsvfs->z_version = newvers; 2474 2475 zfs_set_fuid_feature(zfsvfs); 2476 2477 return (0); 2478} 2479 2480/* 2481 * Read a property stored within the master node. 2482 */ 2483int 2484zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 2485{ 2486 const char *pname; 2487 int error = ENOENT; 2488 2489 /* 2490 * Look up the file system's value for the property. For the 2491 * version property, we look up a slightly different string. 2492 */ 2493 if (prop == ZFS_PROP_VERSION) 2494 pname = ZPL_VERSION_STR; 2495 else 2496 pname = zfs_prop_to_name(prop); 2497 2498 if (os != NULL) 2499 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 2500 2501 if (error == ENOENT) { 2502 /* No value set, use the default value */ 2503 switch (prop) { 2504 case ZFS_PROP_VERSION: 2505 *value = ZPL_VERSION; 2506 break; 2507 case ZFS_PROP_NORMALIZE: 2508 case ZFS_PROP_UTF8ONLY: 2509 *value = 0; 2510 break; 2511 case ZFS_PROP_CASE: 2512 *value = ZFS_CASE_SENSITIVE; 2513 break; 2514 default: 2515 return (error); 2516 } 2517 error = 0; 2518 } 2519 return (error); 2520} 2521 2522#ifdef _KERNEL 2523void 2524zfsvfs_update_fromname(const char *oldname, const char *newname) 2525{ 2526 char tmpbuf[MAXPATHLEN]; 2527 struct mount *mp; 2528 char *fromname; 2529 size_t oldlen; 2530 2531 oldlen = strlen(oldname); 2532 2533 mtx_lock(&mountlist_mtx); 2534 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 2535 fromname = mp->mnt_stat.f_mntfromname; 2536 if (strcmp(fromname, oldname) == 0) { 2537 (void)strlcpy(fromname, newname, 2538 sizeof(mp->mnt_stat.f_mntfromname)); 2539 continue; 2540 } 2541 if (strncmp(fromname, oldname, oldlen) == 0 && 2542 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 2543 (void)snprintf(tmpbuf, sizeof(tmpbuf), "%s%s", 2544 newname, fromname + oldlen); 2545 (void)strlcpy(fromname, tmpbuf, 2546 sizeof(mp->mnt_stat.f_mntfromname)); 2547 continue; 2548 } 2549 } 2550 mtx_unlock(&mountlist_mtx); 2551} 2552#endif 2553