1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23226707Spjd * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24226707Spjd * All rights reserved. 25303969Savg * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26296519Smav * Copyright (c) 2014 Integros [integros.com] 27331384Smav * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
28168404Spjd */ 29168404Spjd 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31219089Spjd 32168404Spjd#include <sys/types.h> 33168404Spjd#include <sys/param.h> 34168404Spjd#include <sys/systm.h> 35168404Spjd#include <sys/kernel.h> 36168404Spjd#include <sys/sysmacros.h> 37168404Spjd#include <sys/kmem.h> 38168404Spjd#include <sys/acl.h> 39168404Spjd#include <sys/vnode.h> 40168404Spjd#include <sys/vfs.h> 41168404Spjd#include <sys/mntent.h> 42168404Spjd#include <sys/mount.h> 43168404Spjd#include <sys/cmn_err.h> 44168404Spjd#include <sys/zfs_znode.h> 45168404Spjd#include <sys/zfs_dir.h> 46168404Spjd#include <sys/zil.h> 47168404Spjd#include <sys/fs/zfs.h> 48168404Spjd#include <sys/dmu.h> 49168404Spjd#include <sys/dsl_prop.h> 50168404Spjd#include <sys/dsl_dataset.h> 51185029Spjd#include <sys/dsl_deleg.h> 52168404Spjd#include <sys/spa.h> 53168404Spjd#include <sys/zap.h> 54219089Spjd#include <sys/sa.h> 55240955Smm#include <sys/sa_impl.h> 56168404Spjd#include <sys/varargs.h> 57168962Spjd#include <sys/policy.h> 58168404Spjd#include <sys/atomic.h> 59168404Spjd#include <sys/zfs_ioctl.h> 60168404Spjd#include <sys/zfs_ctldir.h> 61185029Spjd#include <sys/zfs_fuid.h> 62168962Spjd#include <sys/sunddi.h> 63168404Spjd#include <sys/dnlc.h> 64185029Spjd#include <sys/dmu_objset.h> 65185029Spjd#include <sys/spa_boot.h> 66232186Smm#include <sys/jail.h> 67339008Ssef#include <ufs/ufs/quota.h> 68339008Ssef 69219089Spjd#include "zfs_comutil.h" 70168404Spjd 71168404Spjdstruct mtx zfs_debug_mtx; 72168404SpjdMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 73185029Spjd 74168404SpjdSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 75185029Spjd 76219089Spjdint zfs_super_owner; 77185029SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 78185029Spjd "File system owner can perform privileged operation on his file systems"); 79185029Spjd 80219089Spjdint zfs_debug_level; 81267992ShselaskySYSCTL_INT(_vfs_zfs, OID_AUTO, debug, 
CTLFLAG_RWTUN, &zfs_debug_level, 0, 82168404Spjd "Debug level"); 83168404Spjd 84185029SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 85185029Spjdstatic int zfs_version_acl = ZFS_ACL_VERSION; 86185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 87185029Spjd "ZFS_ACL_VERSION"); 88185029Spjdstatic int zfs_version_spa = SPA_VERSION; 89185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 90185029Spjd "SPA_VERSION"); 91185029Spjdstatic int zfs_version_zpl = ZPL_VERSION; 92185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 93185029Spjd "ZPL_VERSION"); 94185029Spjd 95339008Ssefstatic int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 96191990Sattiliostatic int zfs_mount(vfs_t *vfsp); 97191990Sattiliostatic int zfs_umount(vfs_t *vfsp, int fflag); 98191990Sattiliostatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 99191990Sattiliostatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 100168404Spjdstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 101191990Sattiliostatic int zfs_sync(vfs_t *vfsp, int waitfor); 102196982Spjdstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 103196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors); 104222167Srmacklemstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 105168404Spjdstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 106168404Spjdstatic void zfs_freevfs(vfs_t *vfsp); 107168404Spjd 108301997Skibstruct vfsops zfs_vfsops = { 109168404Spjd .vfs_mount = zfs_mount, 110168404Spjd .vfs_unmount = zfs_umount, 111168404Spjd .vfs_root = zfs_root, 112168404Spjd .vfs_statfs = zfs_statfs, 113168404Spjd .vfs_vget = zfs_vget, 114168404Spjd .vfs_sync = zfs_sync, 115196982Spjd .vfs_checkexp = zfs_checkexp, 116168404Spjd .vfs_fhtovp = zfs_fhtovp, 117339008Ssef .vfs_quotactl = zfs_quotactl, 118168404Spjd}; 
119168404Spjd 120185029SpjdVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 121168404Spjd 122168404Spjd/* 123168404Spjd * We need to keep a count of active fs's. 124168404Spjd * This is necessary to prevent our module 125168404Spjd * from being unloaded after a umount -f 126168404Spjd */ 127168404Spjdstatic uint32_t zfs_active_fs_count = 0; 128168404Spjd 129339008Ssefstatic int 130339008Ssefzfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) 131339008Ssef{ 132339008Ssef int error = 0; 133339008Ssef char buf[32]; 134339008Ssef int err; 135339008Ssef uint64_t usedobj, quotaobj; 136339008Ssef uint64_t quota, used = 0; 137339008Ssef timespec_t now; 138339008Ssef 139339008Ssef usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 140339008Ssef quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 141339008Ssef 142339008Ssef if (quotaobj == 0 || zfsvfs->z_replay) { 143343624Ssef error = EINVAL; 144339008Ssef goto done; 145339008Ssef } 146339008Ssef (void)sprintf(buf, "%llx", (longlong_t)id); 147339008Ssef if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 148339008Ssef buf, sizeof(quota), 1, "a)) != 0) { 149339008Ssef dprintf("%s(%d): quotaobj lookup failed\n", __FUNCTION__, __LINE__); 150339008Ssef goto done; 151339008Ssef } 152339008Ssef /* 153339008Ssef * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 154339008Ssef * So we set them to be the same. 
155339008Ssef */ 156339008Ssef dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 157339008Ssef error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof(used), 1, &used); 158339008Ssef if (error && error != ENOENT) { 159339008Ssef dprintf("%s(%d): usedobj failed; %d\n", __FUNCTION__, __LINE__, error); 160339008Ssef goto done; 161339008Ssef } 162339008Ssef dqp->dqb_curblocks = btodb(used); 163339008Ssef dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 164339008Ssef vfs_timestamp(&now); 165339008Ssef /* 166339008Ssef * Setting this to 0 causes FreeBSD quota(8) to print 167339008Ssef * the number of days since the epoch, which isn't 168339008Ssef * particularly useful. 169339008Ssef */ 170339008Ssef dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 171339008Ssefdone: 172339008Ssef return (error); 173339008Ssef} 174339008Ssef 175339008Ssefstatic int 176339008Ssefzfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 177339008Ssef{ 178339008Ssef zfsvfs_t *zfsvfs = vfsp->vfs_data; 179339008Ssef struct thread *td; 180339008Ssef int cmd, type, error = 0; 181339008Ssef int bitsize; 182339008Ssef uint64_t fuid; 183339008Ssef zfs_userquota_prop_t quota_type; 184339008Ssef struct dqblk64 dqblk = { 0 }; 185339008Ssef 186339008Ssef td = curthread; 187339008Ssef cmd = cmds >> SUBCMDSHIFT; 188339008Ssef type = cmds & SUBCMDMASK; 189339008Ssef 190339008Ssef ZFS_ENTER(zfsvfs); 191339008Ssef if (id == -1) { 192339008Ssef switch (type) { 193339008Ssef case USRQUOTA: 194339008Ssef id = td->td_ucred->cr_ruid; 195339008Ssef break; 196339008Ssef case GRPQUOTA: 197339008Ssef id = td->td_ucred->cr_rgid; 198339008Ssef break; 199339008Ssef default: 200339008Ssef error = EINVAL; 201339008Ssef if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 202339008Ssef vfs_unbusy(vfsp); 203339008Ssef goto done; 204339008Ssef } 205339008Ssef } 206339008Ssef /* 207339008Ssef * Map BSD type to: 208339008Ssef * ZFS_PROP_USERUSED, 209339008Ssef * ZFS_PROP_USERQUOTA, 210339008Ssef * ZFS_PROP_GROUPUSED, 211339008Ssef * 
ZFS_PROP_GROUPQUOTA 212339008Ssef */ 213339008Ssef switch (cmd) { 214339008Ssef case Q_SETQUOTA: 215339008Ssef case Q_SETQUOTA32: 216339008Ssef if (type == USRQUOTA) 217339008Ssef quota_type = ZFS_PROP_USERQUOTA; 218339008Ssef else if (type == GRPQUOTA) 219339008Ssef quota_type = ZFS_PROP_GROUPQUOTA; 220339008Ssef else 221339008Ssef error = EINVAL; 222339008Ssef break; 223339008Ssef case Q_GETQUOTA: 224339008Ssef case Q_GETQUOTA32: 225339008Ssef if (type == USRQUOTA) 226339008Ssef quota_type = ZFS_PROP_USERUSED; 227339008Ssef else if (type == GRPQUOTA) 228339008Ssef quota_type = ZFS_PROP_GROUPUSED; 229339008Ssef else 230339008Ssef error = EINVAL; 231339008Ssef break; 232339008Ssef } 233339008Ssef 234339008Ssef /* 235339008Ssef * Depending on the cmd, we may need to get 236339008Ssef * the ruid and domain (see fuidstr_to_sid?), 237339008Ssef * the fuid (how?), or other information. 238339008Ssef * Create fuid using zfs_fuid_create(zfsvfs, id, 239339008Ssef * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 240339008Ssef * I think I can use just the id? 241339008Ssef * 242339008Ssef * Look at zfs_fuid_overquota() to look up a quota. 243339008Ssef * zap_lookup(something, quotaobj, fuidstring, sizeof(long long), 1, "a) 244339008Ssef * 245339008Ssef * See zfs_set_userquota() to set a quota. 
246339008Ssef */ 247339008Ssef if ((u_int)type >= MAXQUOTAS) { 248339008Ssef error = EINVAL; 249339008Ssef goto done; 250339008Ssef } 251339008Ssef 252339008Ssef switch (cmd) { 253339008Ssef case Q_GETQUOTASIZE: 254339008Ssef bitsize = 64; 255339008Ssef error = copyout(&bitsize, arg, sizeof(int)); 256339008Ssef break; 257339008Ssef case Q_QUOTAON: 258339008Ssef // As far as I can tell, you can't turn quotas on or off on zfs 259339008Ssef error = 0; 260339008Ssef vfs_unbusy(vfsp); 261339008Ssef break; 262339008Ssef case Q_QUOTAOFF: 263339008Ssef error = ENOTSUP; 264339008Ssef vfs_unbusy(vfsp); 265339008Ssef break; 266339008Ssef case Q_SETQUOTA: 267364843Sjhb error = copyin(arg, &dqblk, sizeof(dqblk)); 268339008Ssef if (error == 0) 269339008Ssef error = zfs_set_userquota(zfsvfs, quota_type, 270339008Ssef "", id, dbtob(dqblk.dqb_bhardlimit)); 271339008Ssef break; 272339008Ssef case Q_GETQUOTA: 273339008Ssef error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 274339008Ssef if (error == 0) 275339008Ssef error = copyout(&dqblk, arg, sizeof(dqblk)); 276339008Ssef break; 277339008Ssef default: 278339008Ssef error = EINVAL; 279339008Ssef break; 280339008Ssef } 281339008Ssefdone: 282339008Ssef ZFS_EXIT(zfsvfs); 283339008Ssef return (error); 284339008Ssef} 285339008Ssef 286168404Spjd/*ARGSUSED*/ 287168404Spjdstatic int 288191990Sattiliozfs_sync(vfs_t *vfsp, int waitfor) 289168404Spjd{ 290168404Spjd 291168404Spjd /* 292168404Spjd * Data integrity is job one. We don't want a compromised kernel 293168404Spjd * writing to the storage pool, so we never sync during panic. 294168404Spjd */ 295168404Spjd if (panicstr) 296168404Spjd return (0); 297168404Spjd 298277503Swill /* 299277503Swill * Ignore the system syncher. ZFS already commits async data 300277503Swill * at zfs_txg_timeout intervals. 
301277503Swill */ 302277503Swill if (waitfor == MNT_LAZY) 303277503Swill return (0); 304277503Swill 305168404Spjd if (vfsp != NULL) { 306168404Spjd /* 307168404Spjd * Sync a specific filesystem. 308168404Spjd */ 309168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 310209962Smm dsl_pool_t *dp; 311168404Spjd int error; 312168404Spjd 313191990Sattilio error = vfs_stdsync(vfsp, waitfor); 314168404Spjd if (error != 0) 315168404Spjd return (error); 316168404Spjd 317168404Spjd ZFS_ENTER(zfsvfs); 318209962Smm dp = dmu_objset_pool(zfsvfs->z_os); 319209962Smm 320209962Smm /* 321209962Smm * If the system is shutting down, then skip any 322209962Smm * filesystems which may exist on a suspended pool. 323209962Smm */ 324209962Smm if (sys_shutdown && spa_suspended(dp->dp_spa)) { 325209962Smm ZFS_EXIT(zfsvfs); 326209962Smm return (0); 327209962Smm } 328209962Smm 329168404Spjd if (zfsvfs->z_log != NULL) 330219089Spjd zil_commit(zfsvfs->z_log, 0); 331219089Spjd 332168404Spjd ZFS_EXIT(zfsvfs); 333168404Spjd } else { 334168404Spjd /* 335168404Spjd * Sync all ZFS filesystems. This is what happens when you 336168404Spjd * run sync(1M). Unlike other filesystems, ZFS honors the 337168404Spjd * request by waiting for all pools to commit all dirty data. 338168404Spjd */ 339168404Spjd spa_sync_allpools(); 340168404Spjd } 341168404Spjd 342168404Spjd return (0); 343168404Spjd} 344168404Spjd 345252431Srmh#ifndef __FreeBSD_kernel__ 346219089Spjdstatic int 347219089Spjdzfs_create_unique_device(dev_t *dev) 348219089Spjd{ 349219089Spjd major_t new_major; 350219089Spjd 351219089Spjd do { 352219089Spjd ASSERT3U(zfs_minor, <=, MAXMIN32); 353219089Spjd minor_t start = zfs_minor; 354219089Spjd do { 355219089Spjd mutex_enter(&zfs_dev_mtx); 356219089Spjd if (zfs_minor >= MAXMIN32) { 357219089Spjd /* 358219089Spjd * If we're still using the real major 359219089Spjd * keep out of /dev/zfs and /dev/zvol minor 360219089Spjd * number space. 
If we're using a getudev()'ed 361219089Spjd * major number, we can use all of its minors. 362219089Spjd */ 363219089Spjd if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 364219089Spjd zfs_minor = ZFS_MIN_MINOR; 365219089Spjd else 366219089Spjd zfs_minor = 0; 367219089Spjd } else { 368219089Spjd zfs_minor++; 369219089Spjd } 370219089Spjd *dev = makedevice(zfs_major, zfs_minor); 371219089Spjd mutex_exit(&zfs_dev_mtx); 372219089Spjd } while (vfs_devismounted(*dev) && zfs_minor != start); 373219089Spjd if (zfs_minor == start) { 374219089Spjd /* 375219089Spjd * We are using all ~262,000 minor numbers for the 376219089Spjd * current major number. Create a new major number. 377219089Spjd */ 378219089Spjd if ((new_major = getudev()) == (major_t)-1) { 379219089Spjd cmn_err(CE_WARN, 380219089Spjd "zfs_mount: Can't get unique major " 381219089Spjd "device number."); 382219089Spjd return (-1); 383219089Spjd } 384219089Spjd mutex_enter(&zfs_dev_mtx); 385219089Spjd zfs_major = new_major; 386219089Spjd zfs_minor = 0; 387219089Spjd 388219089Spjd mutex_exit(&zfs_dev_mtx); 389219089Spjd } else { 390219089Spjd break; 391219089Spjd } 392219089Spjd /* CONSTANTCONDITION */ 393219089Spjd } while (1); 394219089Spjd 395219089Spjd return (0); 396219089Spjd} 397252431Srmh#endif /* !__FreeBSD_kernel__ */ 398219089Spjd 399168404Spjdstatic void 400168404Spjdatime_changed_cb(void *arg, uint64_t newval) 401168404Spjd{ 402168404Spjd zfsvfs_t *zfsvfs = arg; 403168404Spjd 404168404Spjd if (newval == TRUE) { 405168404Spjd zfsvfs->z_atime = TRUE; 406168404Spjd zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 407168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 408168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 409168404Spjd } else { 410168404Spjd zfsvfs->z_atime = FALSE; 411168404Spjd zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 412168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 413168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 414168404Spjd } 415168404Spjd} 
416168404Spjd 417168404Spjdstatic void 418168404Spjdxattr_changed_cb(void *arg, uint64_t newval) 419168404Spjd{ 420168404Spjd zfsvfs_t *zfsvfs = arg; 421168404Spjd 422168404Spjd if (newval == TRUE) { 423168404Spjd /* XXX locking on vfs_flag? */ 424168404Spjd#ifdef TODO 425168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 426168404Spjd#endif 427168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 428168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 429168404Spjd } else { 430168404Spjd /* XXX locking on vfs_flag? */ 431168404Spjd#ifdef TODO 432168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 433168404Spjd#endif 434168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 435168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 436168404Spjd } 437168404Spjd} 438168404Spjd 439168404Spjdstatic void 440168404Spjdblksz_changed_cb(void *arg, uint64_t newval) 441168404Spjd{ 442168404Spjd zfsvfs_t *zfsvfs = arg; 443274337Sdelphij ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 444274337Sdelphij ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 445274337Sdelphij ASSERT(ISP2(newval)); 446168404Spjd 447168404Spjd zfsvfs->z_max_blksz = newval; 448204101Spjd zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 449168404Spjd} 450168404Spjd 451168404Spjdstatic void 452168404Spjdreadonly_changed_cb(void *arg, uint64_t newval) 453168404Spjd{ 454168404Spjd zfsvfs_t *zfsvfs = arg; 455168404Spjd 456168404Spjd if (newval) { 457168404Spjd /* XXX locking on vfs_flag? */ 458168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 459168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 460168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 461168404Spjd } else { 462168404Spjd /* XXX locking on vfs_flag? 
*/ 463168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 464168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 465168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 466168404Spjd } 467168404Spjd} 468168404Spjd 469168404Spjdstatic void 470168404Spjdsetuid_changed_cb(void *arg, uint64_t newval) 471168404Spjd{ 472168404Spjd zfsvfs_t *zfsvfs = arg; 473168404Spjd 474168404Spjd if (newval == FALSE) { 475168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 476168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 477168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 478168404Spjd } else { 479168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 480168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 481168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 482168404Spjd } 483168404Spjd} 484168404Spjd 485168404Spjdstatic void 486168404Spjdexec_changed_cb(void *arg, uint64_t newval) 487168404Spjd{ 488168404Spjd zfsvfs_t *zfsvfs = arg; 489168404Spjd 490168404Spjd if (newval == FALSE) { 491168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 492168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 493168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 494168404Spjd } else { 495168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 496168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 497168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 498168404Spjd } 499168404Spjd} 500168404Spjd 501185029Spjd/* 502185029Spjd * The nbmand mount option can be changed at mount time. 
503185029Spjd * We can't allow it to be toggled on live file systems or incorrect 504185029Spjd * behavior may be seen from cifs clients 505185029Spjd * 506185029Spjd * This property isn't registered via dsl_prop_register(), but this callback 507185029Spjd * will be called when a file system is first mounted 508185029Spjd */ 509168404Spjdstatic void 510185029Spjdnbmand_changed_cb(void *arg, uint64_t newval) 511185029Spjd{ 512185029Spjd zfsvfs_t *zfsvfs = arg; 513185029Spjd if (newval == FALSE) { 514185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 515185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 516185029Spjd } else { 517185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 518185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 519185029Spjd } 520185029Spjd} 521185029Spjd 522185029Spjdstatic void 523168404Spjdsnapdir_changed_cb(void *arg, uint64_t newval) 524168404Spjd{ 525168404Spjd zfsvfs_t *zfsvfs = arg; 526168404Spjd 527168404Spjd zfsvfs->z_show_ctldir = newval; 528168404Spjd} 529168404Spjd 530168404Spjdstatic void 531185029Spjdvscan_changed_cb(void *arg, uint64_t newval) 532185029Spjd{ 533185029Spjd zfsvfs_t *zfsvfs = arg; 534185029Spjd 535185029Spjd zfsvfs->z_vscan = newval; 536185029Spjd} 537185029Spjd 538185029Spjdstatic void 539224174Smmacl_mode_changed_cb(void *arg, uint64_t newval) 540224174Smm{ 541224174Smm zfsvfs_t *zfsvfs = arg; 542224174Smm 543224174Smm zfsvfs->z_acl_mode = newval; 544224174Smm} 545224174Smm 546224174Smmstatic void 547168404Spjdacl_inherit_changed_cb(void *arg, uint64_t newval) 548168404Spjd{ 549168404Spjd zfsvfs_t *zfsvfs = arg; 550168404Spjd 551168404Spjd zfsvfs->z_acl_inherit = newval; 552168404Spjd} 553168404Spjd 554168404Spjdstatic int 555168404Spjdzfs_register_callbacks(vfs_t *vfsp) 556168404Spjd{ 557168404Spjd struct dsl_dataset *ds = NULL; 558168404Spjd objset_t *os = NULL; 559168404Spjd zfsvfs_t *zfsvfs = NULL; 560185029Spjd uint64_t nbmand; 561247187Smm boolean_t readonly = 
B_FALSE; 562247187Smm boolean_t do_readonly = B_FALSE; 563247187Smm boolean_t setuid = B_FALSE; 564247187Smm boolean_t do_setuid = B_FALSE; 565247187Smm boolean_t exec = B_FALSE; 566247187Smm boolean_t do_exec = B_FALSE; 567248571Smm#ifdef illumos 568247187Smm boolean_t devices = B_FALSE; 569247187Smm boolean_t do_devices = B_FALSE; 570248571Smm#endif 571247187Smm boolean_t xattr = B_FALSE; 572247187Smm boolean_t do_xattr = B_FALSE; 573247187Smm boolean_t atime = B_FALSE; 574247187Smm boolean_t do_atime = B_FALSE; 575168404Spjd int error = 0; 576168404Spjd 577168404Spjd ASSERT(vfsp); 578168404Spjd zfsvfs = vfsp->vfs_data; 579168404Spjd ASSERT(zfsvfs); 580168404Spjd os = zfsvfs->z_os; 581168404Spjd 582168404Spjd /* 583196965Spjd * This function can be called for a snapshot when we update snapshot's 584196965Spjd * mount point, which isn't really supported. 585196965Spjd */ 586196965Spjd if (dmu_objset_is_snapshot(os)) 587196965Spjd return (EOPNOTSUPP); 588196965Spjd 589196965Spjd /* 590168404Spjd * The act of registering our callbacks will destroy any mount 591168404Spjd * options we may have. In order to enable temporary overrides 592168404Spjd * of mount options, we stash away the current values and 593168404Spjd * restore them after we register the callbacks. 
594168404Spjd */ 595219089Spjd if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 596219089Spjd !spa_writeable(dmu_objset_spa(os))) { 597168404Spjd readonly = B_TRUE; 598168404Spjd do_readonly = B_TRUE; 599168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 600168404Spjd readonly = B_FALSE; 601168404Spjd do_readonly = B_TRUE; 602168404Spjd } 603168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 604168404Spjd setuid = B_FALSE; 605168404Spjd do_setuid = B_TRUE; 606168404Spjd } else { 607168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 608168404Spjd setuid = B_FALSE; 609168404Spjd do_setuid = B_TRUE; 610168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 611168404Spjd setuid = B_TRUE; 612168404Spjd do_setuid = B_TRUE; 613168404Spjd } 614168404Spjd } 615168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 616168404Spjd exec = B_FALSE; 617168404Spjd do_exec = B_TRUE; 618168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 619168404Spjd exec = B_TRUE; 620168404Spjd do_exec = B_TRUE; 621168404Spjd } 622168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 623168404Spjd xattr = B_FALSE; 624168404Spjd do_xattr = B_TRUE; 625168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 626168404Spjd xattr = B_TRUE; 627168404Spjd do_xattr = B_TRUE; 628168404Spjd } 629185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 630185029Spjd atime = B_FALSE; 631185029Spjd do_atime = B_TRUE; 632185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 633185029Spjd atime = B_TRUE; 634185029Spjd do_atime = B_TRUE; 635185029Spjd } 636168404Spjd 637168404Spjd /* 638286985Savg * We need to enter pool configuration here, so that we can use 639286985Savg * dsl_prop_get_int_ds() to handle the special nbmand property below. 
640286985Savg * dsl_prop_get_integer() can not be used, because it has to acquire 641286985Savg * spa_namespace_lock and we can not do that because we already hold 642332525Smav * z_teardown_lock. The problem is that spa_write_cachefile() is called 643286985Savg * with spa_namespace_lock held and the function calls ZFS vnode 644286985Savg * operations to write the cache file and thus z_teardown_lock is 645286985Savg * acquired after spa_namespace_lock. 646286985Savg */ 647286985Savg ds = dmu_objset_ds(os); 648286985Savg dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 649286985Savg 650286985Savg /* 651185029Spjd * nbmand is a special property. It can only be changed at 652185029Spjd * mount time. 653185029Spjd * 654185029Spjd * This is weird, but it is documented to only be changeable 655185029Spjd * at mount time. 656185029Spjd */ 657185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 658185029Spjd nbmand = B_FALSE; 659185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 660185029Spjd nbmand = B_TRUE; 661286985Savg } else if (error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0) { 662286985Savg dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 663286985Savg return (error); 664185029Spjd } 665185029Spjd 666185029Spjd /* 667168404Spjd * Register property callbacks. 668168404Spjd * 669168404Spjd * It would probably be fine to just check for i/o error from 670168404Spjd * the first prop_register(), but I guess I like to go 671168404Spjd * overboard... 672168404Spjd */ 673248571Smm error = dsl_prop_register(ds, 674248571Smm zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 675168404Spjd error = error ? error : dsl_prop_register(ds, 676248571Smm zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 677168404Spjd error = error ? error : dsl_prop_register(ds, 678248571Smm zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 679168404Spjd error = error ? 
error : dsl_prop_register(ds, 680248571Smm zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 681248571Smm#ifdef illumos 682168404Spjd error = error ? error : dsl_prop_register(ds, 683248571Smm zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs); 684248571Smm#endif 685168404Spjd error = error ? error : dsl_prop_register(ds, 686248571Smm zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 687168404Spjd error = error ? error : dsl_prop_register(ds, 688248571Smm zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 689168404Spjd error = error ? error : dsl_prop_register(ds, 690248571Smm zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 691224174Smm error = error ? error : dsl_prop_register(ds, 692248571Smm zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 693185029Spjd error = error ? error : dsl_prop_register(ds, 694248571Smm zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 695248571Smm zfsvfs); 696248571Smm error = error ? error : dsl_prop_register(ds, 697248571Smm zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); 698248571Smm dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 699168404Spjd if (error) 700168404Spjd goto unregister; 701168404Spjd 702168404Spjd /* 703168404Spjd * Invoke our callbacks to restore temporary mount options. 
704168404Spjd */ 705168404Spjd if (do_readonly) 706168404Spjd readonly_changed_cb(zfsvfs, readonly); 707168404Spjd if (do_setuid) 708168404Spjd setuid_changed_cb(zfsvfs, setuid); 709168404Spjd if (do_exec) 710168404Spjd exec_changed_cb(zfsvfs, exec); 711168404Spjd if (do_xattr) 712168404Spjd xattr_changed_cb(zfsvfs, xattr); 713185029Spjd if (do_atime) 714185029Spjd atime_changed_cb(zfsvfs, atime); 715168404Spjd 716185029Spjd nbmand_changed_cb(zfsvfs, nbmand); 717185029Spjd 718168404Spjd return (0); 719168404Spjd 720168404Spjdunregister: 721288204Sdelphij dsl_prop_unregister_all(ds, zfsvfs); 722168404Spjd return (error); 723168404Spjd} 724168404Spjd 725219089Spjdstatic int 726219089Spjdzfs_space_delta_cb(dmu_object_type_t bonustype, void *data, 727219089Spjd uint64_t *userp, uint64_t *groupp) 728209962Smm{ 729219089Spjd /* 730219089Spjd * Is it a valid type of object to track? 731219089Spjd */ 732219089Spjd if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 733249195Smm return (SET_ERROR(ENOENT)); 734209962Smm 735219089Spjd /* 736219089Spjd * If we have a NULL data pointer 737219089Spjd * then assume the id's aren't changing and 738219089Spjd * return EEXIST to the dmu to let it know to 739219089Spjd * use the same ids 740219089Spjd */ 741219089Spjd if (data == NULL) 742249195Smm return (SET_ERROR(EEXIST)); 743209962Smm 744219089Spjd if (bonustype == DMU_OT_ZNODE) { 745240955Smm znode_phys_t *znp = data; 746219089Spjd *userp = znp->zp_uid; 747219089Spjd *groupp = znp->zp_gid; 748219089Spjd } else { 749219089Spjd int hdrsize; 750240955Smm sa_hdr_phys_t *sap = data; 751240955Smm sa_hdr_phys_t sa = *sap; 752240955Smm boolean_t swap = B_FALSE; 753209962Smm 754219089Spjd ASSERT(bonustype == DMU_OT_SA); 755209962Smm 756240955Smm if (sa.sa_magic == 0) { 757219089Spjd /* 758219089Spjd * This should only happen for newly created 759219089Spjd * files that haven't had the znode data filled 760219089Spjd * in yet. 
761219089Spjd */ 762219089Spjd *userp = 0; 763219089Spjd *groupp = 0; 764240955Smm return (0); 765219089Spjd } 766240955Smm if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { 767240955Smm sa.sa_magic = SA_MAGIC; 768240955Smm sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); 769240955Smm swap = B_TRUE; 770240955Smm } else { 771240955Smm VERIFY3U(sa.sa_magic, ==, SA_MAGIC); 772240955Smm } 773240955Smm 774240955Smm hdrsize = sa_hdrsize(&sa); 775240955Smm VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); 776240955Smm *userp = *((uint64_t *)((uintptr_t)data + hdrsize + 777240955Smm SA_UID_OFFSET)); 778240955Smm *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + 779240955Smm SA_GID_OFFSET)); 780240955Smm if (swap) { 781240955Smm *userp = BSWAP_64(*userp); 782240955Smm *groupp = BSWAP_64(*groupp); 783240955Smm } 784209962Smm } 785248571Smm return (0); 786209962Smm} 787209962Smm 788209962Smmstatic void 789209962Smmfuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 790209962Smm char *domainbuf, int buflen, uid_t *ridp) 791209962Smm{ 792209962Smm uint64_t fuid; 793209962Smm const char *domain; 794209962Smm 795321578Smav fuid = zfs_strtonum(fuidstr, NULL); 796209962Smm 797209962Smm domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 798209962Smm if (domain) 799209962Smm (void) strlcpy(domainbuf, domain, buflen); 800209962Smm else 801209962Smm domainbuf[0] = '\0'; 802209962Smm *ridp = FUID_RID(fuid); 803209962Smm} 804209962Smm 805209962Smmstatic uint64_t 806209962Smmzfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 807209962Smm{ 808209962Smm switch (type) { 809209962Smm case ZFS_PROP_USERUSED: 810209962Smm return (DMU_USERUSED_OBJECT); 811209962Smm case ZFS_PROP_GROUPUSED: 812209962Smm return (DMU_GROUPUSED_OBJECT); 813209962Smm case ZFS_PROP_USERQUOTA: 814209962Smm return (zfsvfs->z_userquota_obj); 815209962Smm case ZFS_PROP_GROUPQUOTA: 816209962Smm return (zfsvfs->z_groupquota_obj); 817209962Smm } 818209962Smm return (0); 819209962Smm} 820209962Smm 
821209962Smmint 822209962Smmzfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 823209962Smm uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 824209962Smm{ 825209962Smm int error; 826209962Smm zap_cursor_t zc; 827209962Smm zap_attribute_t za; 828209962Smm zfs_useracct_t *buf = vbuf; 829209962Smm uint64_t obj; 830209962Smm 831209962Smm if (!dmu_objset_userspace_present(zfsvfs->z_os)) 832249195Smm return (SET_ERROR(ENOTSUP)); 833209962Smm 834209962Smm obj = zfs_userquota_prop_to_obj(zfsvfs, type); 835209962Smm if (obj == 0) { 836209962Smm *bufsizep = 0; 837209962Smm return (0); 838209962Smm } 839209962Smm 840209962Smm for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 841209962Smm (error = zap_cursor_retrieve(&zc, &za)) == 0; 842209962Smm zap_cursor_advance(&zc)) { 843209962Smm if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 844209962Smm *bufsizep) 845209962Smm break; 846209962Smm 847209962Smm fuidstr_to_sid(zfsvfs, za.za_name, 848209962Smm buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 849209962Smm 850209962Smm buf->zu_space = za.za_first_integer; 851209962Smm buf++; 852209962Smm } 853209962Smm if (error == ENOENT) 854209962Smm error = 0; 855209962Smm 856209962Smm ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 857209962Smm *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 858209962Smm *cookiep = zap_cursor_serialize(&zc); 859209962Smm zap_cursor_fini(&zc); 860209962Smm return (error); 861209962Smm} 862209962Smm 863209962Smm/* 864209962Smm * buf must be big enough (eg, 32 bytes) 865209962Smm */ 866168404Spjdstatic int 867209962Smmid_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 868209962Smm char *buf, boolean_t addok) 869209962Smm{ 870209962Smm uint64_t fuid; 871209962Smm int domainid = 0; 872209962Smm 873209962Smm if (domain && domain[0]) { 874209962Smm domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 875209962Smm if (domainid == -1) 876249195Smm return (SET_ERROR(ENOENT)); 
877209962Smm } 878209962Smm fuid = FUID_ENCODE(domainid, rid); 879209962Smm (void) sprintf(buf, "%llx", (longlong_t)fuid); 880209962Smm return (0); 881209962Smm} 882209962Smm 883209962Smmint 884209962Smmzfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 885209962Smm const char *domain, uint64_t rid, uint64_t *valp) 886209962Smm{ 887209962Smm char buf[32]; 888209962Smm int err; 889209962Smm uint64_t obj; 890209962Smm 891209962Smm *valp = 0; 892209962Smm 893209962Smm if (!dmu_objset_userspace_present(zfsvfs->z_os)) 894249195Smm return (SET_ERROR(ENOTSUP)); 895209962Smm 896209962Smm obj = zfs_userquota_prop_to_obj(zfsvfs, type); 897209962Smm if (obj == 0) 898209962Smm return (0); 899209962Smm 900209962Smm err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 901209962Smm if (err) 902209962Smm return (err); 903209962Smm 904209962Smm err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 905209962Smm if (err == ENOENT) 906209962Smm err = 0; 907209962Smm return (err); 908209962Smm} 909209962Smm 910209962Smmint 911209962Smmzfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 912209962Smm const char *domain, uint64_t rid, uint64_t quota) 913209962Smm{ 914209962Smm char buf[32]; 915209962Smm int err; 916209962Smm dmu_tx_t *tx; 917209962Smm uint64_t *objp; 918209962Smm boolean_t fuid_dirtied; 919209962Smm 920209962Smm if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 921249195Smm return (SET_ERROR(EINVAL)); 922209962Smm 923209962Smm if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 924249195Smm return (SET_ERROR(ENOTSUP)); 925209962Smm 926209962Smm objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 927209962Smm &zfsvfs->z_groupquota_obj; 928209962Smm 929209962Smm err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 930209962Smm if (err) 931209962Smm return (err); 932209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 933209962Smm 934209962Smm tx = dmu_tx_create(zfsvfs->z_os); 935209962Smm dmu_tx_hold_zap(tx, *objp ? 
*objp : DMU_NEW_OBJECT, B_TRUE, NULL); 936209962Smm if (*objp == 0) { 937209962Smm dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 938209962Smm zfs_userquota_prop_prefixes[type]); 939209962Smm } 940209962Smm if (fuid_dirtied) 941209962Smm zfs_fuid_txhold(zfsvfs, tx); 942209962Smm err = dmu_tx_assign(tx, TXG_WAIT); 943209962Smm if (err) { 944209962Smm dmu_tx_abort(tx); 945209962Smm return (err); 946209962Smm } 947209962Smm 948209962Smm mutex_enter(&zfsvfs->z_lock); 949209962Smm if (*objp == 0) { 950209962Smm *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 951209962Smm DMU_OT_NONE, 0, tx); 952209962Smm VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 953209962Smm zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 954209962Smm } 955209962Smm mutex_exit(&zfsvfs->z_lock); 956209962Smm 957209962Smm if (quota == 0) { 958209962Smm err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 959209962Smm if (err == ENOENT) 960209962Smm err = 0; 961209962Smm } else { 962209962Smm err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 963209962Smm } 964209962Smm ASSERT(err == 0); 965209962Smm if (fuid_dirtied) 966209962Smm zfs_fuid_sync(zfsvfs, tx); 967209962Smm dmu_tx_commit(tx); 968209962Smm return (err); 969209962Smm} 970209962Smm 971209962Smmboolean_t 972219089Spjdzfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 973209962Smm{ 974209962Smm char buf[32]; 975209962Smm uint64_t used, quota, usedobj, quotaobj; 976209962Smm int err; 977209962Smm 978209962Smm usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 979209962Smm quotaobj = isgroup ? 
zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 980209962Smm 981209962Smm if (quotaobj == 0 || zfsvfs->z_replay) 982209962Smm return (B_FALSE); 983209962Smm 984209962Smm (void) sprintf(buf, "%llx", (longlong_t)fuid); 985209962Smm err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 986209962Smm if (err != 0) 987209962Smm return (B_FALSE); 988209962Smm 989209962Smm err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 990209962Smm if (err != 0) 991209962Smm return (B_FALSE); 992209962Smm return (used >= quota); 993209962Smm} 994209962Smm 995219089Spjdboolean_t 996219089Spjdzfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) 997219089Spjd{ 998219089Spjd uint64_t fuid; 999219089Spjd uint64_t quotaobj; 1000219089Spjd 1001219089Spjd quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 1002219089Spjd 1003219089Spjd fuid = isgroup ? zp->z_gid : zp->z_uid; 1004219089Spjd 1005219089Spjd if (quotaobj == 0 || zfsvfs->z_replay) 1006219089Spjd return (B_FALSE); 1007219089Spjd 1008219089Spjd return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); 1009219089Spjd} 1010219089Spjd 1011303969Savg/* 1012303969Savg * Associate this zfsvfs with the given objset, which must be owned. 1013303969Savg * This will cache a bunch of on-disk state from the objset in the 1014303969Savg * zfsvfs. 
1015303969Savg */ 1016303969Savgstatic int 1017303969Savgzfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 1018209962Smm{ 1019303969Savg int error; 1020303969Savg uint64_t val; 1021209962Smm 1022274337Sdelphij zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 1023209962Smm zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 1024209962Smm zfsvfs->z_os = os; 1025209962Smm 1026209962Smm error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 1027303969Savg if (error != 0) 1028303969Savg return (error); 1029303969Savg if (zfsvfs->z_version > 1030219089Spjd zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 1031219089Spjd (void) printf("Can't mount a version %lld file system " 1032219089Spjd "on a version %lld pool\n. Pool must be upgraded to mount " 1033219089Spjd "this file system.", (u_longlong_t)zfsvfs->z_version, 1034219089Spjd (u_longlong_t)spa_version(dmu_objset_spa(os))); 1035303969Savg return (SET_ERROR(ENOTSUP)); 1036209962Smm } 1037303969Savg error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 1038303969Savg if (error != 0) 1039303969Savg return (error); 1040303969Savg zfsvfs->z_norm = (int)val; 1041209962Smm 1042303969Savg error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 1043303969Savg if (error != 0) 1044303969Savg return (error); 1045303969Savg zfsvfs->z_utf8 = (val != 0); 1046209962Smm 1047303969Savg error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 1048303969Savg if (error != 0) 1049303969Savg return (error); 1050303969Savg zfsvfs->z_case = (uint_t)val; 1051209962Smm 1052209962Smm /* 1053209962Smm * Fold case on file systems that are always or sometimes case 1054209962Smm * insensitive. 
1055209962Smm */ 1056209962Smm if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 1057209962Smm zfsvfs->z_case == ZFS_CASE_MIXED) 1058209962Smm zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 1059209962Smm 1060209962Smm zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1061219089Spjd zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1062209962Smm 1063303969Savg uint64_t sa_obj = 0; 1064219089Spjd if (zfsvfs->z_use_sa) { 1065219089Spjd /* should either have both of these objects or none */ 1066219089Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 1067219089Spjd &sa_obj); 1068303969Savg if (error != 0) 1069303969Savg return (error); 1070219089Spjd } 1071219089Spjd 1072219089Spjd error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 1073219089Spjd &zfsvfs->z_attr_table); 1074303969Savg if (error != 0) 1075303969Savg return (error); 1076219089Spjd 1077219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_SA) 1078219089Spjd sa_register_update_callback(os, zfs_sa_upgrade); 1079219089Spjd 1080209962Smm error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 1081209962Smm &zfsvfs->z_root); 1082303969Savg if (error != 0) 1083303969Savg return (error); 1084209962Smm ASSERT(zfsvfs->z_root != 0); 1085209962Smm 1086209962Smm error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 1087209962Smm &zfsvfs->z_unlinkedobj); 1088303969Savg if (error != 0) 1089303969Savg return (error); 1090209962Smm 1091209962Smm error = zap_lookup(os, MASTER_NODE_OBJ, 1092209962Smm zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 1093209962Smm 8, 1, &zfsvfs->z_userquota_obj); 1094303969Savg if (error == ENOENT) 1095303969Savg zfsvfs->z_userquota_obj = 0; 1096303969Savg else if (error != 0) 1097303969Savg return (error); 1098209962Smm 1099209962Smm error = zap_lookup(os, MASTER_NODE_OBJ, 1100209962Smm zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 1101209962Smm 8, 1, &zfsvfs->z_groupquota_obj); 1102303969Savg if (error == ENOENT) 1103303969Savg 
zfsvfs->z_groupquota_obj = 0; 1104303969Savg else if (error != 0) 1105303969Savg return (error); 1106209962Smm 1107209962Smm error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 1108209962Smm &zfsvfs->z_fuid_obj); 1109303969Savg if (error == ENOENT) 1110303969Savg zfsvfs->z_fuid_obj = 0; 1111303969Savg else if (error != 0) 1112303969Savg return (error); 1113209962Smm 1114209962Smm error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 1115209962Smm &zfsvfs->z_shares_dir); 1116303969Savg if (error == ENOENT) 1117303969Savg zfsvfs->z_shares_dir = 0; 1118303969Savg else if (error != 0) 1119303969Savg return (error); 1120209962Smm 1121303970Savg /* 1122303970Savg * Only use the name cache if we are looking for a 1123303970Savg * name on a file system that does not require normalization 1124303970Savg * or case folding. We can also look there if we happen to be 1125303970Savg * on a non-normalizing, mixed sensitivity file system IF we 1126303970Savg * are looking for the exact name (which is always the case on 1127303970Savg * FreeBSD). 1128303970Savg */ 1129303970Savg zfsvfs->z_use_namecache = !zfsvfs->z_norm || 1130303970Savg ((zfsvfs->z_case == ZFS_CASE_MIXED) && 1131303970Savg !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 1132303970Savg 1133303969Savg return (0); 1134303969Savg} 1135303969Savg 1136338975Smav#if defined(__FreeBSD__) 1137338975Smavtaskq_t *zfsvfs_taskq; 1138338975Smav 1139338975Smavstatic void 1140338975Smavzfsvfs_task_unlinked_drain(void *context, int pending __unused) 1141338975Smav{ 1142338975Smav 1143338975Smav zfs_unlinked_drain((zfsvfs_t *)context); 1144338975Smav} 1145338975Smav#endif 1146338975Smav 1147303969Savgint 1148303969Savgzfsvfs_create(const char *osname, zfsvfs_t **zfvp) 1149303969Savg{ 1150303969Savg objset_t *os; 1151303969Savg zfsvfs_t *zfsvfs; 1152303969Savg int error; 1153303969Savg 1154303969Savg /* 1155303969Savg * XXX: Fix struct statfs so this isn't necessary! 
1156303969Savg * 1157303969Savg * The 'osname' is used as the filesystem's special node, which means 1158303969Savg * it must fit in statfs.f_mntfromname, or else it can't be 1159303969Savg * enumerated, so libzfs_mnttab_find() returns NULL, which causes 1160303969Savg * 'zfs unmount' to think it's not mounted when it is. 1161303969Savg */ 1162303969Savg if (strlen(osname) >= MNAMELEN) 1163303969Savg return (SET_ERROR(ENAMETOOLONG)); 1164303969Savg 1165303969Savg zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 1166303969Savg 1167303969Savg /* 1168303969Savg * We claim to always be readonly so we can open snapshots; 1169303969Savg * other ZPL code will prevent us from writing to snapshots. 1170303969Savg */ 1171325534Savg 1172303969Savg error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); 1173325534Savg if (error != 0) { 1174303969Savg kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1175303969Savg return (error); 1176303969Savg } 1177303969Savg 1178325534Savg error = zfsvfs_create_impl(zfvp, zfsvfs, os); 1179325534Savg if (error != 0) { 1180325534Savg dmu_objset_disown(os, zfsvfs); 1181325534Savg } 1182325534Savg return (error); 1183325534Savg} 1184325534Savg 1185325534Savg 1186325534Savgint 1187325534Savgzfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 1188325534Savg{ 1189325534Savg int error; 1190325534Savg 1191303969Savg zfsvfs->z_vfs = NULL; 1192303969Savg zfsvfs->z_parent = zfsvfs; 1193303969Savg 1194209962Smm mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1195209962Smm mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 1196209962Smm list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1197209962Smm offsetof(znode_t, z_link_node)); 1198338975Smav#if defined(__FreeBSD__) 1199338975Smav TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 1200338975Smav zfsvfs_task_unlinked_drain, zfsvfs); 1201338975Smav#endif 1202303970Savg#ifdef DIAGNOSTIC 1203303970Savg rrm_init(&zfsvfs->z_teardown_lock, B_TRUE); 1204303970Savg#else 
1205268865Sdelphij rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); 1206303970Savg#endif 1207209962Smm rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 1208209962Smm rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 1209303969Savg for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1210209962Smm mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1211209962Smm 1212303969Savg error = zfsvfs_init(zfsvfs, os); 1213303969Savg if (error != 0) { 1214303969Savg *zfvp = NULL; 1215303969Savg kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1216303969Savg return (error); 1217303969Savg } 1218303969Savg 1219219089Spjd *zfvp = zfsvfs; 1220209962Smm return (0); 1221209962Smm} 1222209962Smm 1223209962Smmstatic int 1224185029Spjdzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1225168404Spjd{ 1226185029Spjd int error; 1227185029Spjd 1228185029Spjd error = zfs_register_callbacks(zfsvfs->z_vfs); 1229185029Spjd if (error) 1230185029Spjd return (error); 1231185029Spjd 1232208689Smm zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 1233208689Smm 1234185029Spjd /* 1235185029Spjd * If we are not mounting (ie: online recv), then we don't 1236185029Spjd * have to worry about replaying the log as we blocked all 1237185029Spjd * operations out since we closed the ZIL. 1238185029Spjd */ 1239185029Spjd if (mounting) { 1240185029Spjd boolean_t readonly; 1241185029Spjd 1242185029Spjd /* 1243185029Spjd * During replay we remove the read only flag to 1244185029Spjd * allow replays to succeed. 1245185029Spjd */ 1246185029Spjd readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1247208689Smm if (readonly != 0) 1248208689Smm zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1249208689Smm else 1250208689Smm zfs_unlinked_drain(zfsvfs); 1251185029Spjd 1252219089Spjd /* 1253219089Spjd * Parse and replay the intent log. 1254219089Spjd * 1255219089Spjd * Because of ziltest, this must be done after 1256219089Spjd * zfs_unlinked_drain(). 
(Further note: ziltest 1257219089Spjd * doesn't use readonly mounts, where 1258219089Spjd * zfs_unlinked_drain() isn't called.) This is because 1259219089Spjd * ziltest causes spa_sync() to think it's committed, 1260219089Spjd * but actually it is not, so the intent log contains 1261219089Spjd * many txg's worth of changes. 1262219089Spjd * 1263219089Spjd * In particular, if object N is in the unlinked set in 1264219089Spjd * the last txg to actually sync, then it could be 1265219089Spjd * actually freed in a later txg and then reallocated 1266219089Spjd * in a yet later txg. This would write a "create 1267219089Spjd * object N" record to the intent log. Normally, this 1268219089Spjd * would be fine because the spa_sync() would have 1269219089Spjd * written out the fact that object N is free, before 1270219089Spjd * we could write the "create object N" intent log 1271219089Spjd * record. 1272219089Spjd * 1273219089Spjd * But when we are in ziltest mode, we advance the "open 1274219089Spjd * txg" without actually spa_sync()-ing the changes to 1275219089Spjd * disk. So we would see that object N is still 1276219089Spjd * allocated and in the unlinked set, and there is an 1277219089Spjd * intent log record saying to allocate it. 1278219089Spjd */ 1279219089Spjd if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1280219089Spjd if (zil_replay_disable) { 1281219089Spjd zil_destroy(zfsvfs->z_log, B_FALSE); 1282219089Spjd } else { 1283219089Spjd zfsvfs->z_replay = B_TRUE; 1284219089Spjd zil_replay(zfsvfs->z_os, zfsvfs, 1285219089Spjd zfs_replay_vector); 1286219089Spjd zfsvfs->z_replay = B_FALSE; 1287219089Spjd } 1288208689Smm } 1289185029Spjd zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 1290185029Spjd } 1291185029Spjd 1292310513Savg /* 1293310513Savg * Set the objset user_ptr to track its zfsvfs. 
1294310513Savg */ 1295310513Savg mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1296310513Savg dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1297310513Savg mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1298310513Savg 1299185029Spjd return (0); 1300185029Spjd} 1301185029Spjd 1302210470Smmextern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 1303210470Smm 1304209962Smmvoid 1305209962Smmzfsvfs_free(zfsvfs_t *zfsvfs) 1306185029Spjd{ 1307209962Smm int i; 1308209962Smm 1309210470Smm /* 1310210470Smm * This is a barrier to prevent the filesystem from going away in 1311210470Smm * zfs_znode_move() until we can safely ensure that the filesystem is 1312210470Smm * not unmounted. We consider the filesystem valid before the barrier 1313210470Smm * and invalid after the barrier. 1314210470Smm */ 1315210470Smm rw_enter(&zfsvfs_lock, RW_READER); 1316210470Smm rw_exit(&zfsvfs_lock); 1317210470Smm 1318209962Smm zfs_fuid_destroy(zfsvfs); 1319209962Smm 1320185029Spjd mutex_destroy(&zfsvfs->z_znodes_lock); 1321209962Smm mutex_destroy(&zfsvfs->z_lock); 1322185029Spjd list_destroy(&zfsvfs->z_all_znodes); 1323268865Sdelphij rrm_destroy(&zfsvfs->z_teardown_lock); 1324185029Spjd rw_destroy(&zfsvfs->z_teardown_inactive_lock); 1325185029Spjd rw_destroy(&zfsvfs->z_fuid_lock); 1326209962Smm for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1327209962Smm mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1328185029Spjd kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1329185029Spjd} 1330185029Spjd 1331209962Smmstatic void 1332209962Smmzfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1333209962Smm{ 1334209962Smm zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1335219089Spjd if (zfsvfs->z_vfs) { 1336219089Spjd if (zfsvfs->z_use_fuids) { 1337219089Spjd vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 1338219089Spjd vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 1339219089Spjd vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 1340219089Spjd vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 1341219089Spjd 
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 1342219089Spjd vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 1343219089Spjd } else { 1344219089Spjd vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 1345219089Spjd vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 1346219089Spjd vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 1347219089Spjd vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 1348219089Spjd vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 1349219089Spjd vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 1350219089Spjd } 1351209962Smm } 1352219089Spjd zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1353209962Smm} 1354209962Smm 1355185029Spjdstatic int 1356185029Spjdzfs_domount(vfs_t *vfsp, char *osname) 1357185029Spjd{ 1358209962Smm uint64_t recordsize, fsid_guid; 1359168404Spjd int error = 0; 1360168404Spjd zfsvfs_t *zfsvfs; 1361209962Smm vnode_t *vp; 1362168404Spjd 1363168404Spjd ASSERT(vfsp); 1364168404Spjd ASSERT(osname); 1365168404Spjd 1366219089Spjd error = zfsvfs_create(osname, &zfsvfs); 1367209962Smm if (error) 1368209962Smm return (error); 1369168404Spjd zfsvfs->z_vfs = vfsp; 1370168404Spjd 1371249195Smm#ifdef illumos 1372249195Smm /* Initialize the generic filesystem structure. 
*/ 1373249195Smm vfsp->vfs_bcount = 0; 1374249195Smm vfsp->vfs_data = NULL; 1375249195Smm 1376249195Smm if (zfs_create_unique_device(&mount_dev) == -1) { 1377249195Smm error = SET_ERROR(ENODEV); 1378249195Smm goto out; 1379249195Smm } 1380249195Smm ASSERT(vfs_devismounted(mount_dev) == 0); 1381249195Smm#endif 1382249195Smm 1383168404Spjd if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1384168404Spjd NULL)) 1385168404Spjd goto out; 1386204101Spjd zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1387204101Spjd zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1388168404Spjd 1389168404Spjd vfsp->vfs_data = zfsvfs; 1390218386Strasz vfsp->mnt_flag |= MNT_LOCAL; 1391168404Spjd vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1392193440Sps vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1393242573Savg vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1394298105Savg vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ 1395168404Spjd 1396209962Smm /* 1397209962Smm * The fsid is 64 bits, composed of an 8-bit fs type, which 1398209962Smm * separates our fsid from any other filesystem types, and a 1399209962Smm * 56-bit objset unique ID. The objset unique ID is unique to 1400209962Smm * all objsets open on this system, provided by unique_create(). 1401209962Smm * The 8-bit fs type must be put in the low bits of fsid[1] 1402209962Smm * because that's where other Solaris filesystems put it. 1403209962Smm */ 1404209962Smm fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1405209962Smm ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1406209962Smm vfsp->vfs_fsid.val[0] = fsid_guid; 1407209962Smm vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1408209962Smm vfsp->mnt_vfc->vfc_typenum & 0xFF; 1409168404Spjd 1410185029Spjd /* 1411185029Spjd * Set features for file system. 
1412185029Spjd */ 1413209962Smm zfs_set_fuid_feature(zfsvfs); 1414185029Spjd if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1415185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1416185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1417185029Spjd vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1418185029Spjd } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1419185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1420185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1421185029Spjd } 1422219089Spjd vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); 1423185029Spjd 1424168404Spjd if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1425185029Spjd uint64_t pval; 1426168404Spjd 1427168404Spjd atime_changed_cb(zfsvfs, B_FALSE); 1428168404Spjd readonly_changed_cb(zfsvfs, B_TRUE); 1429185029Spjd if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1430168404Spjd goto out; 1431185029Spjd xattr_changed_cb(zfsvfs, pval); 1432168404Spjd zfsvfs->z_issnap = B_TRUE; 1433219089Spjd zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1434209962Smm 1435219089Spjd mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1436209962Smm dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1437219089Spjd mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1438168404Spjd } else { 1439185029Spjd error = zfsvfs_setup(zfsvfs, B_TRUE); 1440168404Spjd } 1441168404Spjd 1442168404Spjd vfs_mountedfrom(vfsp, osname); 1443168404Spjd 1444168404Spjd if (!zfsvfs->z_issnap) 1445168404Spjd zfsctl_create(zfsvfs); 1446168404Spjdout: 1447168404Spjd if (error) { 1448219089Spjd dmu_objset_disown(zfsvfs->z_os, zfsvfs); 1449209962Smm zfsvfs_free(zfsvfs); 1450168404Spjd } else { 1451270247Sdelphij atomic_inc_32(&zfs_active_fs_count); 1452168404Spjd } 1453168404Spjd 1454168404Spjd return (error); 1455168404Spjd} 1456168404Spjd 1457168404Spjdvoid 1458168404Spjdzfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1459168404Spjd{ 1460168404Spjd objset_t *os = zfsvfs->z_os; 1461168404Spjd 1462288204Sdelphij if (!dmu_objset_is_snapshot(os)) 
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * Returns 0 and stores the value in *objnum on success.  Returns EINVAL,
 * leaving *objnum untouched, if the string contains a non-digit
 * character or if the value does not fit in 64 bits.  An empty string
 * converts to 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	const uint64_t max = ~(uint64_t)0;
	uint64_t num = 0;

	for (; *str != '\0'; str++) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (SET_ERROR(EINVAL));
		digit = (uint64_t)(*str - '0');

		/*
		 * num * 10 + digit would wrap around silently; refuse
		 * the string instead of returning a bogus object number.
		 */
		if (num > (max - digit) / 10)
			return (SET_ERROR(EINVAL));

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
appropriate for the dataset being 1524251631Sdelphij * mounted into the global_zone proper. 1525219089Spjd * 1526251631Sdelphij * Return an error if the hex label string is not default or 1527251631Sdelphij * admin_low/admin_high. For admin_low labels, the corresponding 1528251631Sdelphij * dataset must be readonly. 1529219089Spjd */ 1530219089Spjdint 1531219089Spjdzfs_check_global_label(const char *dsname, const char *hexsl) 1532219089Spjd{ 1533219089Spjd if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 1534219089Spjd return (0); 1535219089Spjd if (strcasecmp(hexsl, ADMIN_HIGH) == 0) 1536219089Spjd return (0); 1537219089Spjd if (strcasecmp(hexsl, ADMIN_LOW) == 0) { 1538219089Spjd /* must be readonly */ 1539219089Spjd uint64_t rdonly; 1540219089Spjd 1541219089Spjd if (dsl_prop_get_integer(dsname, 1542219089Spjd zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) 1543249195Smm return (SET_ERROR(EACCES)); 1544219089Spjd return (rdonly ? 0 : EACCES); 1545219089Spjd } 1546249195Smm return (SET_ERROR(EACCES)); 1547219089Spjd} 1548219089Spjd 1549219089Spjd/* 1550251631Sdelphij * Determine whether the mount is allowed according to MAC check. 1551251631Sdelphij * by comparing (where appropriate) label of the dataset against 1552251631Sdelphij * the label of the zone being mounted into. If the dataset has 1553251631Sdelphij * no label, create one. 1554219089Spjd * 1555251631Sdelphij * Returns 0 if access allowed, error otherwise (e.g. EACCES) 1556219089Spjd */ 1557219089Spjdstatic int 1558219089Spjdzfs_mount_label_policy(vfs_t *vfsp, char *osname) 1559219089Spjd{ 1560219089Spjd int error, retv; 1561219089Spjd zone_t *mntzone = NULL; 1562219089Spjd ts_label_t *mnt_tsl; 1563219089Spjd bslabel_t *mnt_sl; 1564219089Spjd bslabel_t ds_sl; 1565219089Spjd char ds_hexsl[MAXNAMELEN]; 1566219089Spjd 1567219089Spjd retv = EACCES; /* assume the worst */ 1568219089Spjd 1569219089Spjd /* 1570219089Spjd * Start by getting the dataset label if it exists. 
1571219089Spjd */ 1572219089Spjd error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1573219089Spjd 1, sizeof (ds_hexsl), &ds_hexsl, NULL); 1574219089Spjd if (error) 1575249195Smm return (SET_ERROR(EACCES)); 1576219089Spjd 1577219089Spjd /* 1578219089Spjd * If labeling is NOT enabled, then disallow the mount of datasets 1579219089Spjd * which have a non-default label already. No other label checks 1580219089Spjd * are needed. 1581219089Spjd */ 1582219089Spjd if (!is_system_labeled()) { 1583219089Spjd if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 1584219089Spjd return (0); 1585249195Smm return (SET_ERROR(EACCES)); 1586219089Spjd } 1587219089Spjd 1588219089Spjd /* 1589219089Spjd * Get the label of the mountpoint. If mounting into the global 1590219089Spjd * zone (i.e. mountpoint is not within an active zone and the 1591219089Spjd * zoned property is off), the label must be default or 1592219089Spjd * admin_low/admin_high only; no other checks are needed. 1593219089Spjd */ 1594219089Spjd mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); 1595219089Spjd if (mntzone->zone_id == GLOBAL_ZONEID) { 1596219089Spjd uint64_t zoned; 1597219089Spjd 1598219089Spjd zone_rele(mntzone); 1599219089Spjd 1600219089Spjd if (dsl_prop_get_integer(osname, 1601219089Spjd zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) 1602249195Smm return (SET_ERROR(EACCES)); 1603219089Spjd if (!zoned) 1604219089Spjd return (zfs_check_global_label(osname, ds_hexsl)); 1605219089Spjd else 1606219089Spjd /* 1607219089Spjd * This is the case of a zone dataset being mounted 1608219089Spjd * initially, before the zone has been fully created; 1609219089Spjd * allow this mount into global zone. 
1610219089Spjd */ 1611219089Spjd return (0); 1612219089Spjd } 1613219089Spjd 1614219089Spjd mnt_tsl = mntzone->zone_slabel; 1615219089Spjd ASSERT(mnt_tsl != NULL); 1616219089Spjd label_hold(mnt_tsl); 1617219089Spjd mnt_sl = label2bslabel(mnt_tsl); 1618219089Spjd 1619219089Spjd if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { 1620219089Spjd /* 1621219089Spjd * The dataset doesn't have a real label, so fabricate one. 1622219089Spjd */ 1623219089Spjd char *str = NULL; 1624219089Spjd 1625219089Spjd if (l_to_str_internal(mnt_sl, &str) == 0 && 1626248571Smm dsl_prop_set_string(osname, 1627248571Smm zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1628248571Smm ZPROP_SRC_LOCAL, str) == 0) 1629219089Spjd retv = 0; 1630219089Spjd if (str != NULL) 1631219089Spjd kmem_free(str, strlen(str) + 1); 1632219089Spjd } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { 1633219089Spjd /* 1634219089Spjd * Now compare labels to complete the MAC check. If the 1635219089Spjd * labels are equal then allow access. If the mountpoint 1636219089Spjd * label dominates the dataset label, allow readonly access. 1637219089Spjd * Otherwise, access is denied. 
1638219089Spjd */ 1639219089Spjd if (blequal(mnt_sl, &ds_sl)) 1640219089Spjd retv = 0; 1641219089Spjd else if (bldominates(mnt_sl, &ds_sl)) { 1642219089Spjd vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1643219089Spjd retv = 0; 1644219089Spjd } 1645219089Spjd } 1646219089Spjd 1647219089Spjd label_rele(mnt_tsl); 1648219089Spjd zone_rele(mntzone); 1649219089Spjd return (retv); 1650219089Spjd} 1651219089Spjd#endif /* SECLABEL */ 1652219089Spjd 1653219089Spjd#ifdef OPENSOLARIS_MOUNTROOT 1654219089Spjdstatic int 1655219089Spjdzfs_mountroot(vfs_t *vfsp, enum whymountroot why) 1656219089Spjd{ 1657219089Spjd int error = 0; 1658219089Spjd static int zfsrootdone = 0; 1659219089Spjd zfsvfs_t *zfsvfs = NULL; 1660219089Spjd znode_t *zp = NULL; 1661219089Spjd vnode_t *vp = NULL; 1662219089Spjd char *zfs_bootfs; 1663219089Spjd char *zfs_devid; 1664219089Spjd 1665219089Spjd ASSERT(vfsp); 1666219089Spjd 1667219089Spjd /* 1668219089Spjd * The filesystem that we mount as root is defined in the 1669219089Spjd * boot property "zfs-bootfs" with a format of 1670219089Spjd * "poolname/root-dataset-objnum". 1671219089Spjd */ 1672219089Spjd if (why == ROOT_INIT) { 1673219089Spjd if (zfsrootdone++) 1674249195Smm return (SET_ERROR(EBUSY)); 1675219089Spjd /* 1676219089Spjd * the process of doing a spa_load will require the 1677219089Spjd * clock to be set before we could (for example) do 1678219089Spjd * something better by looking at the timestamp on 1679219089Spjd * an uberblock, so just set it to -1. 
1680219089Spjd */ 1681219089Spjd clkset(-1); 1682219089Spjd 1683219089Spjd if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { 1684219089Spjd cmn_err(CE_NOTE, "spa_get_bootfs: can not get " 1685219089Spjd "bootfs name"); 1686249195Smm return (SET_ERROR(EINVAL)); 1687219089Spjd } 1688219089Spjd zfs_devid = spa_get_bootprop("diskdevid"); 1689219089Spjd error = spa_import_rootpool(rootfs.bo_name, zfs_devid); 1690219089Spjd if (zfs_devid) 1691219089Spjd spa_free_bootprop(zfs_devid); 1692219089Spjd if (error) { 1693219089Spjd spa_free_bootprop(zfs_bootfs); 1694219089Spjd cmn_err(CE_NOTE, "spa_import_rootpool: error %d", 1695219089Spjd error); 1696219089Spjd return (error); 1697219089Spjd } 1698219089Spjd if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { 1699219089Spjd spa_free_bootprop(zfs_bootfs); 1700219089Spjd cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", 1701219089Spjd error); 1702219089Spjd return (error); 1703219089Spjd } 1704219089Spjd 1705219089Spjd spa_free_bootprop(zfs_bootfs); 1706219089Spjd 1707219089Spjd if (error = vfs_lock(vfsp)) 1708219089Spjd return (error); 1709219089Spjd 1710219089Spjd if (error = zfs_domount(vfsp, rootfs.bo_name)) { 1711219089Spjd cmn_err(CE_NOTE, "zfs_domount: error %d", error); 1712219089Spjd goto out; 1713219089Spjd } 1714219089Spjd 1715219089Spjd zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 1716219089Spjd ASSERT(zfsvfs); 1717219089Spjd if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { 1718219089Spjd cmn_err(CE_NOTE, "zfs_zget: error %d", error); 1719219089Spjd goto out; 1720219089Spjd } 1721219089Spjd 1722219089Spjd vp = ZTOV(zp); 1723219089Spjd mutex_enter(&vp->v_lock); 1724219089Spjd vp->v_flag |= VROOT; 1725219089Spjd mutex_exit(&vp->v_lock); 1726219089Spjd rootvp = vp; 1727219089Spjd 1728219089Spjd /* 1729219089Spjd * Leave rootvp held. The root file system is never unmounted. 1730219089Spjd */ 1731219089Spjd 1732219089Spjd vfs_add((struct vnode *)0, vfsp, 1733219089Spjd (vfsp->vfs_flag & VFS_RDONLY) ? 
MS_RDONLY : 0); 1734219089Spjdout: 1735219089Spjd vfs_unlock(vfsp); 1736219089Spjd return (error); 1737219089Spjd } else if (why == ROOT_REMOUNT) { 1738219089Spjd readonly_changed_cb(vfsp->vfs_data, B_FALSE); 1739219089Spjd vfsp->vfs_flag |= VFS_REMOUNT; 1740219089Spjd 1741219089Spjd /* refresh mount options */ 1742219089Spjd zfs_unregister_callbacks(vfsp->vfs_data); 1743219089Spjd return (zfs_register_callbacks(vfsp)); 1744219089Spjd 1745219089Spjd } else if (why == ROOT_UNMOUNT) { 1746219089Spjd zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 1747219089Spjd (void) zfs_sync(vfsp, 0, 0); 1748219089Spjd return (0); 1749219089Spjd } 1750219089Spjd 1751219089Spjd /* 1752219089Spjd * if "why" is equal to anything else other than ROOT_INIT, 1753219089Spjd * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 1754219089Spjd */ 1755249195Smm return (SET_ERROR(ENOTSUP)); 1756219089Spjd} 1757219089Spjd#endif /* OPENSOLARIS_MOUNTROOT */ 1758219089Spjd 1759241286Savgstatic int 1760241286Savggetpoolname(const char *osname, char *poolname) 1761241286Savg{ 1762241286Savg char *p; 1763241286Savg 1764241286Savg p = strchr(osname, '/'); 1765241286Savg if (p == NULL) { 1766241286Savg if (strlen(osname) >= MAXNAMELEN) 1767241286Savg return (ENAMETOOLONG); 1768241286Savg (void) strcpy(poolname, osname); 1769241286Savg } else { 1770241286Savg if (p - osname >= MAXNAMELEN) 1771241286Savg return (ENAMETOOLONG); 1772241286Savg (void) strncpy(poolname, osname, p - osname); 1773241286Savg poolname[p - osname] = '\0'; 1774241286Savg } 1775241286Savg return (0); 1776241286Savg} 1777241286Savg 1778168404Spjd/*ARGSUSED*/ 1779168404Spjdstatic int 1780191990Sattiliozfs_mount(vfs_t *vfsp) 1781168404Spjd{ 1782191990Sattilio kthread_t *td = curthread; 1783185029Spjd vnode_t *mvp = vfsp->mnt_vnodecovered; 1784185029Spjd cred_t *cr = td->td_ucred; 1785185029Spjd char *osname; 1786185029Spjd int error = 0; 1787185029Spjd int canwrite; 1788168404Spjd 1789249195Smm#ifdef illumos 1790249195Smm if 
(mvp->v_type != VDIR) 1791249195Smm return (SET_ERROR(ENOTDIR)); 1792249195Smm 1793249195Smm mutex_enter(&mvp->v_lock); 1794249195Smm if ((uap->flags & MS_REMOUNT) == 0 && 1795249195Smm (uap->flags & MS_OVERLAY) == 0 && 1796249195Smm (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 1797249195Smm mutex_exit(&mvp->v_lock); 1798249195Smm return (SET_ERROR(EBUSY)); 1799249195Smm } 1800249195Smm mutex_exit(&mvp->v_lock); 1801249195Smm 1802249195Smm /* 1803249195Smm * ZFS does not support passing unparsed data in via MS_DATA. 1804249195Smm * Users should use the MS_OPTIONSTR interface; this means 1805249195Smm * that all option parsing is already done and the options struct 1806249195Smm * can be interrogated. 1807249195Smm */ 1808249195Smm if ((uap->flags & MS_DATA) && uap->datalen > 0) 1809328049Savg return (SET_ERROR(EINVAL)); 1810328049Savg 1811328049Savg /* 1812328049Savg * Get the objset name (the "special" mount argument). 1813328049Savg */ 1814328049Savg if (error = pn_get(uap->spec, fromspace, &spn)) 1815328049Savg return (error); 1816328049Savg 1817328049Savg osname = spn.pn_path; 1818277300Ssmh#else /* !illumos */ 1819232186Smm if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS)) 1820249195Smm return (SET_ERROR(EPERM)); 1821232186Smm 1822185029Spjd if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1823249195Smm return (SET_ERROR(EINVAL)); 1824185029Spjd 1825168404Spjd /* 1826185029Spjd * If full-owner-access is enabled and delegated administration is 1827185029Spjd * turned on, we must set nosuid. 1828185029Spjd */ 1829185029Spjd if (zfs_super_owner && 1830185029Spjd dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1831185029Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 1832185029Spjd } 1833328049Savg#endif /* illumos */ 1834185029Spjd 1835185029Spjd /* 1836185029Spjd * Check for mount privilege? 
1837185029Spjd * 1838185029Spjd * If we don't have privilege then see if 1839185029Spjd * we have local permission to allow it 1840185029Spjd */ 1841185029Spjd error = secpolicy_fs_mount(cr, mvp, vfsp); 1842185029Spjd if (error) { 1843212694Smm if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1844196944Spjd goto out; 1845196944Spjd 1846196944Spjd if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1847185029Spjd vattr_t vattr; 1848185029Spjd 1849185029Spjd /* 1850185029Spjd * Make sure user is the owner of the mount point 1851185029Spjd * or has sufficient privileges. 1852185029Spjd */ 1853185029Spjd 1854185029Spjd vattr.va_mask = AT_UID; 1855185029Spjd 1856196662Spjd vn_lock(mvp, LK_SHARED | LK_RETRY); 1857212694Smm if (VOP_GETATTR(mvp, &vattr, cr)) { 1858196662Spjd VOP_UNLOCK(mvp, 0); 1859185029Spjd goto out; 1860185029Spjd } 1861185029Spjd 1862185029Spjd if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1863185029Spjd VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1864196662Spjd VOP_UNLOCK(mvp, 0); 1865185029Spjd goto out; 1866185029Spjd } 1867196662Spjd VOP_UNLOCK(mvp, 0); 1868196944Spjd } 1869185029Spjd 1870196944Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 1871185029Spjd } 1872185029Spjd 1873185029Spjd /* 1874185029Spjd * Refuse to mount a filesystem if we are in a local zone and the 1875185029Spjd * dataset is not visible. 
1876185029Spjd */ 1877185029Spjd if (!INGLOBALZONE(curthread) && 1878185029Spjd (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1879249195Smm error = SET_ERROR(EPERM); 1880185029Spjd goto out; 1881185029Spjd } 1882185029Spjd 1883219089Spjd#ifdef SECLABEL 1884219089Spjd error = zfs_mount_label_policy(vfsp, osname); 1885219089Spjd if (error) 1886219089Spjd goto out; 1887219089Spjd#endif 1888219089Spjd 1889218386Strasz vfsp->vfs_flag |= MNT_NFS4ACLS; 1890218386Strasz 1891185029Spjd /* 1892168404Spjd * When doing a remount, we simply refresh our temporary properties 1893168404Spjd * according to those options set in the current VFS options. 1894168404Spjd */ 1895185029Spjd if (vfsp->vfs_flag & MS_REMOUNT) { 1896285021Savg zfsvfs_t *zfsvfs = vfsp->vfs_data; 1897285021Savg 1898285021Savg /* 1899285021Savg * Refresh mount options with z_teardown_lock blocking I/O while 1900285021Savg * the filesystem is in an inconsistent state. 1901285021Savg * The lock also serializes this code with filesystem 1902285021Savg * manipulations between entry to zfs_suspend_fs() and return 1903285021Savg * from zfs_resume_fs(). 1904285021Savg */ 1905285021Savg rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1906285021Savg zfs_unregister_callbacks(zfsvfs); 1907185029Spjd error = zfs_register_callbacks(vfsp); 1908285021Savg rrm_exit(&zfsvfs->z_teardown_lock, FTAG); 1909185029Spjd goto out; 1910185029Spjd } 1911168404Spjd 1912241286Savg /* Initial root mount: try hard to import the requested root pool. 
*/ 1913241286Savg if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 1914241286Savg (vfsp->vfs_flag & MNT_UPDATE) == 0) { 1915241286Savg char pname[MAXNAMELEN]; 1916241286Savg 1917241286Savg error = getpoolname(osname, pname); 1918242567Savg if (error == 0) 1919242567Savg error = spa_import_rootpool(pname); 1920241286Savg if (error) 1921241286Savg goto out; 1922241286Savg } 1923168510Spjd DROP_GIANT(); 1924185029Spjd error = zfs_domount(vfsp, osname); 1925168510Spjd PICKUP_GIANT(); 1926209962Smm 1927277300Ssmh#ifdef illumos 1928209962Smm /* 1929209962Smm * Add an extra VFS_HOLD on our parent vfs so that it can't 1930209962Smm * disappear due to a forced unmount. 1931209962Smm */ 1932209962Smm if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1933209962Smm VFS_HOLD(mvp->v_vfsp); 1934277300Ssmh#endif 1935209962Smm 1936185029Spjdout: 1937168510Spjd return (error); 1938168404Spjd} 1939168404Spjd 1940168404Spjdstatic int 1941191990Sattiliozfs_statfs(vfs_t *vfsp, struct statfs *statp) 1942169170Spjd{ 1943168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1944168404Spjd uint64_t refdbytes, availbytes, usedobjs, availobjs; 1945168404Spjd 1946168404Spjd statp->f_version = STATFS_VERSION; 1947168404Spjd 1948168404Spjd ZFS_ENTER(zfsvfs); 1949168404Spjd 1950168404Spjd dmu_objset_space(zfsvfs->z_os, 1951168404Spjd &refdbytes, &availbytes, &usedobjs, &availobjs); 1952168404Spjd 1953168404Spjd /* 1954168404Spjd * The underlying storage pool actually uses multiple block sizes. 1955168404Spjd * We report the fragsize as the smallest block size we support, 1956168404Spjd * and we report our blocksize as the filesystem's maximum blocksize. 1957168404Spjd */ 1958204101Spjd statp->f_bsize = SPA_MINBLOCKSIZE; 1959204101Spjd statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1960168404Spjd 1961168404Spjd /* 1962168404Spjd * The following report "total" blocks of various kinds in the 1963168404Spjd * file system, but reported in terms of f_frsize - the 1964168404Spjd * "fragment" size. 
1965168404Spjd */ 1966168404Spjd 1967204101Spjd statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1968168404Spjd statp->f_bfree = availbytes / statp->f_bsize; 1969168404Spjd statp->f_bavail = statp->f_bfree; /* no root reservation */ 1970168404Spjd 1971168404Spjd /* 1972168404Spjd * statvfs() should really be called statufs(), because it assumes 1973168404Spjd * static metadata. ZFS doesn't preallocate files, so the best 1974168404Spjd * we can do is report the max that could possibly fit in f_files, 1975168404Spjd * and that minus the number actually used in f_ffree. 1976168404Spjd * For f_ffree, report the smaller of the number of object available 1977168404Spjd * and the number of blocks (each object will take at least a block). 1978168404Spjd */ 1979168404Spjd statp->f_ffree = MIN(availobjs, statp->f_bfree); 1980168404Spjd statp->f_files = statp->f_ffree + usedobjs; 1981168404Spjd 1982168404Spjd /* 1983168404Spjd * We're a zfs filesystem. 1984168404Spjd */ 1985168404Spjd (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 1986168404Spjd 1987168404Spjd strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1988168404Spjd sizeof(statp->f_mntfromname)); 1989168404Spjd strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1990168404Spjd sizeof(statp->f_mntonname)); 1991168404Spjd 1992307108Smav statp->f_namemax = MAXNAMELEN - 1; 1993168404Spjd 1994168404Spjd ZFS_EXIT(zfsvfs); 1995168404Spjd return (0); 1996168404Spjd} 1997168404Spjd 1998168404Spjdstatic int 1999191990Sattiliozfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 2000168404Spjd{ 2001168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 2002168404Spjd znode_t *rootzp; 2003168404Spjd int error; 2004168404Spjd 2005282475Savg ZFS_ENTER(zfsvfs); 2006168404Spjd 2007168404Spjd error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 2008219089Spjd if (error == 0) 2009219089Spjd *vpp = ZTOV(rootzp); 2010206667Spjd 2011206667Spjd ZFS_EXIT(zfsvfs); 2012206667Spjd 2013301273Savg if (error 
== 0) { 2014254711Savg error = vn_lock(*vpp, flags); 2015301273Savg if (error != 0) { 2016301273Savg VN_RELE(*vpp); 2017301273Savg *vpp = NULL; 2018301273Savg } 2019301273Savg } 2020168404Spjd return (error); 2021168404Spjd} 2022168404Spjd 2023185029Spjd/* 2024185029Spjd * Teardown the zfsvfs::z_os. 2025185029Spjd * 2026331384Smav * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' 2027185029Spjd * and 'z_teardown_inactive_lock' held. 2028185029Spjd */ 2029185029Spjdstatic int 2030185029Spjdzfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 2031185029Spjd{ 2032185029Spjd znode_t *zp; 2033185029Spjd 2034268865Sdelphij rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 2035185029Spjd 2036185029Spjd if (!unmounting) { 2037185029Spjd /* 2038185029Spjd * We purge the parent filesystem's vfsp as the parent 2039185029Spjd * filesystem and all of its snapshots have their vnode's 2040185029Spjd * v_vfsp set to the parent's filesystem's vfsp. Note, 2041185029Spjd * 'z_parent' is self referential for non-snapshots. 2042185029Spjd */ 2043185029Spjd (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 2044197351Spjd#ifdef FREEBSD_NAMECACHE 2045310959Smjg cache_purgevfs(zfsvfs->z_parent->z_vfs, true); 2046197351Spjd#endif 2047185029Spjd } 2048185029Spjd 2049185029Spjd /* 2050185029Spjd * Close the zil. NB: Can't close the zil while zfs_inactive 2051185029Spjd * threads are blocked as zil_close can call zfs_inactive. 2052185029Spjd */ 2053185029Spjd if (zfsvfs->z_log) { 2054185029Spjd zil_close(zfsvfs->z_log); 2055185029Spjd zfsvfs->z_log = NULL; 2056185029Spjd } 2057185029Spjd 2058185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 2059185029Spjd 2060185029Spjd /* 2061185029Spjd * If we are not unmounting (ie: online recv) and someone already 2062185029Spjd * unmounted this file system while we were doing the switcheroo, 2063185029Spjd * or a reopen of z_os failed then just bail out now. 
2064185029Spjd */ 2065185029Spjd if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 2066185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 2067268865Sdelphij rrm_exit(&zfsvfs->z_teardown_lock, FTAG); 2068249195Smm return (SET_ERROR(EIO)); 2069185029Spjd } 2070185029Spjd 2071185029Spjd /* 2072185029Spjd * At this point there are no vops active, and any new vops will 2073185029Spjd * fail with EIO since we have z_teardown_lock for writer (only 2074185029Spjd * relavent for forced unmount). 2075185029Spjd * 2076185029Spjd * Release all holds on dbufs. 2077185029Spjd */ 2078185029Spjd mutex_enter(&zfsvfs->z_znodes_lock); 2079185029Spjd for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 2080185029Spjd zp = list_next(&zfsvfs->z_all_znodes, zp)) 2081219089Spjd if (zp->z_sa_hdl) { 2082196297Spjd ASSERT(ZTOV(zp)->v_count >= 0); 2083185029Spjd zfs_znode_dmu_fini(zp); 2084185029Spjd } 2085185029Spjd mutex_exit(&zfsvfs->z_znodes_lock); 2086185029Spjd 2087185029Spjd /* 2088185029Spjd * If we are unmounting, set the unmounted flag and let new vops 2089185029Spjd * unblock. zfs_inactive will have the unmounted behavior, and all 2090185029Spjd * other vops will fail with EIO. 2091185029Spjd */ 2092185029Spjd if (unmounting) { 2093185029Spjd zfsvfs->z_unmounted = B_TRUE; 2094331384Smav rw_exit(&zfsvfs->z_teardown_inactive_lock); 2095268865Sdelphij rrm_exit(&zfsvfs->z_teardown_lock, FTAG); 2096185029Spjd } 2097185029Spjd 2098185029Spjd /* 2099185029Spjd * z_os will be NULL if there was an error in attempting to reopen 2100185029Spjd * zfsvfs, so just return as the properties had already been 2101185029Spjd * unregistered and cached data had been evicted before. 2102185029Spjd */ 2103185029Spjd if (zfsvfs->z_os == NULL) 2104185029Spjd return (0); 2105185029Spjd 2106185029Spjd /* 2107185029Spjd * Unregister properties. 
2108185029Spjd */ 2109185029Spjd zfs_unregister_callbacks(zfsvfs); 2110185029Spjd 2111185029Spjd /* 2112185029Spjd * Evict cached data 2113185029Spjd */ 2114239774Smm if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) && 2115239774Smm !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 2116239774Smm txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 2117248571Smm dmu_objset_evict_dbufs(zfsvfs->z_os); 2118185029Spjd 2119185029Spjd return (0); 2120185029Spjd} 2121185029Spjd 2122168404Spjd/*ARGSUSED*/ 2123168404Spjdstatic int 2124191990Sattiliozfs_umount(vfs_t *vfsp, int fflag) 2125168404Spjd{ 2126209962Smm kthread_t *td = curthread; 2127168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 2128185029Spjd objset_t *os; 2129209962Smm cred_t *cr = td->td_ucred; 2130168404Spjd int ret; 2131168404Spjd 2132185029Spjd ret = secpolicy_fs_unmount(cr, vfsp); 2133185029Spjd if (ret) { 2134212694Smm if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 2135212694Smm ZFS_DELEG_PERM_MOUNT, cr)) 2136185029Spjd return (ret); 2137185029Spjd } 2138219089Spjd 2139185029Spjd /* 2140185029Spjd * We purge the parent filesystem's vfsp as the parent filesystem 2141185029Spjd * and all of its snapshots have their vnode's v_vfsp set to the 2142185029Spjd * parent's filesystem's vfsp. Note, 'z_parent' is self 2143185029Spjd * referential for non-snapshots. 2144185029Spjd */ 2145185029Spjd (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 2146168404Spjd 2147168404Spjd /* 2148168404Spjd * Unmount any snapshots mounted under .zfs before unmounting the 2149168404Spjd * dataset itself. 2150168404Spjd */ 2151169170Spjd if (zfsvfs->z_ctldir != NULL) { 2152168404Spjd if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 2153168404Spjd return (ret); 2154168404Spjd } 2155168404Spjd 2156197459Spjd if (fflag & MS_FORCE) { 2157197459Spjd /* 2158197459Spjd * Mark file system as unmounted before calling 2159197459Spjd * vflush(FORCECLOSE). 
This way we ensure no future vnops 2160197459Spjd * will be called and risk operating on DOOMED vnodes. 2161197459Spjd */ 2162268865Sdelphij rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 2163197459Spjd zfsvfs->z_unmounted = B_TRUE; 2164268865Sdelphij rrm_exit(&zfsvfs->z_teardown_lock, FTAG); 2165197459Spjd } 2166197459Spjd 2167168404Spjd /* 2168168404Spjd * Flush all the files. 2169168404Spjd */ 2170282475Savg ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 2171315842Savg if (ret != 0) 2172168404Spjd return (ret); 2173168404Spjd 2174277300Ssmh#ifdef illumos 2175185029Spjd if (!(fflag & MS_FORCE)) { 2176185029Spjd /* 2177185029Spjd * Check the number of active vnodes in the file system. 2178185029Spjd * Our count is maintained in the vfs structure, but the 2179185029Spjd * number is off by 1 to indicate a hold on the vfs 2180185029Spjd * structure itself. 2181185029Spjd * 2182185029Spjd * The '.zfs' directory maintains a reference of its 2183185029Spjd * own, and any active references underneath are 2184185029Spjd * reflected in the vnode count. 2185185029Spjd */ 2186185029Spjd if (zfsvfs->z_ctldir == NULL) { 2187185029Spjd if (vfsp->vfs_count > 1) 2188249195Smm return (SET_ERROR(EBUSY)); 2189185029Spjd } else { 2190185029Spjd if (vfsp->vfs_count > 2 || 2191185029Spjd zfsvfs->z_ctldir->v_count > 1) 2192249195Smm return (SET_ERROR(EBUSY)); 2193185029Spjd } 2194185029Spjd } 2195248653Swill#endif 2196168404Spjd 2197338975Smav while (taskqueue_cancel(zfsvfs_taskq->tq_queue, 2198338975Smav &zfsvfs->z_unlinked_drain_task, NULL) != 0) 2199338975Smav taskqueue_drain(zfsvfs_taskq->tq_queue, 2200338975Smav &zfsvfs->z_unlinked_drain_task); 2201338975Smav 2202185029Spjd VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 2203185029Spjd os = zfsvfs->z_os; 2204185029Spjd 2205185029Spjd /* 2206185029Spjd * z_os will be NULL if there was an error in 2207185029Spjd * attempting to reopen zfsvfs. 
2208185029Spjd */ 2209185029Spjd if (os != NULL) { 2210168404Spjd /* 2211185029Spjd * Unset the objset user_ptr. 2212168404Spjd */ 2213219089Spjd mutex_enter(&os->os_user_ptr_lock); 2214185029Spjd dmu_objset_set_user(os, NULL); 2215219089Spjd mutex_exit(&os->os_user_ptr_lock); 2216185029Spjd 2217185029Spjd /* 2218185029Spjd * Finally release the objset 2219185029Spjd */ 2220219089Spjd dmu_objset_disown(os, zfsvfs); 2221168404Spjd } 2222168404Spjd 2223185029Spjd /* 2224185029Spjd * We can now safely destroy the '.zfs' directory node. 2225185029Spjd */ 2226185029Spjd if (zfsvfs->z_ctldir != NULL) 2227185029Spjd zfsctl_destroy(zfsvfs); 2228168404Spjd zfs_freevfs(vfsp); 2229168404Spjd 2230168404Spjd return (0); 2231168404Spjd} 2232168404Spjd 2233168404Spjdstatic int 2234168404Spjdzfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 2235168404Spjd{ 2236168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 2237168404Spjd znode_t *zp; 2238168404Spjd int err; 2239168404Spjd 2240197167Spjd /* 2241215397Savg * zfs_zget() can't operate on virtual entries like .zfs/ or 2242211855Spjd * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 2243211855Spjd * This will make NFS to switch to LOOKUP instead of using VGET. 
2244197167Spjd */ 2245246532Savg if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || 2246246532Savg (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) 2247197167Spjd return (EOPNOTSUPP); 2248197167Spjd 2249168404Spjd ZFS_ENTER(zfsvfs); 2250168404Spjd err = zfs_zget(zfsvfs, ino, &zp); 2251168404Spjd if (err == 0 && zp->z_unlinked) { 2252303970Savg vrele(ZTOV(zp)); 2253168404Spjd err = EINVAL; 2254168404Spjd } 2255219089Spjd if (err == 0) 2256219089Spjd *vpp = ZTOV(zp); 2257206667Spjd ZFS_EXIT(zfsvfs); 2258351807Savg if (err == 0) { 2259254711Savg err = vn_lock(*vpp, flags); 2260351807Savg if (err != 0) 2261351807Savg vrele(*vpp); 2262351807Savg } 2263168404Spjd if (err != 0) 2264168404Spjd *vpp = NULL; 2265171063Sdfr return (err); 2266168404Spjd} 2267168404Spjd 2268168404Spjdstatic int 2269196982Spjdzfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 2270196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors) 2271196982Spjd{ 2272196982Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 2273196982Spjd 2274196982Spjd /* 2275196982Spjd * If this is regular file system vfsp is the same as 2276196982Spjd * zfsvfs->z_parent->z_vfs, but if it is snapshot, 2277196982Spjd * zfsvfs->z_parent->z_vfs represents parent file system 2278196982Spjd * which we have to use here, because only this file system 2279196982Spjd * has mnt_export configured. 
2280196982Spjd */ 2281196982Spjd return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 2282196982Spjd credanonp, numsecflavors, secflavors)); 2283196982Spjd} 2284196982Spjd 2285197151SpjdCTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 2286197151SpjdCTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 2287196982Spjd 2288196982Spjdstatic int 2289222167Srmacklemzfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 2290168404Spjd{ 2291315842Savg struct componentname cn; 2292168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 2293168404Spjd znode_t *zp; 2294315842Savg vnode_t *dvp; 2295168404Spjd uint64_t object = 0; 2296168404Spjd uint64_t fid_gen = 0; 2297168404Spjd uint64_t gen_mask; 2298168404Spjd uint64_t zp_gen; 2299219089Spjd int i, err; 2300168404Spjd 2301168404Spjd *vpp = NULL; 2302168404Spjd 2303168404Spjd ZFS_ENTER(zfsvfs); 2304168404Spjd 2305196979Spjd /* 2306197177Spjd * On FreeBSD we can get snapshot's mount point or its parent file 2307197177Spjd * system mount point depending if snapshot is already mounted or not. 
2308196979Spjd */ 2309197177Spjd if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 2310168404Spjd zfid_long_t *zlfid = (zfid_long_t *)fidp; 2311168404Spjd uint64_t objsetid = 0; 2312168404Spjd uint64_t setgen = 0; 2313168404Spjd 2314168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 2315168404Spjd objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 2316168404Spjd 2317168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 2318168404Spjd setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 2319168404Spjd 2320168404Spjd ZFS_EXIT(zfsvfs); 2321168404Spjd 2322168404Spjd err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 2323168404Spjd if (err) 2324249195Smm return (SET_ERROR(EINVAL)); 2325168404Spjd ZFS_ENTER(zfsvfs); 2326168404Spjd } 2327168404Spjd 2328168404Spjd if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 2329168404Spjd zfid_short_t *zfid = (zfid_short_t *)fidp; 2330168404Spjd 2331168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 2332168404Spjd object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 2333168404Spjd 2334168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 2335168404Spjd fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 2336168404Spjd } else { 2337168404Spjd ZFS_EXIT(zfsvfs); 2338249195Smm return (SET_ERROR(EINVAL)); 2339168404Spjd } 2340168404Spjd 2341246532Savg /* 2342246532Savg * A zero fid_gen means we are in .zfs or the .zfs/snapshot 2343246532Savg * directory tree. If the object == zfsvfs->z_shares_dir, then 2344246532Savg * we are in the .zfs/shares directory tree. 
2345246532Savg */ 2346246532Savg if ((fid_gen == 0 && 2347246532Savg (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 2348246532Savg (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 2349315842Savg ZFS_EXIT(zfsvfs); 2350315842Savg VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); 2351168404Spjd if (object == ZFSCTL_INO_SNAPDIR) { 2352315842Savg cn.cn_nameptr = "snapshot"; 2353315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 2354315842Savg cn.cn_nameiop = LOOKUP; 2355315842Savg cn.cn_flags = ISLASTCN | LOCKLEAF; 2356315842Savg cn.cn_lkflags = flags; 2357315842Savg VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 2358315842Savg vput(dvp); 2359246532Savg } else if (object == zfsvfs->z_shares_dir) { 2360315842Savg /* 2361315842Savg * XXX This branch must not be taken, 2362315842Savg * if it is, then the lookup below will 2363315842Savg * explode. 2364315842Savg */ 2365315842Savg cn.cn_nameptr = "shares"; 2366315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 2367315842Savg cn.cn_nameiop = LOOKUP; 2368315842Savg cn.cn_flags = ISLASTCN; 2369315842Savg cn.cn_lkflags = flags; 2370315842Savg VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 2371315842Savg vput(dvp); 2372168404Spjd } else { 2373315842Savg *vpp = dvp; 2374168404Spjd } 2375219089Spjd return (err); 2376168404Spjd } 2377168404Spjd 2378168404Spjd gen_mask = -1ULL >> (64 - 8 * i); 2379168404Spjd 2380168404Spjd dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 2381168404Spjd if (err = zfs_zget(zfsvfs, object, &zp)) { 2382168404Spjd ZFS_EXIT(zfsvfs); 2383168404Spjd return (err); 2384168404Spjd } 2385219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 2386219089Spjd sizeof (uint64_t)); 2387219089Spjd zp_gen = zp_gen & gen_mask; 2388168404Spjd if (zp_gen == 0) 2389168404Spjd zp_gen = 1; 2390168404Spjd if (zp->z_unlinked || zp_gen != fid_gen) { 2391168404Spjd dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 2392303970Savg vrele(ZTOV(zp)); 2393168404Spjd 
ZFS_EXIT(zfsvfs); 2394249195Smm return (SET_ERROR(EINVAL)); 2395168404Spjd } 2396168404Spjd 2397219089Spjd *vpp = ZTOV(zp); 2398206667Spjd ZFS_EXIT(zfsvfs); 2399315842Savg err = vn_lock(*vpp, flags); 2400219089Spjd if (err == 0) 2401219089Spjd vnode_create_vobject(*vpp, zp->z_size, curthread); 2402219089Spjd else 2403219089Spjd *vpp = NULL; 2404219089Spjd return (err); 2405168404Spjd} 2406168404Spjd 2407185029Spjd/* 2408185029Spjd * Block out VOPs and close zfsvfs_t::z_os 2409185029Spjd * 2410185029Spjd * Note, if successful, then we return with the 'z_teardown_lock' and 2411253816Sdelphij * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying 2412253816Sdelphij * dataset and objset intact so that they can be atomically handed off during 2413253816Sdelphij * a subsequent rollback or recv operation and the resume thereafter. 2414185029Spjd */ 2415185029Spjdint 2416219089Spjdzfs_suspend_fs(zfsvfs_t *zfsvfs) 2417168404Spjd{ 2418185029Spjd int error; 2419168404Spjd 2420185029Spjd if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 2421185029Spjd return (error); 2422168404Spjd 2423185029Spjd return (0); 2424185029Spjd} 2425168404Spjd 2426185029Spjd/* 2427253816Sdelphij * Rebuild SA and release VOPs. Note that ownership of the underlying dataset 2428253816Sdelphij * is an invariant across any of the operations that can be performed while the 2429253816Sdelphij * filesystem was suspended. Whether it succeeded or failed, the preconditions 2430253816Sdelphij * are the same: the relevant objset and associated dataset are owned by 2431253816Sdelphij * zfsvfs, held, and long held on entry. 
2432185029Spjd */ 2433185029Spjdint 2434310509Savgzfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 2435185029Spjd{ 2436185029Spjd int err; 2437253816Sdelphij znode_t *zp; 2438168404Spjd 2439268865Sdelphij ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); 2440185029Spjd ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 2441185029Spjd 2442253816Sdelphij /* 2443310509Savg * We already own this, so just update the objset_t, as the one we 2444310509Savg * had before may have been evicted. 2445253816Sdelphij */ 2446303969Savg objset_t *os; 2447310509Savg VERIFY3P(ds->ds_owner, ==, zfsvfs); 2448310509Savg VERIFY(dsl_dataset_long_held(ds)); 2449310509Savg VERIFY0(dmu_objset_from_ds(ds, &os)); 2450185029Spjd 2451303969Savg err = zfsvfs_init(zfsvfs, os); 2452303969Savg if (err != 0) 2453253816Sdelphij goto bail; 2454219089Spjd 2455253816Sdelphij VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 2456185029Spjd 2457253816Sdelphij zfs_set_fuid_feature(zfsvfs); 2458219089Spjd 2459253816Sdelphij /* 2460253816Sdelphij * Attempt to re-establish all the active znodes with 2461253816Sdelphij * their dbufs. If a zfs_rezget() fails, then we'll let 2462253816Sdelphij * any potential callers discover that via ZFS_ENTER_VERIFY_VP 2463253816Sdelphij * when they try to use their znode. 2464253816Sdelphij */ 2465253816Sdelphij mutex_enter(&zfsvfs->z_znodes_lock); 2466253816Sdelphij for (zp = list_head(&zfsvfs->z_all_znodes); zp; 2467253816Sdelphij zp = list_next(&zfsvfs->z_all_znodes, zp)) { 2468253816Sdelphij (void) zfs_rezget(zp); 2469168404Spjd } 2470253816Sdelphij mutex_exit(&zfsvfs->z_znodes_lock); 2471168404Spjd 2472219089Spjdbail: 2473185029Spjd /* release the VOPs */ 2474185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 2475268865Sdelphij rrm_exit(&zfsvfs->z_teardown_lock, FTAG); 2476185029Spjd 2477185029Spjd if (err) { 2478185029Spjd /* 2479253816Sdelphij * Since we couldn't setup the sa framework, try to force 2480253816Sdelphij * unmount this file system. 
2481185029Spjd */ 2482283629Skib if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { 2483283602Skib vfs_ref(zfsvfs->z_vfs); 2484185029Spjd (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 2485283629Skib } 2486168404Spjd } 2487185029Spjd return (err); 2488168404Spjd} 2489168404Spjd 2490168404Spjdstatic void 2491168404Spjdzfs_freevfs(vfs_t *vfsp) 2492168404Spjd{ 2493168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 2494168404Spjd 2495277300Ssmh#ifdef illumos 2496209962Smm /* 2497209962Smm * If this is a snapshot, we have an extra VFS_HOLD on our parent 2498219089Spjd * from zfs_mount(). Release it here. If we came through 2499219089Spjd * zfs_mountroot() instead, we didn't grab an extra hold, so 2500219089Spjd * skip the VFS_RELE for rootvfs. 2501209962Smm */ 2502219089Spjd if (zfsvfs->z_issnap && (vfsp != rootvfs)) 2503209962Smm VFS_RELE(zfsvfs->z_parent->z_vfs); 2504277300Ssmh#endif 2505168404Spjd 2506209962Smm zfsvfs_free(zfsvfs); 2507185029Spjd 2508270247Sdelphij atomic_dec_32(&zfs_active_fs_count); 2509168404Spjd} 2510168404Spjd 2511172135Spjd#ifdef __i386__ 2512172135Spjdstatic int desiredvnodes_backup; 2513172135Spjd#endif 2514172135Spjd 2515172135Spjdstatic void 2516172135Spjdzfs_vnodes_adjust(void) 2517172135Spjd{ 2518172135Spjd#ifdef __i386__ 2519185029Spjd int newdesiredvnodes; 2520172135Spjd 2521172135Spjd desiredvnodes_backup = desiredvnodes; 2522172135Spjd 2523172135Spjd /* 2524172135Spjd * We calculate newdesiredvnodes the same way it is done in 2525172135Spjd * vntblinit(). If it is equal to desiredvnodes, it means that 2526172135Spjd * it wasn't tuned by the administrator and we can tune it down. 
2527172135Spjd */ 2528263620Sbdrewery newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * 2529185029Spjd vm_kmem_size / (5 * (sizeof(struct vm_object) + 2530185029Spjd sizeof(struct vnode)))); 2531185029Spjd if (newdesiredvnodes == desiredvnodes) 2532185029Spjd desiredvnodes = (3 * newdesiredvnodes) / 4; 2533172135Spjd#endif 2534172135Spjd} 2535172135Spjd 2536172135Spjdstatic void 2537172135Spjdzfs_vnodes_adjust_back(void) 2538172135Spjd{ 2539172135Spjd 2540172135Spjd#ifdef __i386__ 2541172135Spjd desiredvnodes = desiredvnodes_backup; 2542172135Spjd#endif 2543172135Spjd} 2544172135Spjd 2545168404Spjdvoid 2546168404Spjdzfs_init(void) 2547168404Spjd{ 2548168404Spjd 2549236884Smm printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 2550168404Spjd 2551168404Spjd /* 2552219089Spjd * Initialize .zfs directory structures 2553168404Spjd */ 2554219089Spjd zfsctl_init(); 2555168404Spjd 2556168404Spjd /* 2557219089Spjd * Initialize znode cache, vnode ops, etc... 2558168404Spjd */ 2559219089Spjd zfs_znode_init(); 2560172135Spjd 2561172135Spjd /* 2562219089Spjd * Reduce number of vnodes. Originally number of vnodes is calculated 2563172135Spjd * with UFS inode in mind. We reduce it here, because it's too big for 2564172135Spjd * ZFS/i386. 
2565172135Spjd */ 2566172135Spjd zfs_vnodes_adjust(); 2567209962Smm 2568209962Smm dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 2569338975Smav#if defined(__FreeBSD__) 2570338975Smav zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); 2571338975Smav#endif 2572168404Spjd} 2573168404Spjd 2574168404Spjdvoid 2575168404Spjdzfs_fini(void) 2576168404Spjd{ 2577338975Smav#if defined(__FreeBSD__) 2578338975Smav taskq_destroy(zfsvfs_taskq); 2579338975Smav#endif 2580168404Spjd zfsctl_fini(); 2581168404Spjd zfs_znode_fini(); 2582172135Spjd zfs_vnodes_adjust_back(); 2583168404Spjd} 2584168404Spjd 2585168404Spjdint 2586168404Spjdzfs_busy(void) 2587168404Spjd{ 2588168404Spjd return (zfs_active_fs_count != 0); 2589168404Spjd} 2590185029Spjd 2591185029Spjdint 2592209962Smmzfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2593185029Spjd{ 2594185029Spjd int error; 2595209962Smm objset_t *os = zfsvfs->z_os; 2596185029Spjd dmu_tx_t *tx; 2597185029Spjd 2598185029Spjd if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2599249195Smm return (SET_ERROR(EINVAL)); 2600185029Spjd 2601209962Smm if (newvers < zfsvfs->z_version) 2602249195Smm return (SET_ERROR(EINVAL)); 2603185029Spjd 2604219089Spjd if (zfs_spa_version_map(newvers) > 2605219089Spjd spa_version(dmu_objset_spa(zfsvfs->z_os))) 2606249195Smm return (SET_ERROR(ENOTSUP)); 2607219089Spjd 2608185029Spjd tx = dmu_tx_create(os); 2609209962Smm dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2610219089Spjd if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2611219089Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2612219089Spjd ZFS_SA_ATTRS); 2613219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2614219089Spjd } 2615185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2616185029Spjd if (error) { 2617185029Spjd dmu_tx_abort(tx); 2618209962Smm return (error); 2619185029Spjd } 2620219089Spjd 2621209962Smm error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2622209962Smm 8, 1, 
&newvers, tx); 2623185029Spjd 2624209962Smm if (error) { 2625209962Smm dmu_tx_commit(tx); 2626209962Smm return (error); 2627209962Smm } 2628209962Smm 2629219089Spjd if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2630219089Spjd uint64_t sa_obj; 2631219089Spjd 2632219089Spjd ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2633219089Spjd SPA_VERSION_SA); 2634219089Spjd sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2635219089Spjd DMU_OT_NONE, 0, tx); 2636219089Spjd 2637219089Spjd error = zap_add(os, MASTER_NODE_OBJ, 2638219089Spjd ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2639240415Smm ASSERT0(error); 2640219089Spjd 2641219089Spjd VERIFY(0 == sa_set_sa_object(os, sa_obj)); 2642219089Spjd sa_register_update_callback(os, zfs_sa_upgrade); 2643219089Spjd } 2644219089Spjd 2645248571Smm spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, 2646248571Smm "from %llu to %llu", zfsvfs->z_version, newvers); 2647209962Smm 2648185029Spjd dmu_tx_commit(tx); 2649185029Spjd 2650209962Smm zfsvfs->z_version = newvers; 2651339109Smav os->os_version = newvers; 2652209962Smm 2653219089Spjd zfs_set_fuid_feature(zfsvfs); 2654209962Smm 2655209962Smm return (0); 2656185029Spjd} 2657219089Spjd 2658185029Spjd/* 2659185029Spjd * Read a property stored within the master node. 2660185029Spjd */ 2661185029Spjdint 2662185029Spjdzfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 2663185029Spjd{ 2664339109Smav uint64_t *cached_copy = NULL; 2665185029Spjd 2666185029Spjd /* 2667339109Smav * Figure out where in the objset_t the cached copy would live, if it 2668339109Smav * is available for the requested property. 
2669185029Spjd */ 2670339109Smav if (os != NULL) { 2671339109Smav switch (prop) { 2672339109Smav case ZFS_PROP_VERSION: 2673339109Smav cached_copy = &os->os_version; 2674339109Smav break; 2675339109Smav case ZFS_PROP_NORMALIZE: 2676339109Smav cached_copy = &os->os_normalization; 2677339109Smav break; 2678339109Smav case ZFS_PROP_UTF8ONLY: 2679339109Smav cached_copy = &os->os_utf8only; 2680339109Smav break; 2681339109Smav case ZFS_PROP_CASE: 2682339109Smav cached_copy = &os->os_casesensitivity; 2683339109Smav break; 2684339109Smav default: 2685339109Smav break; 2686339109Smav } 2687339109Smav } 2688339109Smav if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { 2689339109Smav *value = *cached_copy; 2690339109Smav return (0); 2691339109Smav } 2692339109Smav 2693339109Smav /* 2694339109Smav * If the property wasn't cached, look up the file system's value for 2695339109Smav * the property. For the version property, we look up a slightly 2696339109Smav * different string. 2697339109Smav */ 2698339109Smav const char *pname; 2699339109Smav int error = ENOENT; 2700339109Smav if (prop == ZFS_PROP_VERSION) { 2701185029Spjd pname = ZPL_VERSION_STR; 2702339109Smav } else { 2703185029Spjd pname = zfs_prop_to_name(prop); 2704339109Smav } 2705185029Spjd 2706321556Smav if (os != NULL) { 2707321556Smav ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); 2708185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 2709321556Smav } 2710185029Spjd 2711185029Spjd if (error == ENOENT) { 2712185029Spjd /* No value set, use the default value */ 2713185029Spjd switch (prop) { 2714185029Spjd case ZFS_PROP_VERSION: 2715185029Spjd *value = ZPL_VERSION; 2716185029Spjd break; 2717185029Spjd case ZFS_PROP_NORMALIZE: 2718185029Spjd case ZFS_PROP_UTF8ONLY: 2719185029Spjd *value = 0; 2720185029Spjd break; 2721185029Spjd case ZFS_PROP_CASE: 2722185029Spjd *value = ZFS_CASE_SENSITIVE; 2723185029Spjd break; 2724185029Spjd default: 2725185029Spjd return (error); 
2726185029Spjd } 2727185029Spjd error = 0; 2728185029Spjd } 2729339109Smav 2730339109Smav /* 2731339109Smav * If one of the methods for getting the property value above worked, 2732339109Smav * copy it into the objset_t's cache. 2733339109Smav */ 2734339109Smav if (error == 0 && cached_copy != NULL) { 2735339109Smav *cached_copy = *value; 2736339109Smav } 2737339109Smav 2738185029Spjd return (error); 2739185029Spjd} 2740226676Spjd 2741331384Smav/* 2742331384Smav * Return true if the coresponding vfs's unmounted flag is set. 2743331384Smav * Otherwise return false. 2744331384Smav * If this function returns true we know VFS unmount has been initiated. 2745331384Smav */ 2746331384Smavboolean_t 2747331384Smavzfs_get_vfs_flag_unmounted(objset_t *os) 2748331384Smav{ 2749331384Smav zfsvfs_t *zfvp; 2750331384Smav boolean_t unmounted = B_FALSE; 2751331384Smav 2752331384Smav ASSERT(dmu_objset_type(os) == DMU_OST_ZFS); 2753331384Smav 2754331384Smav mutex_enter(&os->os_user_ptr_lock); 2755331384Smav zfvp = dmu_objset_get_user(os); 2756331384Smav if (zfvp != NULL && zfvp->z_vfs != NULL && 2757331384Smav (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) 2758331384Smav unmounted = B_TRUE; 2759331384Smav mutex_exit(&os->os_user_ptr_lock); 2760331384Smav 2761331384Smav return (unmounted); 2762331384Smav} 2763331384Smav 2764226676Spjd#ifdef _KERNEL 2765226676Spjdvoid 2766226676Spjdzfsvfs_update_fromname(const char *oldname, const char *newname) 2767226676Spjd{ 2768226676Spjd char tmpbuf[MAXPATHLEN]; 2769226676Spjd struct mount *mp; 2770226676Spjd char *fromname; 2771226676Spjd size_t oldlen; 2772226676Spjd 2773226676Spjd oldlen = strlen(oldname); 2774226676Spjd 2775226676Spjd mtx_lock(&mountlist_mtx); 2776226676Spjd TAILQ_FOREACH(mp, &mountlist, mnt_list) { 2777226676Spjd fromname = mp->mnt_stat.f_mntfromname; 2778226676Spjd if (strcmp(fromname, oldname) == 0) { 2779226676Spjd (void)strlcpy(fromname, newname, 2780226676Spjd sizeof(mp->mnt_stat.f_mntfromname)); 2781226676Spjd continue; 
2782226676Spjd } 2783226676Spjd if (strncmp(fromname, oldname, oldlen) == 0 && 2784226700Spjd (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 2785226676Spjd (void)snprintf(tmpbuf, sizeof(tmpbuf), "%s%s", 2786226676Spjd newname, fromname + oldlen); 2787226676Spjd (void)strlcpy(fromname, tmpbuf, 2788226676Spjd sizeof(mp->mnt_stat.f_mntfromname)); 2789226676Spjd continue; 2790226676Spjd } 2791226676Spjd } 2792226676Spjd mtx_unlock(&mountlist_mtx); 2793226676Spjd} 2794226676Spjd#endif 2795