zfs_ctldir.c revision 197513
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 27168404Spjd 28168404Spjd/* 29168404Spjd * ZFS control directory (a.k.a. ".zfs") 30168404Spjd * 31168404Spjd * This directory provides a common location for all ZFS meta-objects. 32168404Spjd * Currently, this is only the 'snapshot' directory, but this may expand in the 33168404Spjd * future. The elements are built using the GFS primitives, as the hierarchy 34168404Spjd * does not actually exist on disk. 35168404Spjd * 36168404Spjd * For 'snapshot', we don't want to have all snapshots always mounted, because 37168404Spjd * this would take up a huge amount of space in /etc/mnttab. 
We have three types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
 *
 * The 'snapshot' node contains just enough information to look up '..' and
 * act as a mountpoint for the snapshot.  Whenever we look up a specific
 * snapshot, we perform an automount of the underlying filesystem and return
 * the corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from user land.  The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
 * unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (i.e. '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (i.e. snapshots) are ZFS nodes and have their own unique vfs_t.
 * However, vnodes within these mounted-on file systems have their v_vfsp
 * fields set to the head filesystem to make NFS happy (see
 * zfsctl_snapdir_lookup()).  We VFS_HOLD the head filesystem's vfs_t
 * so that it cannot be freed until all snapshots have been unmounted.
67168404Spjd */ 68168404Spjd 69168404Spjd#include <sys/zfs_context.h> 70168404Spjd#include <sys/zfs_ctldir.h> 71168404Spjd#include <sys/zfs_ioctl.h> 72168404Spjd#include <sys/zfs_vfsops.h> 73168404Spjd#include <sys/namei.h> 74168404Spjd#include <sys/gfs.h> 75168404Spjd#include <sys/stat.h> 76168404Spjd#include <sys/dmu.h> 77185029Spjd#include <sys/dsl_deleg.h> 78168404Spjd#include <sys/mount.h> 79185029Spjd#include <sys/sunddi.h> 80168404Spjd 81185029Spjd#include "zfs_namecheck.h" 82185029Spjd 83185029Spjdtypedef struct zfsctl_node { 84185029Spjd gfs_dir_t zc_gfs_private; 85185029Spjd uint64_t zc_id; 86185029Spjd timestruc_t zc_cmtime; /* ctime and mtime, always the same */ 87185029Spjd} zfsctl_node_t; 88185029Spjd 89185029Spjdtypedef struct zfsctl_snapdir { 90185029Spjd zfsctl_node_t sd_node; 91185029Spjd kmutex_t sd_lock; 92185029Spjd avl_tree_t sd_snaps; 93185029Spjd} zfsctl_snapdir_t; 94185029Spjd 95168404Spjdtypedef struct { 96168404Spjd char *se_name; 97168404Spjd vnode_t *se_root; 98168404Spjd avl_node_t se_node; 99168404Spjd} zfs_snapentry_t; 100168404Spjd 101168404Spjdstatic int 102168404Spjdsnapentry_compare(const void *a, const void *b) 103168404Spjd{ 104168404Spjd const zfs_snapentry_t *sa = a; 105168404Spjd const zfs_snapentry_t *sb = b; 106168404Spjd int ret = strcmp(sa->se_name, sb->se_name); 107168404Spjd 108168404Spjd if (ret < 0) 109168404Spjd return (-1); 110168404Spjd else if (ret > 0) 111168404Spjd return (1); 112168404Spjd else 113168404Spjd return (0); 114168404Spjd} 115168404Spjd 116168404Spjdstatic struct vop_vector zfsctl_ops_root; 117168404Spjdstatic struct vop_vector zfsctl_ops_snapdir; 118168404Spjdstatic struct vop_vector zfsctl_ops_snapshot; 119168404Spjd 120168404Spjdstatic vnode_t *zfsctl_mknode_snapdir(vnode_t *); 121168404Spjdstatic vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset); 122185029Spjdstatic int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *); 123168404Spjd 124168404Spjd/* 125168404Spjd * Root directory 
elements. We have only a single static entry, 'snapshot'. 126168404Spjd */ 127168404Spjdstatic gfs_dirent_t zfsctl_root_entries[] = { 128168404Spjd { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE }, 129168404Spjd { NULL } 130168404Spjd}; 131168404Spjd 132168404Spjd/* include . and .. in the calculation */ 133168404Spjd#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \ 134168404Spjd sizeof (gfs_dirent_t)) + 1) 135168404Spjd 136168404Spjd 137168404Spjd/* 138168404Spjd * Initialize the various GFS pieces we'll need to create and manipulate .zfs 139168404Spjd * directories. This is called from the ZFS init routine, and initializes the 140168404Spjd * vnode ops vectors that we'll be using. 141168404Spjd */ 142168404Spjdvoid 143168404Spjdzfsctl_init(void) 144168404Spjd{ 145168404Spjd} 146168404Spjd 147168404Spjdvoid 148168404Spjdzfsctl_fini(void) 149168404Spjd{ 150168404Spjd} 151168404Spjd 152168404Spjd/* 153168404Spjd * Return the inode number associated with the 'snapshot' directory. 154168404Spjd */ 155168404Spjd/* ARGSUSED */ 156168404Spjdstatic ino64_t 157168404Spjdzfsctl_root_inode_cb(vnode_t *vp, int index) 158168404Spjd{ 159168404Spjd ASSERT(index == 0); 160168404Spjd return (ZFSCTL_INO_SNAPDIR); 161168404Spjd} 162168404Spjd 163168404Spjd/* 164168404Spjd * Create the '.zfs' directory. This directory is cached as part of the VFS 165168404Spjd * structure. This results in a hold on the vfs_t. The code in zfs_umount() 166168404Spjd * therefore checks against a vfs_count of 2 instead of 1. This reference 167168404Spjd * is removed when the ctldir is destroyed in the unmount. 
168168404Spjd */ 169168404Spjdvoid 170168404Spjdzfsctl_create(zfsvfs_t *zfsvfs) 171168404Spjd{ 172168404Spjd vnode_t *vp, *rvp; 173168404Spjd zfsctl_node_t *zcp; 174168404Spjd 175168404Spjd ASSERT(zfsvfs->z_ctldir == NULL); 176168404Spjd 177168404Spjd vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs, 178168404Spjd &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries, 179168404Spjd zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL); 180168404Spjd zcp = vp->v_data; 181168404Spjd zcp->zc_id = ZFSCTL_INO_ROOT; 182168404Spjd 183191990Sattilio VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0); 184168404Spjd ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime); 185168404Spjd VN_URELE(rvp); 186168404Spjd 187168404Spjd /* 188168404Spjd * We're only faking the fact that we have a root of a filesystem for 189168404Spjd * the sake of the GFS interfaces. Undo the flag manipulation it did 190168404Spjd * for us. 191168404Spjd */ 192168404Spjd vp->v_vflag &= ~VV_ROOT; 193168404Spjd 194168404Spjd zfsvfs->z_ctldir = vp; 195182781Spjd 196182781Spjd VOP_UNLOCK(vp, 0); 197168404Spjd} 198168404Spjd 199168404Spjd/* 200168404Spjd * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. 201168404Spjd * There might still be more references if we were force unmounted, but only 202168404Spjd * new zfs_inactive() calls can occur and they don't reference .zfs 203168404Spjd */ 204168404Spjdvoid 205168404Spjdzfsctl_destroy(zfsvfs_t *zfsvfs) 206168404Spjd{ 207168404Spjd VN_RELE(zfsvfs->z_ctldir); 208168404Spjd zfsvfs->z_ctldir = NULL; 209168404Spjd} 210168404Spjd 211168404Spjd/* 212168404Spjd * Given a root znode, retrieve the associated .zfs directory. 213168404Spjd * Add a hold to the vnode and return it. 
214168404Spjd */ 215168404Spjdvnode_t * 216168404Spjdzfsctl_root(znode_t *zp) 217168404Spjd{ 218168404Spjd ASSERT(zfs_has_ctldir(zp)); 219168404Spjd VN_HOLD(zp->z_zfsvfs->z_ctldir); 220168404Spjd return (zp->z_zfsvfs->z_ctldir); 221168404Spjd} 222168404Spjd 223168404Spjd/* 224168404Spjd * Common open routine. Disallow any write access. 225168404Spjd */ 226168404Spjd/* ARGSUSED */ 227168404Spjdstatic int 228168404Spjdzfsctl_common_open(struct vop_open_args *ap) 229168404Spjd{ 230168404Spjd int flags = ap->a_mode; 231168404Spjd 232168404Spjd if (flags & FWRITE) 233168404Spjd return (EACCES); 234168404Spjd 235168404Spjd return (0); 236168404Spjd} 237168404Spjd 238168404Spjd/* 239168404Spjd * Common close routine. Nothing to do here. 240168404Spjd */ 241168404Spjd/* ARGSUSED */ 242168404Spjdstatic int 243168404Spjdzfsctl_common_close(struct vop_close_args *ap) 244168404Spjd{ 245168404Spjd return (0); 246168404Spjd} 247168404Spjd 248168404Spjd/* 249168404Spjd * Common access routine. Disallow writes. 250168404Spjd */ 251168404Spjd/* ARGSUSED */ 252168404Spjdstatic int 253168404Spjdzfsctl_common_access(ap) 254168404Spjd struct vop_access_args /* { 255168404Spjd struct vnode *a_vp; 256185029Spjd int a_accmode; 257168404Spjd struct ucred *a_cred; 258168404Spjd struct thread *a_td; 259168404Spjd } */ *ap; 260168404Spjd{ 261185029Spjd int mode = ap->a_accmode; 262168404Spjd 263185029Spjd if (mode & VWRITE) 264168404Spjd return (EACCES); 265168404Spjd 266168404Spjd return (0); 267168404Spjd} 268168404Spjd 269168404Spjd/* 270168404Spjd * Common getattr function. Fill in basic information. 
271168404Spjd */ 272168404Spjdstatic void 273168404Spjdzfsctl_common_getattr(vnode_t *vp, vattr_t *vap) 274168404Spjd{ 275168404Spjd zfsctl_node_t *zcp = vp->v_data; 276168404Spjd timestruc_t now; 277168404Spjd 278168404Spjd vap->va_uid = 0; 279168404Spjd vap->va_gid = 0; 280168404Spjd vap->va_rdev = 0; 281168404Spjd /* 282168404Spjd * We are a purly virtual object, so we have no 283168404Spjd * blocksize or allocated blocks. 284168404Spjd */ 285168404Spjd vap->va_blksize = 0; 286168404Spjd vap->va_nblocks = 0; 287168404Spjd vap->va_seq = 0; 288168404Spjd vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 289168404Spjd vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | 290168404Spjd S_IROTH | S_IXOTH; 291168404Spjd vap->va_type = VDIR; 292168404Spjd /* 293168404Spjd * We live in the now (for atime). 294168404Spjd */ 295168404Spjd gethrestime(&now); 296168404Spjd vap->va_atime = now; 297168404Spjd vap->va_mtime = vap->va_ctime = vap->va_birthtime = zcp->zc_cmtime; 298168404Spjd /* FreeBSD: Reset chflags(2) flags. 
*/ 299168404Spjd vap->va_flags = 0; 300168404Spjd} 301168404Spjd 302185029Spjd/*ARGSUSED*/ 303168404Spjdstatic int 304168404Spjdzfsctl_common_fid(ap) 305168404Spjd struct vop_fid_args /* { 306168404Spjd struct vnode *a_vp; 307168404Spjd struct fid *a_fid; 308168404Spjd } */ *ap; 309168404Spjd{ 310168404Spjd vnode_t *vp = ap->a_vp; 311168404Spjd fid_t *fidp = (void *)ap->a_fid; 312168404Spjd zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 313168404Spjd zfsctl_node_t *zcp = vp->v_data; 314168404Spjd uint64_t object = zcp->zc_id; 315168404Spjd zfid_short_t *zfid; 316168404Spjd int i; 317168404Spjd 318168404Spjd ZFS_ENTER(zfsvfs); 319168404Spjd 320168404Spjd fidp->fid_len = SHORT_FID_LEN; 321168404Spjd 322168404Spjd zfid = (zfid_short_t *)fidp; 323168404Spjd 324168404Spjd zfid->zf_len = SHORT_FID_LEN; 325168404Spjd 326168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 327168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 328168404Spjd 329168404Spjd /* .zfs znodes always have a generation number of 0 */ 330168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 331168404Spjd zfid->zf_gen[i] = 0; 332168404Spjd 333168404Spjd ZFS_EXIT(zfsvfs); 334168404Spjd return (0); 335168404Spjd} 336168404Spjd 337168404Spjdstatic int 338168404Spjdzfsctl_common_reclaim(ap) 339168404Spjd struct vop_reclaim_args /* { 340168404Spjd struct vnode *a_vp; 341168404Spjd struct thread *a_td; 342168404Spjd } */ *ap; 343168404Spjd{ 344168404Spjd vnode_t *vp = ap->a_vp; 345168404Spjd 346168404Spjd /* 347168404Spjd * Destroy the vm object and flush associated pages. 348168404Spjd */ 349168404Spjd vnode_destroy_vobject(vp); 350168404Spjd VI_LOCK(vp); 351168404Spjd vp->v_data = NULL; 352168404Spjd VI_UNLOCK(vp); 353168404Spjd return (0); 354168404Spjd} 355168404Spjd 356168404Spjd/* 357168404Spjd * .zfs inode namespace 358168404Spjd * 359168404Spjd * We need to generate unique inode numbers for all files and directories 360168404Spjd * within the .zfs pseudo-filesystem. 
We use the following scheme: 361168404Spjd * 362168404Spjd * ENTRY ZFSCTL_INODE 363168404Spjd * .zfs 1 364168404Spjd * .zfs/snapshot 2 365168404Spjd * .zfs/snapshot/<snap> objectid(snap) 366168404Spjd */ 367168404Spjd 368168404Spjd#define ZFSCTL_INO_SNAP(id) (id) 369168404Spjd 370168404Spjd/* 371168404Spjd * Get root directory attributes. 372168404Spjd */ 373168404Spjd/* ARGSUSED */ 374168404Spjdstatic int 375168404Spjdzfsctl_root_getattr(ap) 376168404Spjd struct vop_getattr_args /* { 377168404Spjd struct vnode *a_vp; 378168404Spjd struct vattr *a_vap; 379168404Spjd struct ucred *a_cred; 380185029Spjd struct thread *a_td; 381168404Spjd } */ *ap; 382168404Spjd{ 383168404Spjd struct vnode *vp = ap->a_vp; 384168404Spjd struct vattr *vap = ap->a_vap; 385168404Spjd zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 386168404Spjd 387168404Spjd ZFS_ENTER(zfsvfs); 388168404Spjd vap->va_nodeid = ZFSCTL_INO_ROOT; 389168404Spjd vap->va_nlink = vap->va_size = NROOT_ENTRIES; 390168404Spjd 391168404Spjd zfsctl_common_getattr(vp, vap); 392168404Spjd ZFS_EXIT(zfsvfs); 393168404Spjd 394168404Spjd return (0); 395168404Spjd} 396168404Spjd 397168404Spjd/* 398168404Spjd * Special case the handling of "..". 
399168404Spjd */ 400168404Spjd/* ARGSUSED */ 401168404Spjdint 402168404Spjdzfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, 403185029Spjd int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 404185029Spjd int *direntflags, pathname_t *realpnp) 405168404Spjd{ 406168404Spjd zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 407168404Spjd int err; 408168404Spjd 409185029Spjd /* 410185029Spjd * No extended attributes allowed under .zfs 411185029Spjd */ 412185029Spjd if (flags & LOOKUP_XATTR) 413185029Spjd return (EINVAL); 414185029Spjd 415168404Spjd ZFS_ENTER(zfsvfs); 416168404Spjd 417168404Spjd if (strcmp(nm, "..") == 0) { 418191990Sattilio err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp); 419168404Spjd if (err == 0) 420175294Sattilio VOP_UNLOCK(*vpp, 0); 421168404Spjd } else { 422185029Spjd err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir, 423185029Spjd cr, ct, direntflags, realpnp); 424168404Spjd } 425168404Spjd 426168404Spjd ZFS_EXIT(zfsvfs); 427168404Spjd 428168404Spjd return (err); 429168404Spjd} 430168404Spjd 431168404Spjd/* 432168404Spjd * Special case the handling of "..". 
433168404Spjd */ 434168404Spjd/* ARGSUSED */ 435168404Spjdint 436185029Spjdzfsctl_freebsd_root_lookup(ap) 437168404Spjd struct vop_lookup_args /* { 438168404Spjd struct vnode *a_dvp; 439168404Spjd struct vnode **a_vpp; 440168404Spjd struct componentname *a_cnp; 441168404Spjd } */ *ap; 442168404Spjd{ 443168404Spjd vnode_t *dvp = ap->a_dvp; 444168404Spjd vnode_t **vpp = ap->a_vpp; 445168404Spjd cred_t *cr = ap->a_cnp->cn_cred; 446168404Spjd int flags = ap->a_cnp->cn_flags; 447168404Spjd int nameiop = ap->a_cnp->cn_nameiop; 448168404Spjd char nm[NAME_MAX + 1]; 449168404Spjd int err; 450168404Spjd 451168404Spjd if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE)) 452168404Spjd return (EOPNOTSUPP); 453168404Spjd 454168404Spjd ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); 455168404Spjd strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); 456168404Spjd 457185029Spjd err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); 458168404Spjd if (err == 0 && (nm[0] != '.' 
|| nm[1] != '\0')) 459175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 460168404Spjd 461168404Spjd return (err); 462168404Spjd} 463168404Spjd 464168404Spjdstatic struct vop_vector zfsctl_ops_root = { 465168404Spjd .vop_default = &default_vnodeops, 466168404Spjd .vop_open = zfsctl_common_open, 467168404Spjd .vop_close = zfsctl_common_close, 468168404Spjd .vop_ioctl = VOP_EINVAL, 469168404Spjd .vop_getattr = zfsctl_root_getattr, 470168404Spjd .vop_access = zfsctl_common_access, 471168404Spjd .vop_readdir = gfs_vop_readdir, 472185029Spjd .vop_lookup = zfsctl_freebsd_root_lookup, 473168404Spjd .vop_inactive = gfs_vop_inactive, 474168404Spjd .vop_reclaim = zfsctl_common_reclaim, 475168404Spjd .vop_fid = zfsctl_common_fid, 476168404Spjd}; 477168404Spjd 478168404Spjdstatic int 479168404Spjdzfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) 480168404Spjd{ 481168404Spjd objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 482168404Spjd 483185029Spjd if (snapshot_namecheck(name, NULL, NULL) != 0) 484185029Spjd return (EILSEQ); 485168404Spjd dmu_objset_name(os, zname); 486168404Spjd if (strlen(zname) + 1 + strlen(name) >= len) 487168404Spjd return (ENAMETOOLONG); 488168404Spjd (void) strcat(zname, "@"); 489168404Spjd (void) strcat(zname, name); 490168404Spjd return (0); 491168404Spjd} 492168404Spjd 493168404Spjdstatic int 494185029Spjdzfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr) 495168404Spjd{ 496185029Spjd vnode_t *svp = sep->se_root; 497185029Spjd int error; 498168404Spjd 499185029Spjd ASSERT(vn_ismntpt(svp)); 500168404Spjd 501168404Spjd /* this will be dropped by dounmount() */ 502185029Spjd if ((error = vn_vfswlock(svp)) != 0) 503185029Spjd return (error); 504168404Spjd 505185029Spjd return (dounmount(vn_mountedvfs(svp), fflags, curthread)); 506168404Spjd} 507168404Spjd 508168404Spjd#if 0 509168404Spjdstatic void 510168404Spjdzfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm) 
511168404Spjd{ 512168404Spjd avl_index_t where; 513168404Spjd vfs_t *vfsp; 514168404Spjd refstr_t *pathref; 515168404Spjd char newpath[MAXNAMELEN]; 516168404Spjd char *tail; 517168404Spjd 518168404Spjd ASSERT(MUTEX_HELD(&sdp->sd_lock)); 519168404Spjd ASSERT(sep != NULL); 520168404Spjd 521168404Spjd vfsp = vn_mountedvfs(sep->se_root); 522168404Spjd ASSERT(vfsp != NULL); 523168404Spjd 524168404Spjd vfs_lock_wait(vfsp); 525168404Spjd 526168404Spjd /* 527168404Spjd * Change the name in the AVL tree. 528168404Spjd */ 529168404Spjd avl_remove(&sdp->sd_snaps, sep); 530168404Spjd kmem_free(sep->se_name, strlen(sep->se_name) + 1); 531168404Spjd sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); 532168404Spjd (void) strcpy(sep->se_name, nm); 533168404Spjd VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL); 534168404Spjd avl_insert(&sdp->sd_snaps, sep, where); 535168404Spjd 536168404Spjd /* 537168404Spjd * Change the current mountpoint info: 538168404Spjd * - update the tail of the mntpoint path 539168404Spjd * - update the tail of the resource path 540168404Spjd */ 541168404Spjd pathref = vfs_getmntpoint(vfsp); 542168404Spjd (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath)); 543168404Spjd VERIFY((tail = strrchr(newpath, '/')) != NULL); 544168404Spjd *(tail+1) = '\0'; 545168404Spjd ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath)); 546168404Spjd (void) strcat(newpath, nm); 547168404Spjd refstr_rele(pathref); 548168404Spjd vfs_setmntpoint(vfsp, newpath); 549168404Spjd 550168404Spjd pathref = vfs_getresource(vfsp); 551168404Spjd (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath)); 552168404Spjd VERIFY((tail = strrchr(newpath, '@')) != NULL); 553168404Spjd *(tail+1) = '\0'; 554168404Spjd ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath)); 555168404Spjd (void) strcat(newpath, nm); 556168404Spjd refstr_rele(pathref); 557168404Spjd vfs_setresource(vfsp, newpath); 558168404Spjd 559168404Spjd vfs_unlock(vfsp); 560168404Spjd} 
561168404Spjd#endif 562168404Spjd 563168404Spjd#if 0 564185029Spjd/*ARGSUSED*/ 565168404Spjdstatic int 566168404Spjdzfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, 567185029Spjd cred_t *cr, caller_context_t *ct, int flags) 568168404Spjd{ 569168404Spjd zfsctl_snapdir_t *sdp = sdvp->v_data; 570168404Spjd zfs_snapentry_t search, *sep; 571185029Spjd zfsvfs_t *zfsvfs; 572168404Spjd avl_index_t where; 573168404Spjd char from[MAXNAMELEN], to[MAXNAMELEN]; 574185029Spjd char real[MAXNAMELEN]; 575168404Spjd int err; 576168404Spjd 577185029Spjd zfsvfs = sdvp->v_vfsp->vfs_data; 578185029Spjd ZFS_ENTER(zfsvfs); 579185029Spjd 580185029Spjd if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 581185029Spjd err = dmu_snapshot_realname(zfsvfs->z_os, snm, real, 582185029Spjd MAXNAMELEN, NULL); 583185029Spjd if (err == 0) { 584185029Spjd snm = real; 585185029Spjd } else if (err != ENOTSUP) { 586185029Spjd ZFS_EXIT(zfsvfs); 587185029Spjd return (err); 588185029Spjd } 589185029Spjd } 590185029Spjd 591185029Spjd ZFS_EXIT(zfsvfs); 592185029Spjd 593168404Spjd err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from); 594185029Spjd if (!err) 595185029Spjd err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to); 596185029Spjd if (!err) 597185029Spjd err = zfs_secpolicy_rename_perms(from, to, cr); 598168404Spjd if (err) 599168404Spjd return (err); 600168404Spjd 601168404Spjd /* 602168404Spjd * Cannot move snapshots out of the snapdir. 
603168404Spjd */ 604168404Spjd if (sdvp != tdvp) 605168404Spjd return (EINVAL); 606168404Spjd 607168404Spjd if (strcmp(snm, tnm) == 0) 608168404Spjd return (0); 609168404Spjd 610168404Spjd mutex_enter(&sdp->sd_lock); 611168404Spjd 612168404Spjd search.se_name = (char *)snm; 613168404Spjd if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) { 614168404Spjd mutex_exit(&sdp->sd_lock); 615168404Spjd return (ENOENT); 616168404Spjd } 617168404Spjd 618168676Spjd err = dmu_objset_rename(from, to, B_FALSE); 619168404Spjd if (err == 0) 620168404Spjd zfsctl_rename_snap(sdp, sep, tnm); 621168404Spjd 622168404Spjd mutex_exit(&sdp->sd_lock); 623168404Spjd 624168404Spjd return (err); 625168404Spjd} 626168404Spjd#endif 627168404Spjd 628168404Spjd#if 0 629168404Spjd/* ARGSUSED */ 630168404Spjdstatic int 631185029Spjdzfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 632185029Spjd caller_context_t *ct, int flags) 633168404Spjd{ 634169170Spjd zfsctl_snapdir_t *sdp = dvp->v_data; 635185029Spjd zfs_snapentry_t *sep; 636185029Spjd zfs_snapentry_t search; 637185029Spjd zfsvfs_t *zfsvfs; 638169170Spjd char snapname[MAXNAMELEN]; 639185029Spjd char real[MAXNAMELEN]; 640169170Spjd int err; 641168404Spjd 642185029Spjd zfsvfs = dvp->v_vfsp->vfs_data; 643185029Spjd ZFS_ENTER(zfsvfs); 644185029Spjd 645185029Spjd if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 646185029Spjd 647185029Spjd err = dmu_snapshot_realname(zfsvfs->z_os, name, real, 648185029Spjd MAXNAMELEN, NULL); 649185029Spjd if (err == 0) { 650185029Spjd name = real; 651185029Spjd } else if (err != ENOTSUP) { 652185029Spjd ZFS_EXIT(zfsvfs); 653185029Spjd return (err); 654185029Spjd } 655185029Spjd } 656185029Spjd 657185029Spjd ZFS_EXIT(zfsvfs); 658185029Spjd 659169170Spjd err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname); 660185029Spjd if (!err) 661185029Spjd err = zfs_secpolicy_destroy_perms(snapname, cr); 662169170Spjd if (err) 663169170Spjd return (err); 
664168404Spjd 665169170Spjd mutex_enter(&sdp->sd_lock); 666168404Spjd 667185029Spjd search.se_name = name; 668185029Spjd sep = avl_find(&sdp->sd_snaps, &search, NULL); 669185029Spjd if (sep) { 670185029Spjd avl_remove(&sdp->sd_snaps, sep); 671185029Spjd err = zfsctl_unmount_snap(sep, MS_FORCE, cr); 672196954Spjd if (err) { 673196954Spjd avl_index_t where; 674196954Spjd 675196954Spjd if (avl_find(&sdp->sd_snaps, sep, &where) == NULL) 676196954Spjd avl_insert(&sdp->sd_snaps, sep, where); 677196954Spjd } else 678185029Spjd err = dmu_objset_destroy(snapname); 679185029Spjd } else { 680185029Spjd err = ENOENT; 681169170Spjd } 682168404Spjd 683169170Spjd mutex_exit(&sdp->sd_lock); 684168404Spjd 685169170Spjd return (err); 686168404Spjd} 687168404Spjd#endif 688168404Spjd 689168404Spjd/* 690185029Spjd * This creates a snapshot under '.zfs/snapshot'. 691185029Spjd */ 692185029Spjd/* ARGSUSED */ 693185029Spjdstatic int 694185029Spjdzfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, 695185029Spjd cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp) 696185029Spjd{ 697185029Spjd zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 698185029Spjd char name[MAXNAMELEN]; 699185029Spjd int err; 700185029Spjd static enum symfollow follow = NO_FOLLOW; 701185029Spjd static enum uio_seg seg = UIO_SYSSPACE; 702185029Spjd 703185029Spjd if (snapshot_namecheck(dirname, NULL, NULL) != 0) 704185029Spjd return (EILSEQ); 705185029Spjd 706185029Spjd dmu_objset_name(zfsvfs->z_os, name); 707185029Spjd 708185029Spjd *vpp = NULL; 709185029Spjd 710185029Spjd err = zfs_secpolicy_snapshot_perms(name, cr); 711185029Spjd if (err) 712185029Spjd return (err); 713185029Spjd 714185029Spjd if (err == 0) { 715185029Spjd err = dmu_objset_snapshot(name, dirname, B_FALSE); 716185029Spjd if (err) 717185029Spjd return (err); 718185029Spjd err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp); 719185029Spjd } 720185029Spjd 721185029Spjd return (err); 722185029Spjd} 723185029Spjd 
724185029Spjdstatic int 725185029Spjdzfsctl_freebsd_snapdir_mkdir(ap) 726185029Spjd struct vop_mkdir_args /* { 727185029Spjd struct vnode *a_dvp; 728185029Spjd struct vnode **a_vpp; 729185029Spjd struct componentname *a_cnp; 730185029Spjd struct vattr *a_vap; 731185029Spjd } */ *ap; 732185029Spjd{ 733185029Spjd 734185029Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 735185029Spjd 736185029Spjd return (zfsctl_snapdir_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, NULL, 737185029Spjd ap->a_vpp, ap->a_cnp->cn_cred, NULL, 0, NULL)); 738185029Spjd} 739185029Spjd 740185029Spjd/* 741168404Spjd * Lookup entry point for the 'snapshot' directory. Try to open the 742168404Spjd * snapshot if it exist, creating the pseudo filesystem vnode as necessary. 743168404Spjd * Perform a mount of the associated dataset on top of the vnode. 744168404Spjd */ 745168404Spjd/* ARGSUSED */ 746168404Spjdint 747168404Spjdzfsctl_snapdir_lookup(ap) 748168404Spjd struct vop_lookup_args /* { 749168404Spjd struct vnode *a_dvp; 750168404Spjd struct vnode **a_vpp; 751168404Spjd struct componentname *a_cnp; 752168404Spjd } */ *ap; 753168404Spjd{ 754168404Spjd vnode_t *dvp = ap->a_dvp; 755168404Spjd vnode_t **vpp = ap->a_vpp; 756185029Spjd struct componentname *cnp = ap->a_cnp; 757168404Spjd char nm[NAME_MAX + 1]; 758168404Spjd zfsctl_snapdir_t *sdp = dvp->v_data; 759168404Spjd objset_t *snap; 760168404Spjd char snapname[MAXNAMELEN]; 761185029Spjd char real[MAXNAMELEN]; 762168404Spjd char *mountpoint; 763168404Spjd zfs_snapentry_t *sep, search; 764168404Spjd size_t mountpoint_len; 765168404Spjd avl_index_t where; 766168404Spjd zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 767168404Spjd int err; 768185029Spjd int flags = 0; 769168404Spjd 770185029Spjd /* 771185029Spjd * No extended attributes allowed under .zfs 772185029Spjd */ 773185029Spjd if (flags & LOOKUP_XATTR) 774185029Spjd return (EINVAL); 775168404Spjd ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); 776168404Spjd strlcpy(nm, ap->a_cnp->cn_nameptr, 
ap->a_cnp->cn_namelen + 1); 777168404Spjd 778168404Spjd ASSERT(dvp->v_type == VDIR); 779168404Spjd 780168404Spjd if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) 781168404Spjd return (0); 782168404Spjd 783168404Spjd *vpp = NULL; 784168404Spjd 785168404Spjd /* 786168404Spjd * If we get a recursive call, that means we got called 787168404Spjd * from the domount() code while it was trying to look up the 788168404Spjd * spec (which looks like a local path for zfs). We need to 789168404Spjd * add some flag to domount() to tell it not to do this lookup. 790168404Spjd */ 791168404Spjd if (MUTEX_HELD(&sdp->sd_lock)) 792168404Spjd return (ENOENT); 793168404Spjd 794168404Spjd ZFS_ENTER(zfsvfs); 795168404Spjd 796185029Spjd if (flags & FIGNORECASE) { 797185029Spjd boolean_t conflict = B_FALSE; 798185029Spjd 799185029Spjd err = dmu_snapshot_realname(zfsvfs->z_os, nm, real, 800185029Spjd MAXNAMELEN, &conflict); 801185029Spjd if (err == 0) { 802185029Spjd strlcpy(nm, real, sizeof(nm)); 803185029Spjd } else if (err != ENOTSUP) { 804185029Spjd ZFS_EXIT(zfsvfs); 805185029Spjd return (err); 806185029Spjd } 807185029Spjd#if 0 808185029Spjd if (realpnp) 809185029Spjd (void) strlcpy(realpnp->pn_buf, nm, 810185029Spjd realpnp->pn_bufsize); 811185029Spjd if (conflict && direntflags) 812185029Spjd *direntflags = ED_CASE_CONFLICT; 813185029Spjd#endif 814185029Spjd } 815185029Spjd 816168404Spjd mutex_enter(&sdp->sd_lock); 817168404Spjd search.se_name = (char *)nm; 818168404Spjd if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { 819168404Spjd *vpp = sep->se_root; 820168404Spjd VN_HOLD(*vpp); 821197513Spjd err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY); 822197513Spjd if (err) { 823197513Spjd VN_RELE(*vpp); 824197513Spjd *vpp = NULL; 825197513Spjd } else if (*vpp == sep->se_root) { 826168404Spjd /* 827168404Spjd * The snapshot was unmounted behind our backs, 828168404Spjd * try to remount it. 
829168404Spjd */ 830168404Spjd goto domount; 831185029Spjd } else { 832185029Spjd /* 833185029Spjd * VROOT was set during the traverse call. We need 834185029Spjd * to clear it since we're pretending to be part 835185029Spjd * of our parent's vfs. 836185029Spjd */ 837185029Spjd (*vpp)->v_flag &= ~VROOT; 838168404Spjd } 839168404Spjd mutex_exit(&sdp->sd_lock); 840168404Spjd ZFS_EXIT(zfsvfs); 841197513Spjd return (err); 842168404Spjd } 843168404Spjd 844168404Spjd /* 845168404Spjd * The requested snapshot is not currently mounted, look it up. 846168404Spjd */ 847168404Spjd err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname); 848168404Spjd if (err) { 849168404Spjd mutex_exit(&sdp->sd_lock); 850168404Spjd ZFS_EXIT(zfsvfs); 851185029Spjd /* 852185029Spjd * handle "ls *" or "?" in a graceful manner, 853185029Spjd * forcing EILSEQ to ENOENT. 854185029Spjd * Since shell ultimately passes "*" or "?" as name to lookup 855185029Spjd */ 856185029Spjd return (err == EILSEQ ? ENOENT : err); 857168404Spjd } 858168404Spjd if (dmu_objset_open(snapname, DMU_OST_ZFS, 859185029Spjd DS_MODE_USER | DS_MODE_READONLY, &snap) != 0) { 860168404Spjd mutex_exit(&sdp->sd_lock); 861185029Spjd /* Translate errors and add SAVENAME when needed. 
*/ 862185029Spjd if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) { 863185029Spjd err = EJUSTRETURN; 864185029Spjd cnp->cn_flags |= SAVENAME; 865185029Spjd } else { 866185029Spjd err = ENOENT; 867185029Spjd } 868168404Spjd ZFS_EXIT(zfsvfs); 869185029Spjd return (err); 870168404Spjd } 871168404Spjd 872168404Spjd sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP); 873168404Spjd sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); 874168404Spjd (void) strcpy(sep->se_name, nm); 875168404Spjd *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); 876168404Spjd VN_HOLD(*vpp); 877168404Spjd avl_insert(&sdp->sd_snaps, sep, where); 878168404Spjd 879168404Spjd dmu_objset_close(snap); 880168404Spjddomount: 881168404Spjd mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) + 882168404Spjd strlen("/.zfs/snapshot/") + strlen(nm) + 1; 883168404Spjd mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); 884168404Spjd (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s", 885168404Spjd dvp->v_vfsp->mnt_stat.f_mntonname, nm); 886197201Spjd err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0); 887168404Spjd kmem_free(mountpoint, mountpoint_len); 888196980Spjd if (err == 0) { 889196980Spjd /* 890196980Spjd * Fix up the root vnode mounted on .zfs/snapshot/<snapname>. 891196980Spjd * 892196980Spjd * This is where we lie about our v_vfsp in order to 893196980Spjd * make .zfs/snapshot/<snapname> accessible over NFS 894196980Spjd * without requiring manual mounts of <snapname>. 
895196980Spjd */ 896196980Spjd ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); 897196980Spjd VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; 898196980Spjd } 899168404Spjd mutex_exit(&sdp->sd_lock); 900183037Spjd ZFS_EXIT(zfsvfs); 901168404Spjd return (err); 902168404Spjd} 903168404Spjd 904168404Spjd/* ARGSUSED */ 905168404Spjdstatic int 906185029Spjdzfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp, 907185029Spjd offset_t *offp, offset_t *nextp, void *data, int flags) 908168404Spjd{ 909168404Spjd zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 910168404Spjd char snapname[MAXNAMELEN]; 911168404Spjd uint64_t id, cookie; 912185029Spjd boolean_t case_conflict; 913185029Spjd int error; 914168404Spjd 915168404Spjd ZFS_ENTER(zfsvfs); 916168404Spjd 917168404Spjd cookie = *offp; 918185029Spjd error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id, 919185029Spjd &cookie, &case_conflict); 920185029Spjd if (error) { 921168404Spjd ZFS_EXIT(zfsvfs); 922185029Spjd if (error == ENOENT) { 923185029Spjd *eofp = 1; 924185029Spjd return (0); 925185029Spjd } 926185029Spjd return (error); 927168404Spjd } 928168404Spjd 929185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 930185029Spjd edirent_t *eodp = dp; 931185029Spjd 932185029Spjd (void) strcpy(eodp->ed_name, snapname); 933185029Spjd eodp->ed_ino = ZFSCTL_INO_SNAP(id); 934185029Spjd eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0; 935185029Spjd } else { 936185029Spjd struct dirent64 *odp = dp; 937185029Spjd 938185029Spjd (void) strcpy(odp->d_name, snapname); 939185029Spjd odp->d_ino = ZFSCTL_INO_SNAP(id); 940185029Spjd } 941168404Spjd *nextp = cookie; 942168404Spjd 943168404Spjd ZFS_EXIT(zfsvfs); 944168404Spjd 945168404Spjd return (0); 946168404Spjd} 947168404Spjd 948185029Spjd/* 949185029Spjd * pvp is the '.zfs' directory (zfsctl_node_t). 950185029Spjd * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t). 
951185029Spjd * 952185029Spjd * This function is the callback to create a GFS vnode for '.zfs/snapshot' 953185029Spjd * when a lookup is performed on .zfs for "snapshot". 954185029Spjd */ 955168404Spjdvnode_t * 956168404Spjdzfsctl_mknode_snapdir(vnode_t *pvp) 957168404Spjd{ 958168404Spjd vnode_t *vp; 959168404Spjd zfsctl_snapdir_t *sdp; 960168404Spjd 961168404Spjd vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, pvp->v_vfsp, 962168404Spjd &zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN, 963168404Spjd zfsctl_snapdir_readdir_cb, NULL); 964168404Spjd sdp = vp->v_data; 965168404Spjd sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR; 966168404Spjd sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime; 967168404Spjd mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL); 968168404Spjd avl_create(&sdp->sd_snaps, snapentry_compare, 969168404Spjd sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); 970182781Spjd VOP_UNLOCK(vp, 0); 971168404Spjd return (vp); 972168404Spjd} 973168404Spjd 974168404Spjd/* ARGSUSED */ 975168404Spjdstatic int 976168404Spjdzfsctl_snapdir_getattr(ap) 977168404Spjd struct vop_getattr_args /* { 978168404Spjd struct vnode *a_vp; 979168404Spjd struct vattr *a_vap; 980168404Spjd struct ucred *a_cred; 981185029Spjd struct thread *a_td; 982168404Spjd } */ *ap; 983168404Spjd{ 984168404Spjd struct vnode *vp = ap->a_vp; 985168404Spjd struct vattr *vap = ap->a_vap; 986168404Spjd zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 987168404Spjd zfsctl_snapdir_t *sdp = vp->v_data; 988168404Spjd 989168404Spjd ZFS_ENTER(zfsvfs); 990168404Spjd zfsctl_common_getattr(vp, vap); 991168404Spjd vap->va_nodeid = gfs_file_inode(vp); 992168404Spjd vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2; 993168404Spjd ZFS_EXIT(zfsvfs); 994168404Spjd 995168404Spjd return (0); 996168404Spjd} 997168404Spjd 998168404Spjd/* ARGSUSED */ 999168404Spjdstatic int 1000168404Spjdzfsctl_snapdir_inactive(ap) 1001168404Spjd struct vop_inactive_args /* { 1002168404Spjd struct vnode 
*a_vp; 1003168404Spjd struct thread *a_td; 1004168404Spjd } */ *ap; 1005168404Spjd{ 1006168404Spjd vnode_t *vp = ap->a_vp; 1007168404Spjd zfsctl_snapdir_t *sdp = vp->v_data; 1008168404Spjd void *private; 1009168404Spjd 1010168404Spjd private = gfs_dir_inactive(vp); 1011168404Spjd if (private != NULL) { 1012168404Spjd ASSERT(avl_numnodes(&sdp->sd_snaps) == 0); 1013168404Spjd mutex_destroy(&sdp->sd_lock); 1014168404Spjd avl_destroy(&sdp->sd_snaps); 1015168404Spjd kmem_free(private, sizeof (zfsctl_snapdir_t)); 1016168404Spjd } 1017168404Spjd return (0); 1018168404Spjd} 1019168404Spjd 1020168404Spjdstatic struct vop_vector zfsctl_ops_snapdir = { 1021168404Spjd .vop_default = &default_vnodeops, 1022168404Spjd .vop_open = zfsctl_common_open, 1023168404Spjd .vop_close = zfsctl_common_close, 1024168404Spjd .vop_ioctl = VOP_EINVAL, 1025168404Spjd .vop_getattr = zfsctl_snapdir_getattr, 1026168404Spjd .vop_access = zfsctl_common_access, 1027185029Spjd .vop_mkdir = zfsctl_freebsd_snapdir_mkdir, 1028168404Spjd .vop_readdir = gfs_vop_readdir, 1029168404Spjd .vop_lookup = zfsctl_snapdir_lookup, 1030168404Spjd .vop_inactive = zfsctl_snapdir_inactive, 1031168404Spjd .vop_reclaim = zfsctl_common_reclaim, 1032168404Spjd .vop_fid = zfsctl_common_fid, 1033168404Spjd}; 1034168404Spjd 1035185029Spjd/* 1036185029Spjd * pvp is the GFS vnode '.zfs/snapshot'. 1037185029Spjd * 1038185029Spjd * This creates a GFS node under '.zfs/snapshot' representing each 1039185029Spjd * snapshot. This newly created GFS node is what we mount snapshot 1040185029Spjd * vfs_t's ontop of. 
1041185029Spjd */ 1042168404Spjdstatic vnode_t * 1043168404Spjdzfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) 1044168404Spjd{ 1045168404Spjd vnode_t *vp; 1046168404Spjd zfsctl_node_t *zcp; 1047168404Spjd 1048168404Spjd vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp, 1049168404Spjd &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL); 1050185029Spjd VN_HOLD(vp); 1051168404Spjd zcp = vp->v_data; 1052168404Spjd zcp->zc_id = objset; 1053185029Spjd VFS_HOLD(vp->v_vfsp); 1054182781Spjd VOP_UNLOCK(vp, 0); 1055168404Spjd 1056168404Spjd return (vp); 1057168404Spjd} 1058168404Spjd 1059168404Spjdstatic int 1060168404Spjdzfsctl_snapshot_inactive(ap) 1061168404Spjd struct vop_inactive_args /* { 1062168404Spjd struct vnode *a_vp; 1063168404Spjd struct thread *a_td; 1064168404Spjd } */ *ap; 1065168404Spjd{ 1066168404Spjd vnode_t *vp = ap->a_vp; 1067185029Spjd cred_t *cr = ap->a_td->td_ucred; 1068168404Spjd struct vop_inactive_args iap; 1069168404Spjd zfsctl_snapdir_t *sdp; 1070168404Spjd zfs_snapentry_t *sep, *next; 1071168404Spjd int locked; 1072168404Spjd vnode_t *dvp; 1073168404Spjd 1074185029Spjd VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0); 1075168404Spjd sdp = dvp->v_data; 1076175294Sattilio VOP_UNLOCK(dvp, 0); 1077168404Spjd 1078168404Spjd if (!(locked = MUTEX_HELD(&sdp->sd_lock))) 1079168404Spjd mutex_enter(&sdp->sd_lock); 1080168404Spjd 1081168404Spjd if (vp->v_count > 1) { 1082168404Spjd if (!locked) 1083168404Spjd mutex_exit(&sdp->sd_lock); 1084168404Spjd return (0); 1085168404Spjd } 1086168404Spjd ASSERT(!vn_ismntpt(vp)); 1087168404Spjd 1088168404Spjd sep = avl_first(&sdp->sd_snaps); 1089168404Spjd while (sep != NULL) { 1090168404Spjd next = AVL_NEXT(&sdp->sd_snaps, sep); 1091168404Spjd 1092168404Spjd if (sep->se_root == vp) { 1093168404Spjd avl_remove(&sdp->sd_snaps, sep); 1094168404Spjd kmem_free(sep->se_name, strlen(sep->se_name) + 1); 1095168404Spjd kmem_free(sep, sizeof (zfs_snapentry_t)); 1096168404Spjd break; 
1097168404Spjd } 1098168404Spjd sep = next; 1099168404Spjd } 1100168404Spjd ASSERT(sep != NULL); 1101168404Spjd 1102168404Spjd if (!locked) 1103168404Spjd mutex_exit(&sdp->sd_lock); 1104168404Spjd VN_RELE(dvp); 1105185029Spjd VFS_RELE(vp->v_vfsp); 1106168404Spjd 1107168404Spjd /* 1108168404Spjd * Dispose of the vnode for the snapshot mount point. 1109168404Spjd * This is safe to do because once this entry has been removed 1110168404Spjd * from the AVL tree, it can't be found again, so cannot become 1111168404Spjd * "active". If we lookup the same name again we will end up 1112168404Spjd * creating a new vnode. 1113168404Spjd */ 1114168404Spjd iap.a_vp = vp; 1115168404Spjd return (gfs_vop_inactive(&iap)); 1116168404Spjd} 1117168404Spjd 1118168404Spjdstatic int 1119182371Sattiliozfsctl_traverse_begin(vnode_t **vpp, int lktype) 1120168404Spjd{ 1121168404Spjd 1122168404Spjd VN_HOLD(*vpp); 1123168404Spjd /* Snapshot should be already mounted, but just in case. */ 1124168404Spjd if (vn_mountedvfs(*vpp) == NULL) 1125168404Spjd return (ENOENT); 1126170281Spjd return (traverse(vpp, lktype)); 1127168404Spjd} 1128168404Spjd 1129168404Spjdstatic void 1130168404Spjdzfsctl_traverse_end(vnode_t *vp, int err) 1131168404Spjd{ 1132168404Spjd 1133168404Spjd if (err == 0) 1134168404Spjd vput(vp); 1135168404Spjd else 1136168404Spjd VN_RELE(vp); 1137168404Spjd} 1138168404Spjd 1139168404Spjdstatic int 1140168404Spjdzfsctl_snapshot_getattr(ap) 1141168404Spjd struct vop_getattr_args /* { 1142168404Spjd struct vnode *a_vp; 1143168404Spjd struct vattr *a_vap; 1144168404Spjd struct ucred *a_cred; 1145168404Spjd } */ *ap; 1146168404Spjd{ 1147168404Spjd vnode_t *vp = ap->a_vp; 1148168404Spjd int err; 1149168404Spjd 1150182371Sattilio err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); 1151168404Spjd if (err == 0) 1152182371Sattilio err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred); 1153168404Spjd zfsctl_traverse_end(vp, err); 1154168404Spjd return (err); 1155168404Spjd} 1156168404Spjd 
1157168404Spjdstatic int 1158168404Spjdzfsctl_snapshot_fid(ap) 1159168404Spjd struct vop_fid_args /* { 1160168404Spjd struct vnode *a_vp; 1161168404Spjd struct fid *a_fid; 1162168404Spjd } */ *ap; 1163168404Spjd{ 1164168404Spjd vnode_t *vp = ap->a_vp; 1165168404Spjd int err; 1166168404Spjd 1167182371Sattilio err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); 1168168404Spjd if (err == 0) 1169168404Spjd err = VOP_VPTOFH(vp, (void *)ap->a_fid); 1170168404Spjd zfsctl_traverse_end(vp, err); 1171168404Spjd return (err); 1172168404Spjd} 1173168404Spjd 1174185029Spjdstatic int 1175185029Spjdzfsctl_snapshot_lookup(ap) 1176185029Spjd struct vop_lookup_args /* { 1177185029Spjd struct vnode *a_dvp; 1178185029Spjd struct vnode **a_vpp; 1179185029Spjd struct componentname *a_cnp; 1180185029Spjd } */ *ap; 1181185029Spjd{ 1182185029Spjd vnode_t *dvp = ap->a_dvp; 1183185029Spjd vnode_t **vpp = ap->a_vpp; 1184185029Spjd struct componentname *cnp = ap->a_cnp; 1185185029Spjd cred_t *cr = ap->a_cnp->cn_cred; 1186185029Spjd zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 1187185029Spjd int error; 1188185029Spjd 1189185029Spjd if (cnp->cn_namelen != 2 || cnp->cn_nameptr[0] != '.' 
|| 1190185029Spjd cnp->cn_nameptr[1] != '.') { 1191185029Spjd return (ENOENT); 1192185029Spjd } 1193185029Spjd 1194185029Spjd ASSERT(dvp->v_type == VDIR); 1195185029Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1196185029Spjd 1197185029Spjd error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp, 1198185029Spjd NULL, 0, NULL, cr, NULL, NULL, NULL); 1199185029Spjd if (error == 0) 1200185029Spjd vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1201185029Spjd return (error); 1202185029Spjd} 1203185029Spjd 1204196309Spjdstatic int 1205196309Spjdzfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) 1206196309Spjd{ 1207196309Spjd zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data; 1208196309Spjd vnode_t *dvp, *vp; 1209196309Spjd zfsctl_snapdir_t *sdp; 1210196309Spjd zfs_snapentry_t *sep; 1211196309Spjd int error; 1212196309Spjd 1213196309Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1214196309Spjd error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, 1215196309Spjd NULL, 0, NULL, kcred, NULL, NULL, NULL); 1216196309Spjd if (error != 0) 1217196309Spjd return (error); 1218196309Spjd sdp = dvp->v_data; 1219196309Spjd 1220196309Spjd mutex_enter(&sdp->sd_lock); 1221196309Spjd sep = avl_first(&sdp->sd_snaps); 1222196309Spjd while (sep != NULL) { 1223196309Spjd vp = sep->se_root; 1224196309Spjd if (vp == ap->a_vp) 1225196309Spjd break; 1226196309Spjd sep = AVL_NEXT(&sdp->sd_snaps, sep); 1227196309Spjd } 1228196309Spjd if (sep == NULL) { 1229196309Spjd mutex_exit(&sdp->sd_lock); 1230196309Spjd error = ENOENT; 1231196309Spjd } else { 1232196309Spjd size_t len; 1233196309Spjd 1234196309Spjd len = strlen(sep->se_name); 1235196309Spjd *ap->a_buflen -= len; 1236196309Spjd bcopy(sep->se_name, ap->a_buf + *ap->a_buflen, len); 1237196309Spjd mutex_exit(&sdp->sd_lock); 1238196309Spjd vhold(dvp); 1239196309Spjd *ap->a_vpp = dvp; 1240196309Spjd } 1241196309Spjd VN_RELE(dvp); 1242196309Spjd 1243196309Spjd return (error); 1244196309Spjd} 1245196309Spjd 1246168404Spjd/* 1247168404Spjd * These VP's should 
never see the light of day. They should always 1248168404Spjd * be covered. 1249168404Spjd */ 1250168404Spjdstatic struct vop_vector zfsctl_ops_snapshot = { 1251168404Spjd .vop_default = &default_vnodeops, 1252168404Spjd .vop_inactive = zfsctl_snapshot_inactive, 1253185029Spjd .vop_lookup = zfsctl_snapshot_lookup, 1254168404Spjd .vop_reclaim = zfsctl_common_reclaim, 1255168404Spjd .vop_getattr = zfsctl_snapshot_getattr, 1256168404Spjd .vop_fid = zfsctl_snapshot_fid, 1257196309Spjd .vop_vptocnp = zfsctl_snapshot_vptocnp, 1258168404Spjd}; 1259168404Spjd 1260168404Spjdint 1261168404Spjdzfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) 1262168404Spjd{ 1263168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1264168404Spjd vnode_t *dvp, *vp; 1265168404Spjd zfsctl_snapdir_t *sdp; 1266168404Spjd zfsctl_node_t *zcp; 1267168404Spjd zfs_snapentry_t *sep; 1268168404Spjd int error; 1269168404Spjd 1270168404Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1271168404Spjd error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, 1272185029Spjd NULL, 0, NULL, kcred, NULL, NULL, NULL); 1273168404Spjd if (error != 0) 1274168404Spjd return (error); 1275168404Spjd sdp = dvp->v_data; 1276168404Spjd 1277168404Spjd mutex_enter(&sdp->sd_lock); 1278168404Spjd sep = avl_first(&sdp->sd_snaps); 1279168404Spjd while (sep != NULL) { 1280168404Spjd vp = sep->se_root; 1281168404Spjd zcp = vp->v_data; 1282168404Spjd if (zcp->zc_id == objsetid) 1283168404Spjd break; 1284168404Spjd 1285168404Spjd sep = AVL_NEXT(&sdp->sd_snaps, sep); 1286168404Spjd } 1287168404Spjd 1288168404Spjd if (sep != NULL) { 1289168404Spjd VN_HOLD(vp); 1290185029Spjd /* 1291185029Spjd * Return the mounted root rather than the covered mount point. 1292185029Spjd * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid> 1293185029Spjd * and returns the ZFS vnode mounted on top of the GFS node. 1294185029Spjd * This ZFS vnode is the root of the vfs for objset 'objsetid'. 
1295185029Spjd */ 1296170281Spjd error = traverse(&vp, LK_SHARED | LK_RETRY); 1297168404Spjd if (error == 0) { 1298168404Spjd if (vp == sep->se_root) 1299168404Spjd error = EINVAL; 1300168404Spjd else 1301168404Spjd *zfsvfsp = VTOZ(vp)->z_zfsvfs; 1302168404Spjd } 1303168404Spjd mutex_exit(&sdp->sd_lock); 1304170281Spjd if (error == 0) 1305170281Spjd VN_URELE(vp); 1306170281Spjd else 1307170281Spjd VN_RELE(vp); 1308168404Spjd } else { 1309168404Spjd error = EINVAL; 1310168404Spjd mutex_exit(&sdp->sd_lock); 1311168404Spjd } 1312168404Spjd 1313168404Spjd VN_RELE(dvp); 1314168404Spjd 1315168404Spjd return (error); 1316168404Spjd} 1317168404Spjd 1318168404Spjd/* 1319168404Spjd * Unmount any snapshots for the given filesystem. This is called from 1320168404Spjd * zfs_umount() - if we have a ctldir, then go through and unmount all the 1321168404Spjd * snapshots. 1322168404Spjd */ 1323168404Spjdint 1324168404Spjdzfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) 1325168404Spjd{ 1326168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1327185029Spjd vnode_t *dvp; 1328168404Spjd zfsctl_snapdir_t *sdp; 1329168404Spjd zfs_snapentry_t *sep, *next; 1330168404Spjd int error; 1331168404Spjd 1332168404Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1333168404Spjd error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, 1334185029Spjd NULL, 0, NULL, cr, NULL, NULL, NULL); 1335168404Spjd if (error != 0) 1336168404Spjd return (error); 1337168404Spjd sdp = dvp->v_data; 1338168404Spjd 1339168404Spjd mutex_enter(&sdp->sd_lock); 1340168404Spjd 1341168404Spjd sep = avl_first(&sdp->sd_snaps); 1342168404Spjd while (sep != NULL) { 1343168404Spjd next = AVL_NEXT(&sdp->sd_snaps, sep); 1344168404Spjd 1345168404Spjd /* 1346168404Spjd * If this snapshot is not mounted, then it must 1347168404Spjd * have just been unmounted by somebody else, and 1348168404Spjd * will be cleaned up by zfsctl_snapdir_inactive(). 
1349168404Spjd */ 1350185029Spjd if (vn_ismntpt(sep->se_root)) { 1351185029Spjd error = zfsctl_unmount_snap(sep, fflags, cr); 1352185029Spjd if (error) { 1353196954Spjd avl_index_t where; 1354196954Spjd 1355196953Spjd /* 1356196953Spjd * Before reinserting snapshot to the tree, 1357196953Spjd * check if it was actually removed. For example 1358196953Spjd * when snapshot mount point is busy, we will 1359196953Spjd * have an error here, but there will be no need 1360196953Spjd * to reinsert snapshot. 1361196953Spjd */ 1362196954Spjd if (avl_find(&sdp->sd_snaps, sep, &where) == NULL) 1363196954Spjd avl_insert(&sdp->sd_snaps, sep, where); 1364185029Spjd break; 1365168404Spjd } 1366168404Spjd } 1367168404Spjd sep = next; 1368168404Spjd } 1369185029Spjd 1370168404Spjd mutex_exit(&sdp->sd_lock); 1371168404Spjd VN_RELE(dvp); 1372168404Spjd 1373168404Spjd return (error); 1374168404Spjd} 1375