/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
 */

/*
 * ZFS control directory (a.k.a. ".zfs")
 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, this is only the 'snapshot' directory, but this may expand in the
 * future. The elements are built using the GFS primitives, as the hierarchy
 * does not actually exist on disk.
 *
 * For 'snapshot', we don't want to have all snapshots always mounted, because
 * this would take up a huge amount of space in /etc/mnttab. We have three
 * types of objects:
 *
 *     ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
 *
 * The 'snapshot' node contains just enough information to lookup '..' and act
 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
 * perform an automount of the underlying filesystem and return the
 * corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from user land. The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
 * unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
 * However, vnodes within these mounted filesystems have their v_vfsp
 * fields set to the head filesystem to make NFS happy (see
 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
 * so that it cannot be freed until all snapshots have been unmounted.
 */
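
/*
 * Illustrative example (the dataset and snapshot names below are made up,
 * and the actual paths depend on where the dataset is mounted): for a
 * filesystem 'tank/home' mounted at /tank/home, '/tank/home/.zfs' and
 * '/tank/home/.zfs/snapshot' are the synthetic ctldir and snapshotdir
 * nodes, and the first lookup of '/tank/home/.zfs/snapshot/monday'
 * automounts the snapshot dataset 'tank/home@monday' on top of the
 * corresponding snapshot node.
 */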

#include <sys/types.h>
#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/dirent.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/zap.h>
#include <sys/sysproto.h>

#include "zfs_namecheck.h"

#include <sys/kernel.h>
#include <sys/ccompat.h>

/* Common access mode for all virtual directories under the ctldir */
const uint16_t zfsctl_ctldir_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
    S_IROTH | S_IXOTH;

/*
 * "Synthetic" filesystem implementation.
 */

/*
 * Assert that A implies B.
 */
#define	KASSERT_IMPLY(A, B, msg)	KASSERT(!(A) || (B), (msg));

static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");

typedef struct sfs_node {
	char		sn_name[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t	sn_parent_id;
	uint64_t	sn_id;
} sfs_node_t;

/*
 * Check the parent's ID as well as the node's to account for a chance
 * that IDs originating from different domains (snapshot IDs, artificial
 * IDs, znode IDs) may clash.
 */
static int
sfs_compare_ids(struct vnode *vp, void *arg)
{
	sfs_node_t *n1 = vp->v_data;
	sfs_node_t *n2 = arg;
	bool equal;

	equal = n1->sn_id == n2->sn_id &&
	    n1->sn_parent_id == n2->sn_parent_id;

	/* Zero means equality. */
	return (!equal);
}
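
/*
 * Look up a cached sfs vnode in the per-mount vfs_hash table. The node's
 * own ID is used as the hash value and sfs_compare_ids() disambiguates
 * entries that additionally have to match on the parent ID.
 */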
static int
sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
    uint64_t id, struct vnode **vpp)
{
	sfs_node_t search;
	int err;

	search.sn_id = id;
	search.sn_parent_id = parent_id;
	err = vfs_hash_get(mp, (uint32_t)id, flags, curthread, vpp,
	    sfs_compare_ids, &search);
	return (err);
}

static int
sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
    uint64_t id, struct vnode **vpp)
{
	int err;

	KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
	err = vfs_hash_insert(vp, (uint32_t)id, flags, curthread, vpp,
	    sfs_compare_ids, vp->v_data);
	return (err);
}

static void
sfs_vnode_remove(struct vnode *vp)
{
	vfs_hash_remove(vp);
}

typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);

static int
sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
    const char *tag, struct vop_vector *vops,
    sfs_vnode_setup_fn setup, void *arg,
    struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	/* Allocate a new vnode/inode. */
	error = getnewvnode(tag, mp, vops, &vp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Exclusively lock the vnode while it's being constructed.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}

	setup(vp, arg);

	error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

#if __FreeBSD_version >= 1400077
	vn_set_state(vp, VSTATE_CONSTRUCTED);
#endif

	*vpp = vp;
	return (0);
}

static void
sfs_print_node(sfs_node_t *node)
{
	printf("\tname = %s\n", node->sn_name);
	printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
	printf("\tid = %ju\n", (uintmax_t)node->sn_id);
}

static sfs_node_t *
sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
{
	struct sfs_node *node;

	KASSERT(strlen(name) < sizeof (node->sn_name),
	    ("sfs node name is too long"));
	KASSERT(size >= sizeof (*node), ("sfs node size is too small"));
	node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
	strlcpy(node->sn_name, name, sizeof (node->sn_name));
	node->sn_parent_id = parent_id;
	node->sn_id = id;

	return (node);
}

static void
sfs_destroy_node(sfs_node_t *node)
{
	free(node, M_SFSNODES);
}

static void *
sfs_reclaim_vnode(vnode_t *vp)
{
	void *data;

	sfs_vnode_remove(vp);
	data = vp->v_data;
	vp->v_data = NULL;
	return (data);
}
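
/*
 * Emit the '.' and '..' entries that every synthetic directory shares.
 * On success *offp, if not NULL, is set just past both entries so that
 * the caller can append its own entries at that offset.
 */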
static int
sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
    zfs_uio_t *uio, off_t *offp)
{
	struct dirent entry;
	int error;

	/* Reset ncookies for subsequent use of vfs_read_dirent. */
	if (ap->a_ncookies != NULL)
		*ap->a_ncookies = 0;

	if (zfs_uio_resid(uio) < sizeof (entry))
		return (SET_ERROR(EINVAL));

	if (zfs_uio_offset(uio) < 0)
		return (SET_ERROR(EINVAL));
	if (zfs_uio_offset(uio) == 0) {
		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '\0';
		entry.d_namlen = 1;
		entry.d_reclen = sizeof (entry);
		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
		if (error != 0)
			return (SET_ERROR(error));
	}

	if (zfs_uio_offset(uio) < sizeof (entry))
		return (SET_ERROR(EINVAL));
	if (zfs_uio_offset(uio) == sizeof (entry)) {
		entry.d_fileno = parent_id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '.';
		entry.d_name[2] = '\0';
		entry.d_namlen = 2;
		entry.d_reclen = sizeof (entry);
		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
		if (error != 0)
			return (SET_ERROR(error));
	}

	if (offp != NULL)
		*offp = 2 * sizeof (entry);
	return (0);
}


/*
 * .zfs inode namespace
 *
 * We need to generate unique inode numbers for all files and directories
 * within the .zfs pseudo-filesystem. We use the following scheme:
 *
 *	ENTRY			ZFSCTL_INODE
 *	.zfs			1
 *	.zfs/snapshot		2
 *	.zfs/snapshot/<snap>	objectid(snap)
 */
#define	ZFSCTL_INO_SNAP(id)	(id)

static struct vop_vector zfsctl_ops_root;
static struct vop_vector zfsctl_ops_snapdir;
static struct vop_vector zfsctl_ops_snapshot;

void
zfsctl_init(void)
{
}

void
zfsctl_fini(void)
{
}

boolean_t
zfsctl_is_node(vnode_t *vp)
{
	return (vn_matchops(vp, zfsctl_ops_root) ||
	    vn_matchops(vp, zfsctl_ops_snapdir) ||
	    vn_matchops(vp, zfsctl_ops_snapshot));

}

typedef struct zfsctl_root {
	sfs_node_t	node;
	sfs_node_t	*snapdir;
	timestruc_t	cmtime;
} zfsctl_root_t;


/*
 * Create the '.zfs' directory.
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
	zfsctl_root_t *dot_zfs;
	sfs_node_t *snapdir;
	vnode_t *rvp;
	uint64_t crtime[2];

	ASSERT3P(zfsvfs->z_ctldir, ==, NULL);

	snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT,
	    ZFSCTL_INO_SNAPDIR);
	dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof (*dot_zfs), ".zfs", 0,
	    ZFSCTL_INO_ROOT);
	dot_zfs->snapdir = snapdir;

	VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp));
	VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
	    &crtime, sizeof (crtime)));
	ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
	vput(rvp);

	zfsvfs->z_ctldir = dot_zfs;
}

/*
 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
 * The nodes must not have any associated vnodes by now as they should be
 * vflush-ed.
 */
void
zfsctl_destroy(zfsvfs_t *zfsvfs)
{
	sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
	sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
	zfsvfs->z_ctldir = NULL;
}

static int
zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	return (VFS_ROOT(mp, flags, vpp));
}

static void
zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
{
	ASSERT_VOP_ELOCKED(vp, __func__);

	/* We support shared locking. */
	VN_LOCK_ASHARE(vp);
	vp->v_type = VDIR;
	vp->v_data = arg;
}

static int
zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	void *node;
	int err;

	node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir;
	err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
	    zfsctl_common_vnode_setup, node, vpp);
	return (err);
}

static int
zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	void *node;
	int err;

	node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir->snapdir;
	err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
	    &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
	return (err);
}

/*
 * Given a root znode, retrieve the associated .zfs directory.
 * Add a hold to the vnode and return it.
 */
int
zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
{
	int error;

	error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
	return (error);
}

/*
 * Common open routine. Disallow any write access.
 */
static int
zfsctl_common_open(struct vop_open_args *ap)
{
	int flags = ap->a_mode;

	if (flags & FWRITE)
		return (SET_ERROR(EACCES));

	return (0);
}

/*
 * Common close routine. Nothing to do here.
 */
static int
zfsctl_common_close(struct vop_close_args *ap)
{
	(void) ap;
	return (0);
}

/*
 * Common access routine. Disallow writes.
 */
static int
zfsctl_common_access(struct vop_access_args *ap)
{
	accmode_t accmode = ap->a_accmode;

	if (accmode & VWRITE)
		return (SET_ERROR(EACCES));
	return (0);
}

/*
 * Common getattr function. Fill in basic information.
 */
static void
zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
{
	timestruc_t now;
	sfs_node_t *node;

	node = vp->v_data;

	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_rdev = 0;
	/*
	 * We are a purely virtual object, so we have no
	 * blocksize or allocated blocks.
	 */
	vap->va_blksize = 0;
	vap->va_nblocks = 0;
	vap->va_gen = 0;
	vn_fsid(vp, vap);
	vap->va_mode = zfsctl_ctldir_mode;
	vap->va_type = VDIR;
	/*
	 * We live in the now (for atime).
	 */
	gethrestime(&now);
	vap->va_atime = now;
	/* FreeBSD: Reset chflags(2) flags. */
	vap->va_flags = 0;

	vap->va_nodeid = node->sn_id;

	/* At least '.' and '..'. */
	vap->va_nlink = 2;
}

#ifndef _OPENSOLARIS_SYS_VNODE_H_
struct vop_fid_args {
	struct vnode *a_vp;
	struct fid *a_fid;
};
#endif

static int
zfsctl_common_fid(struct vop_fid_args *ap)
{
	vnode_t *vp = ap->a_vp;
	fid_t *fidp = (void *)ap->a_fid;
	sfs_node_t *node = vp->v_data;
	uint64_t object = node->sn_id;
	zfid_short_t *zfid;
	int i;

	zfid = (zfid_short_t *)fidp;
	zfid->zf_len = SHORT_FID_LEN;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* .zfs nodes always have a generation number of 0 */
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = 0;

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_reclaim_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

static int
zfsctl_common_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t *vp = ap->a_vp;

	(void) sfs_reclaim_vnode(vp);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_print_args {
	struct vnode *a_vp;
};
#endif

static int
zfsctl_common_print(struct vop_print_args *ap)
{
	sfs_print_node(ap->a_vp->v_data);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * Get root directory attributes.
 */
static int
zfsctl_root_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	zfsctl_root_t *node = vp->v_data;

	zfsctl_common_getattr(vp, vap);
	vap->va_ctime = node->cmtime;
	vap->va_mtime = vap->va_ctime;
	vap->va_birthtime = vap->va_ctime;
	vap->va_nlink += 1; /* snapdir */
	vap->va_size = vap->va_nlink;
	return (0);
}

/*
 * When we lookup "." we still can be asked to lock it
 * differently, can't we?
 */
static int
zfsctl_relock_dot(vnode_t *dvp, int ltype)
{
	vref(dvp);
	if (ltype != VOP_ISLOCKED(dvp)) {
		if (ltype == LK_EXCLUSIVE)
			vn_lock(dvp, LK_UPGRADE | LK_RETRY);
		else /* if (ltype == LK_SHARED) */
			vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

		/* Relocking "." may leave us with a reclaimed vnode. */
		if (VN_IS_DOOMED(dvp)) {
			vrele(dvp);
			return (SET_ERROR(ENOENT));
		}
	}
	return (0);
}

/*
 * Special case the handling of "..".
 */
static int
zfsctl_root_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vnode_t *dvp = ap->a_dvp;
	vnode_t **vpp = ap->a_vpp;
	int flags = ap->a_cnp->cn_flags;
	int lkflags = ap->a_cnp->cn_lkflags;
	int nameiop = ap->a_cnp->cn_nameiop;
	int err;

	ASSERT3S(dvp->v_type, ==, VDIR);

	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
		return (SET_ERROR(ENOTSUP));

	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
		if (err == 0)
			*vpp = dvp;
	} else if ((flags & ISDOTDOT) != 0) {
		err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
		    lkflags, vpp);
	} else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
		err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
	} else {
		err = SET_ERROR(ENOENT);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}

static int
zfsctl_root_readdir(struct vop_readdir_args *ap)
{
	struct dirent entry;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_root_t *node = vp->v_data;
	zfs_uio_t uio;
	int *eofp = ap->a_eofflag;
	off_t dots_offset;
	int error;

	zfs_uio_init(&uio, ap->a_uio);

	ASSERT3S(vp->v_type, ==, VDIR);

	/*
	 * FIXME: this routine only ever emits 3 entries and does not tolerate
	 * being called with a buffer too small to handle all of them.
	 *
	 * The check below facilitates the idiom of repeating calls until the
	 * count to return is 0.
	 */
	if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) {
		return (0);
	}

	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
	    &dots_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG) /* ran out of destination space */
			error = 0;
		return (error);
	}
	if (zfs_uio_offset(&uio) != dots_offset)
		return (SET_ERROR(EINVAL));

	_Static_assert(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name),
	    "node->snapdir->sn_name too big for entry.d_name");
	entry.d_fileno = node->snapdir->sn_id;
	entry.d_type = DT_DIR;
	strcpy(entry.d_name, node->snapdir->sn_name);
	entry.d_namlen = strlen(entry.d_name);
	entry.d_reclen = sizeof (entry);
	error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
	if (error != 0) {
		if (error == ENAMETOOLONG)
			error = 0;
		return (SET_ERROR(error));
	}
	if (eofp != NULL)
		*eofp = 1;
	return (0);
}

static int
zfsctl_root_vptocnp(struct vop_vptocnp_args *ap)
{
	static const char dotzfs_name[4] = ".zfs";
	vnode_t *dvp;
	int error;

	if (*ap->a_buflen < sizeof (dotzfs_name))
		return (SET_ERROR(ENOMEM));

	error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL,
	    LK_SHARED, &dvp);
	if (error != 0)
		return (SET_ERROR(error));

	VOP_UNLOCK1(dvp);
	*ap->a_vpp = dvp;
	*ap->a_buflen -= sizeof (dotzfs_name);
	memcpy(ap->a_buf + *ap->a_buflen, dotzfs_name, sizeof (dotzfs_name));
	return (0);
}

static int
zfsctl_common_pathconf(struct vop_pathconf_args *ap)
{
	/*
	 * We care about ACL variables so that user land utilities like ls
	 * can display them correctly. Since the ctldir's st_dev is set to be
	 * the same as the parent dataset, we must support all variables that
	 * it supports.
	 */
	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = MIN(LONG_MAX, ZFS_LINK_MAX);
		return (0);

	case _PC_FILESIZEBITS:
		*ap->a_retval = 64;
		return (0);

	case _PC_MIN_HOLE_SIZE:
		*ap->a_retval = (int)SPA_MINBLOCKSIZE;
		return (0);

	case _PC_ACL_EXTENDED:
		*ap->a_retval = 0;
		return (0);

	case _PC_ACL_NFS4:
		*ap->a_retval = 1;
		return (0);

	case _PC_ACL_PATH_MAX:
		*ap->a_retval = ACL_MAX_ENTRIES;
		return (0);

	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);

	default:
		return (vop_stdpathconf(ap));
	}
}

/*
 * Returns a trivial ACL.
 */
static int
zfsctl_common_getacl(struct vop_getacl_args *ap)
{
	int i;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	acl_nfs4_sync_acl_from_mode(ap->a_aclp, zfsctl_ctldir_mode, 0);
	/*
	 * acl_nfs4_sync_acl_from_mode assumes that the owner can always modify
	 * attributes. That is not the case for the ctldir, so we must clear
	 * those bits. We also must clear ACL_READ_NAMED_ATTRS, because xattrs
	 * aren't supported by the ctldir.
	 */
	for (i = 0; i < ap->a_aclp->acl_cnt; i++) {
		struct acl_entry *entry;
		entry = &(ap->a_aclp->acl_entry[i]);
		entry->ae_perm &= ~(ACL_WRITE_ACL | ACL_WRITE_OWNER |
		    ACL_WRITE_ATTRIBUTES | ACL_WRITE_NAMED_ATTRS |
		    ACL_READ_NAMED_ATTRS);
	}

	return (0);
}

static struct vop_vector zfsctl_ops_root = {
	.vop_default =		&default_vnodeops,
#if __FreeBSD_version >= 1300121
	.vop_fplookup_vexec =	VOP_EAGAIN,
#endif
#if __FreeBSD_version >= 1300139
	.vop_fplookup_symlink =	VOP_EAGAIN,
#endif
	.vop_open =		zfsctl_common_open,
	.vop_close =		zfsctl_common_close,
	.vop_ioctl =		VOP_EINVAL,
	.vop_getattr =		zfsctl_root_getattr,
	.vop_access =		zfsctl_common_access,
	.vop_readdir =		zfsctl_root_readdir,
	.vop_lookup =		zfsctl_root_lookup,
	.vop_inactive =		VOP_NULL,
	.vop_reclaim =		zfsctl_common_reclaim,
	.vop_fid =		zfsctl_common_fid,
	.vop_print =		zfsctl_common_print,
	.vop_vptocnp =		zfsctl_root_vptocnp,
	.vop_pathconf =		zfsctl_common_pathconf,
	.vop_getacl =		zfsctl_common_getacl,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfsctl_ops_root);

static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

	dmu_objset_name(os, zname);
	if (strlen(zname) + 1 + strlen(name) >= len)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strcat(zname, "@");
	(void) strcat(zname, name);
	return (0);
}

static int
zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
	int err;

	err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id);
	return (err);
}

/*
 * Given a vnode, get the root vnode of a filesystem mounted on top of
 * the vnode, if any. The root vnode is referenced and locked.
 * If no filesystem is mounted then the original vnode remains referenced
 * and locked. If any error happens the original vnode is unlocked and
 * released.
 */
static int
zfsctl_mounted_here(vnode_t **vpp, int flags)
{
	struct mount *mp;
	int err;

	ASSERT_VOP_LOCKED(*vpp, __func__);
	ASSERT3S((*vpp)->v_type, ==, VDIR);

	if ((mp = (*vpp)->v_mountedhere) != NULL) {
		err = vfs_busy(mp, 0);
		KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
		KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
		vput(*vpp);
		err = VFS_ROOT(mp, flags, vpp);
		vfs_unbusy(mp);
		return (err);
	}
	return (EJUSTRETURN);
}

typedef struct {
	const char *snap_name;
	uint64_t    snap_id;
} snapshot_setup_arg_t;

static void
zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg)
{
	snapshot_setup_arg_t *ssa = arg;
	sfs_node_t *node;

	ASSERT_VOP_ELOCKED(vp, __func__);

	node = sfs_alloc_node(sizeof (sfs_node_t),
	    ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
	zfsctl_common_vnode_setup(vp, node);

	/* We have to support recursive locking. */
	VN_LOCK_AREC(vp);
}

/*
 * Lookup entry point for the 'snapshot' directory. Try to open the
 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
 * Perform a mount of the associated dataset on top of the vnode.
 * There are four possibilities:
 * - the snapshot node and vnode do not exist
 * - the snapshot vnode is covered by the mounted snapshot
 * - the snapshot vnode is not covered yet, the mount operation is in progress
 * - the snapshot vnode is not covered, because the snapshot has been unmounted
 * The last two states are transient and should be relatively short-lived.
 */
static int
zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
{
	vnode_t *dvp = ap->a_dvp;
	vnode_t **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	char name[NAME_MAX + 1];
	char fullname[ZFS_MAX_DATASET_NAME_LEN];
	char *mountpoint;
	size_t mountpoint_len;
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	uint64_t snap_id;
	int nameiop = cnp->cn_nameiop;
	int lkflags = cnp->cn_lkflags;
	int flags = cnp->cn_flags;
	int err;

	ASSERT3S(dvp->v_type, ==, VDIR);

	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
		return (SET_ERROR(ENOTSUP));

	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
		if (err == 0)
			*vpp = dvp;
		return (err);
	}
	if (flags & ISDOTDOT) {
		err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
		    vpp);
		return (err);
	}

	if (cnp->cn_namelen >= sizeof (name))
		return (SET_ERROR(ENAMETOOLONG));

	strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
	err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
	if (err != 0)
		return (SET_ERROR(ENOENT));

	for (;;) {
		snapshot_setup_arg_t ssa;

		ssa.snap_name = name;
		ssa.snap_id = snap_id;
		err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
		    snap_id, "zfs", &zfsctl_ops_snapshot,
		    zfsctl_snapshot_vnode_setup, &ssa, vpp);
		if (err != 0)
			return (err);

		/* Check if a new vnode has just been created. */
		if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
			break;

		/*
		 * Check if a snapshot is already mounted on top of the vnode.
		 */
		err = zfsctl_mounted_here(vpp, lkflags);
		if (err != EJUSTRETURN)
			return (err);

		/*
		 * If the vnode is not covered, then either the mount operation
		 * is in progress or the snapshot has already been unmounted
		 * but the vnode hasn't been inactivated and reclaimed yet.
		 * We can try to re-use the vnode in the latter case.
		 */
		VI_LOCK(*vpp);
		if (((*vpp)->v_iflag & VI_MOUNT) == 0) {
			VI_UNLOCK(*vpp);
			/*
			 * Upgrade to exclusive lock in order to:
			 * - avoid race conditions
			 * - satisfy the contract of mount_snapshot()
			 */
			err = VOP_LOCK(*vpp, LK_TRYUPGRADE);
			if (err == 0)
				break;
		} else {
			VI_UNLOCK(*vpp);
		}

		/*
		 * In this state we can loop on uncontested locks and starve
		 * the thread doing the lengthy, non-trivial mount operation.
		 * So, yield to prevent that from happening.
		 */
		vput(*vpp);
		kern_yield(PRI_USER);
	}

	VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof (fullname), fullname));

	mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
	    strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
	(void) snprintf(mountpoint, mountpoint_len,
	    "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
	    dvp->v_vfsp->mnt_stat.f_mntonname, name);

	err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0,
	    dvp->v_vfsp);
	kmem_free(mountpoint, mountpoint_len);
	if (err == 0) {
		/*
		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
		 *
		 * This is where we lie about our v_vfsp in order to
		 * make .zfs/snapshot/<snapname> accessible over NFS
		 * without requiring manual mounts of <snapname>.
		 */
		ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs);
		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;

		/* Clear the root flag (set via VFS_ROOT) as well. */
		(*vpp)->v_vflag &= ~VV_ROOT;
	}

	if (err != 0)
		*vpp = NULL;
	return (err);
}

static int
zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	struct dirent entry;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfs_uio_t uio;
	int *eofp = ap->a_eofflag;
	off_t dots_offset;
	int error;

	zfs_uio_init(&uio, ap->a_uio);

	ASSERT3S(vp->v_type, ==, VDIR);

	error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap,
	    &uio, &dots_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG) /* ran out of destination space */
			error = 0;
		return (error);
	}

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);
	for (;;) {
		uint64_t cookie;
		uint64_t id;

		cookie = zfs_uio_offset(&uio) - dots_offset;

		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
		    snapname, &id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error != 0) {
			if (error == ENOENT) {
				if (eofp != NULL)
					*eofp = 1;
				error = 0;
			}
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}

		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		strcpy(entry.d_name, snapname);
		entry.d_namlen = strlen(entry.d_name);
		entry.d_reclen = sizeof (entry);
		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
		if (error != 0) {
			if (error == ENAMETOOLONG)
				error = 0;
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(error));
		}
		zfs_uio_setoffset(&uio, cookie + dots_offset);
	}
	__builtin_unreachable();
}

static int
zfsctl_snapdir_getattr(struct vop_getattr_args *ap)
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	dsl_dataset_t *ds;
	uint64_t snap_count;
	int err;

	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
		return (err);
	ds = dmu_objset_ds(zfsvfs->z_os);
	zfsctl_common_getattr(vp, vap);
	vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
	vap->va_mtime = vap->va_ctime;
	vap->va_birthtime = vap->va_ctime;
	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
		err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
		if (err != 0) {
			zfs_exit(zfsvfs, FTAG);
			return (err);
		}
		vap->va_nlink += snap_count;
	}
	vap->va_size = vap->va_nlink;

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

static struct vop_vector zfsctl_ops_snapdir = {
	.vop_default =		&default_vnodeops,
#if __FreeBSD_version >= 1300121
	.vop_fplookup_vexec =	VOP_EAGAIN,
#endif
#if __FreeBSD_version >= 1300139
	.vop_fplookup_symlink =	VOP_EAGAIN,
#endif
	.vop_open =		zfsctl_common_open,
	.vop_close =		zfsctl_common_close,
	.vop_getattr =		zfsctl_snapdir_getattr,
	.vop_access =		zfsctl_common_access,
	.vop_readdir =		zfsctl_snapdir_readdir,
	.vop_lookup =		zfsctl_snapdir_lookup,
	.vop_reclaim =		zfsctl_common_reclaim,
	.vop_fid =		zfsctl_common_fid,
	.vop_print =		zfsctl_common_print,
	.vop_pathconf =		zfsctl_common_pathconf,
	.vop_getacl =		zfsctl_common_getacl,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapdir);


static int
zfsctl_snapshot_inactive(struct vop_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;

	vrecycle(vp);
	return (0);
}

static int
zfsctl_snapshot_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t *vp = ap->a_vp;
	void *data = vp->v_data;

	sfs_reclaim_vnode(vp);
	sfs_destroy_node(data);
	return (0);
}

static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
{
	struct mount *mp;
	vnode_t *dvp;
	vnode_t *vp;
	sfs_node_t *node;
	size_t len;
	int locked;
	int error;

	vp = ap->a_vp;
	node = vp->v_data;
	len = strlen(node->sn_name);
	if (*ap->a_buflen < len)
		return (SET_ERROR(ENOMEM));

	/*
	 * Prevent unmounting of the snapshot while the vnode lock
	 * is not held. That is not strictly required, but allows
	 * us to assert that an uncovered snapshot vnode is never
	 * "leaked".
	 */
	mp = vp->v_mountedhere;
	if (mp == NULL)
		return (SET_ERROR(ENOENT));
	error = vfs_busy(mp, 0);
	KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));

	/*
	 * We can vput the vnode as we can now depend on the reference owned
	 * by the busied mp. But we also need to hold the vnode, because
	 * the reference may go after vfs_unbusy() which has to be called
	 * before we can lock the vnode again.
	 */
	locked = VOP_ISLOCKED(vp);
#if __FreeBSD_version >= 1300045
	enum vgetstate vs = vget_prep(vp);
#else
	vhold(vp);
#endif
	vput(vp);

	/* Look up .zfs/snapshot, our parent. */
	error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
	if (error == 0) {
		VOP_UNLOCK1(dvp);
		*ap->a_vpp = dvp;
		*ap->a_buflen -= len;
		memcpy(ap->a_buf + *ap->a_buflen, node->sn_name, len);
	}
	vfs_unbusy(mp);
#if __FreeBSD_version >= 1300045
	vget_finish(vp, locked | LK_RETRY, vs);
#else
	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
#endif
	return (error);
}

/*
 * These VP's should never see the light of day. They should always
 * be covered.
 */
static struct vop_vector zfsctl_ops_snapshot = {
	.vop_default =		NULL, /* ensure very restricted access */
#if __FreeBSD_version >= 1300121
	.vop_fplookup_vexec =	VOP_EAGAIN,
#endif
#if __FreeBSD_version >= 1300139
	.vop_fplookup_symlink =	VOP_EAGAIN,
#endif
	.vop_open =		zfsctl_common_open,
	.vop_close =		zfsctl_common_close,
	.vop_inactive =		zfsctl_snapshot_inactive,
#if __FreeBSD_version >= 1300045
	.vop_need_inactive =	vop_stdneed_inactive,
#endif
	.vop_reclaim =		zfsctl_snapshot_reclaim,
	.vop_vptocnp =		zfsctl_snapshot_vptocnp,
	.vop_lock1 =		vop_stdlock,
	.vop_unlock =		vop_stdunlock,
	.vop_islocked =		vop_stdislocked,
	.vop_advlockpurge =	vop_stdadvlockpurge, /* called by vgone */
	.vop_print =		zfsctl_common_print,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapshot);

int
zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
{
	zfsvfs_t *zfsvfs __unused = vfsp->vfs_data;
	vnode_t *vp;
	int error;

	ASSERT3P(zfsvfs->z_ctldir, !=, NULL);
	*zfsvfsp = NULL;
	error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
	    ZFSCTL_INO_SNAPDIR, objsetid, &vp);
	if (error == 0 && vp != NULL) {
		/*
		 * XXX Probably need to at least reference, if not busy, the mp.
		 */
		if (vp->v_mountedhere != NULL)
			*zfsvfsp = vp->v_mountedhere->mnt_data;
		vput(vp);
	}
	if (*zfsvfsp == NULL)
		return (SET_ERROR(EINVAL));
	return (0);
}

/*
 * Unmount any snapshots for the given filesystem. This is called from
 * zfs_umount() - if we have a ctldir, then go through and unmount all the
 * snapshots.
 */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct mount *mp;
	vnode_t *vp;
	uint64_t cookie;
	int error;

	ASSERT3P(zfsvfs->z_ctldir, !=, NULL);

	cookie = 0;
	for (;;) {
		uint64_t id;

		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
		    snapname, &id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error != 0) {
			if (error == ENOENT)
				error = 0;
			break;
		}

		for (;;) {
			error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
			    ZFSCTL_INO_SNAPDIR, id, &vp);
			if (error != 0 || vp == NULL)
				break;

			mp = vp->v_mountedhere;

			/*
			 * v_mountedhere being NULL means that the
			 * (uncovered) vnode is in a transient state
			 * (mounting or unmounting), so loop until it
			 * settles down.
			 */
			if (mp != NULL)
				break;
			vput(vp);
		}
		if (error != 0)
			break;
		if (vp == NULL)
			continue;	/* no mountpoint, nothing to do */

		/*
		 * The mount-point vnode is kept locked to avoid spurious EBUSY
		 * from a concurrent umount.
		 * The vnode lock must have recursive locking enabled.
		 */
		vfs_ref(mp);
		error = dounmount(mp, fflags, curthread);
		KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
		    ("extra references after unmount"));
		vput(vp);
		if (error != 0)
			break;
	}
	KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
	    ("force unmounting failed"));
	return (error);
}

int
zfsctl_snapshot_unmount(const char *snapname, int flags __unused)
{
	vfs_t *vfsp = NULL;
	zfsvfs_t *zfsvfs = NULL;

	if (strchr(snapname, '@') == NULL)
		return (0);

	int err = getzfsvfs(snapname, &zfsvfs);
	if (err != 0) {
		ASSERT3P(zfsvfs, ==, NULL);
		return (0);
	}
	vfsp = zfsvfs->z_vfs;

	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));

	vfs_ref(vfsp);
	vfs_unbusy(vfsp);
	return (dounmount(vfsp, MS_FORCE, curthread));
}