1/* 2 * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/types.h> 28#include <sys/param.h> 29#include <sys/kernel.h> 30#include <sys/systm.h> 31#include <sys/malloc.h> 32#include <sys/mount.h> 33#include <sys/cred.h> 34#include <sys/vfs.h> 35#include <sys/priv.h> 36#include <sys/libkern.h> 37 38#include <sys/mutex.h> 39#include <sys/vnode.h> 40#include <sys/taskq.h> 41 42#include <sys/ccompat.h> 43 44MALLOC_DECLARE(M_MOUNT); 45 46void 47vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg, 48 int flags __unused) 49{ 50 struct vfsopt *opt; 51 size_t namesize; 52 int locked; 53 54 if (!(locked = mtx_owned(MNT_MTX(vfsp)))) 55 MNT_ILOCK(vfsp); 56 57 if (vfsp->mnt_opt == NULL) { 58 void *opts; 59 60 MNT_IUNLOCK(vfsp); 61 opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK); 62 MNT_ILOCK(vfsp); 63 if (vfsp->mnt_opt == NULL) { 64 vfsp->mnt_opt = opts; 65 TAILQ_INIT(vfsp->mnt_opt); 66 } else { 67 free(opts, M_MOUNT); 68 } 69 } 70 71 MNT_IUNLOCK(vfsp); 72 73 opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK); 74 namesize = strlen(name) + 1; 75 opt->name = malloc(namesize, M_MOUNT, M_WAITOK); 76 strlcpy(opt->name, name, namesize); 77 opt->pos = -1; 78 opt->seen = 1; 79 if (arg == NULL) { 80 opt->value = NULL; 81 opt->len = 0; 82 } else { 83 opt->len = strlen(arg) + 1; 84 opt->value = malloc(opt->len, M_MOUNT, M_WAITOK); 85 memcpy(opt->value, arg, opt->len); 86 } 87 88 MNT_ILOCK(vfsp); 89 TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link); 90 if (!locked) 91 MNT_IUNLOCK(vfsp); 92} 93 94void 95vfs_clearmntopt(vfs_t *vfsp, const char *name) 96{ 97 int locked; 98 99 if (!(locked = mtx_owned(MNT_MTX(vfsp)))) 100 MNT_ILOCK(vfsp); 101 vfs_deleteopt(vfsp->mnt_opt, name); 102 if (!locked) 103 MNT_IUNLOCK(vfsp); 104} 105 106int 107vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp) 108{ 109 struct vfsoptlist *opts = vfsp->mnt_optnew; 110 int error; 111 112 if (opts == NULL) 113 return (0); 114 error = vfs_getopt(opts, opt, (void **)argp, NULL); 115 return (error != 0 ? 0 : 1); 116} 117 118int 119mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, 120 char *fspec, int fsflags, vfs_t *parent_vfsp) 121{ 122 struct vfsconf *vfsp; 123 struct mount *mp; 124 vnode_t *vp, *mvp; 125 int error; 126 127 ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot"); 128 129 vp = *vpp; 130 *vpp = NULL; 131 error = 0; 132 133 /* 134 * Be ultra-paranoid about making sure the type and fspath 135 * variables will fit in our mp buffers, including the 136 * terminating NUL. 137 */ 138 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 139 error = ENAMETOOLONG; 140 if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL) 141 error = ENODEV; 142 if (error == 0 && vp->v_type != VDIR) 143 error = ENOTDIR; 144 /* 145 * We need vnode lock to protect v_mountedhere and vnode interlock 146 * to protect v_iflag. 147 */ 148 if (error == 0) { 149 VI_LOCK(vp); 150 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) 151 vp->v_iflag |= VI_MOUNT; 152 else 153 error = EBUSY; 154 VI_UNLOCK(vp); 155 } 156 if (error != 0) { 157 vput(vp); 158 return (error); 159 } 160 vn_seqc_write_begin(vp); 161 VOP_UNLOCK1(vp); 162 163 /* 164 * Allocate and initialize the filesystem. 165 * We don't want regular user that triggered snapshot mount to be able 166 * to unmount it, so pass credentials of the parent mount. 167 */ 168 mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred); 169 170 mp->mnt_optnew = NULL; 171 vfs_setmntopt(mp, "from", fspec, 0); 172 mp->mnt_optnew = mp->mnt_opt; 173 mp->mnt_opt = NULL; 174 175 /* 176 * Set the mount level flags. 177 */ 178 mp->mnt_flag = fsflags & MNT_UPDATEMASK; 179 /* 180 * Snapshots are always read-only. 181 */ 182 mp->mnt_flag |= MNT_RDONLY; 183 /* 184 * We don't want snapshots to allow access to vulnerable setuid 185 * programs, so we turn off setuid when mounting snapshots. 186 */ 187 mp->mnt_flag |= MNT_NOSUID; 188 /* 189 * We don't want snapshots to be visible in regular 190 * mount(8) and df(1) output. 191 */ 192 mp->mnt_flag |= MNT_IGNORE; 193 194 error = VFS_MOUNT(mp); 195 if (error != 0) { 196 /* 197 * Clear VI_MOUNT and decrement the use count "atomically", 198 * under the vnode lock. This is not strictly required, 199 * but makes it easier to reason about the life-cycle and 200 * ownership of the covered vnode. 201 */ 202 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 203 VI_LOCK(vp); 204 vp->v_iflag &= ~VI_MOUNT; 205 VI_UNLOCK(vp); 206 vn_seqc_write_end(vp); 207 vput(vp); 208 vfs_unbusy(mp); 209 vfs_freeopts(mp->mnt_optnew); 210 mp->mnt_vnodecovered = NULL; 211 vfs_mount_destroy(mp); 212 return (error); 213 } 214 215 if (mp->mnt_opt != NULL) 216 vfs_freeopts(mp->mnt_opt); 217 mp->mnt_opt = mp->mnt_optnew; 218 (void) VFS_STATFS(mp, &mp->mnt_stat); 219 220#ifdef VFS_SUPPORTS_EXJAIL_CLONE 221 /* 222 * Clone the mnt_exjail credentials of the parent, as required. 223 */ 224 vfs_exjail_clone(parent_vfsp, mp); 225#endif 226 227 /* 228 * Prevent external consumers of mount options from reading 229 * mnt_optnew. 230 */ 231 mp->mnt_optnew = NULL; 232 233 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 234#ifdef FREEBSD_NAMECACHE 235 cache_purge(vp); 236#endif 237 VI_LOCK(vp); 238 vp->v_iflag &= ~VI_MOUNT; 239#ifdef VIRF_MOUNTPOINT 240 vn_irflag_set_locked(vp, VIRF_MOUNTPOINT); 241#endif 242 vp->v_mountedhere = mp; 243 VI_UNLOCK(vp); 244 /* Put the new filesystem on the mount list. */ 245 mtx_lock(&mountlist_mtx); 246 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 247 mtx_unlock(&mountlist_mtx); 248 vfs_event_signal(NULL, VQ_MOUNT, 0); 249 if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) 250 panic("mount: lost mount"); 251 vn_seqc_write_end(vp); 252 VOP_UNLOCK1(vp); 253#if __FreeBSD_version >= 1300048 254 vfs_op_exit(mp); 255#endif 256 vfs_unbusy(mp); 257 *vpp = mvp; 258 return (0); 259} 260 261/* 262 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it 263 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering 264 * the file system as a result of releasing the vnode. Note, file systems 265 * already have to handle the race where the vnode is incremented before the 266 * inactive routine is called and does its locking. 267 * 268 * Warning: Excessive use of this routine can lead to performance problems. 269 * This is because taskqs throttle back allocation if too many are created. 270 */ 271void 272vn_rele_async(vnode_t *vp, taskq_t *taskq) 273{ 274 VERIFY3U(vp->v_usecount, >, 0); 275 if (refcount_release_if_not_last(&vp->v_usecount)) { 276#if __FreeBSD_version < 1300045 277 vdrop(vp); 278#endif 279 return; 280 } 281 VERIFY3U(taskq_dispatch((taskq_t *)taskq, 282 (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0); 283} 284