1139825Simp/*- 262976Smckusick * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 362976Smckusick * 462976Smckusick * Further information about snapshots can be obtained from: 562976Smckusick * 662976Smckusick * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 762976Smckusick * 1614 Oxford Street mckusick@mckusick.com 862976Smckusick * Berkeley, CA 94709-1608 +1-510-843-9542 962976Smckusick * USA 1062976Smckusick * 1162976Smckusick * Redistribution and use in source and binary forms, with or without 1262976Smckusick * modification, are permitted provided that the following conditions 1362976Smckusick * are met: 1462976Smckusick * 1562976Smckusick * 1. Redistributions of source code must retain the above copyright 1662976Smckusick * notice, this list of conditions and the following disclaimer. 1762976Smckusick * 2. Redistributions in binary form must reproduce the above copyright 1862976Smckusick * notice, this list of conditions and the following disclaimer in the 1962976Smckusick * documentation and/or other materials provided with the distribution. 2062976Smckusick * 2162976Smckusick * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 2262976Smckusick * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 2362976Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 2462976Smckusick * DISCLAIMED. 
IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 2562976Smckusick * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2662976Smckusick * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2762976Smckusick * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2862976Smckusick * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2962976Smckusick * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3062976Smckusick * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3162976Smckusick * SUCH DAMAGE. 3262976Smckusick * 3363788Smckusick * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 3462976Smckusick */ 3562976Smckusick 36116192Sobrien#include <sys/cdefs.h> 37116192Sobrien__FBSDID("$FreeBSD$"); 38116192Sobrien 39158322Stegge#include "opt_quota.h" 40158322Stegge 4162976Smckusick#include <sys/param.h> 42105191Smckusick#include <sys/kernel.h> 4362976Smckusick#include <sys/systm.h> 4473942Smckusick#include <sys/conf.h> 4562976Smckusick#include <sys/bio.h> 4662976Smckusick#include <sys/buf.h> 47177785Skib#include <sys/fcntl.h> 4862976Smckusick#include <sys/proc.h> 4962976Smckusick#include <sys/namei.h> 50113376Sjeff#include <sys/sched.h> 5162976Smckusick#include <sys/stat.h> 5262976Smckusick#include <sys/malloc.h> 5362976Smckusick#include <sys/mount.h> 5462976Smckusick#include <sys/resource.h> 5562976Smckusick#include <sys/resourcevar.h> 5662976Smckusick#include <sys/vnode.h> 5762976Smckusick 58137035Sphk#include <geom/geom.h> 59137035Sphk 6062976Smckusick#include <ufs/ufs/extattr.h> 6162976Smckusick#include <ufs/ufs/quota.h> 6262976Smckusick#include <ufs/ufs/ufsmount.h> 6362976Smckusick#include <ufs/ufs/inode.h> 6462976Smckusick#include <ufs/ufs/ufs_extern.h> 6562976Smckusick 6662976Smckusick#include <ufs/ffs/fs.h> 6762976Smckusick#include <ufs/ffs/ffs_extern.h> 6862976Smckusick 6991420Sjhb#define KERNCRED thread0.td_ucred 7065998Sdes#define DEBUG 
1 7162976Smckusick 72154065Simp#include "opt_ffs.h" 73154065Simp 74154065Simp#ifdef NO_FFS_SNAPSHOT 75154065Simpint 76154065Simpffs_snapshot(mp, snapfile) 77154065Simp struct mount *mp; 78154065Simp char *snapfile; 79154065Simp{ 80154065Simp return (EINVAL); 81154065Simp} 82154065Simp 83154065Simpint 84223127Smckusickffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) 85154065Simp struct fs *fs; 86154065Simp struct vnode *devvp; 87154065Simp ufs2_daddr_t bno; 88154065Simp long size; 89154065Simp ino_t inum; 90223127Smckusick enum vtype vtype; 91223020Smckusick struct workhead *wkhd; 92154065Simp{ 93154065Simp return (EINVAL); 94154065Simp} 95154065Simp 96154065Simpvoid 97154065Simpffs_snapremove(vp) 98154065Simp struct vnode *vp; 99154065Simp{ 100154065Simp} 101154065Simp 102154065Simpvoid 103154065Simpffs_snapshot_mount(mp) 104154065Simp struct mount *mp; 105154065Simp{ 106154065Simp} 107154065Simp 108154065Simpvoid 109154065Simpffs_snapshot_unmount(mp) 110154065Simp struct mount *mp; 111154065Simp{ 112154065Simp} 113154065Simp 114154065Simpvoid 115154065Simpffs_snapgone(ip) 116154065Simp struct inode *ip; 117154065Simp{ 118154065Simp} 119154065Simp 120154065Simpint 121154065Simpffs_copyonwrite(devvp, bp) 122154065Simp struct vnode *devvp; 123154065Simp struct buf *bp; 124154065Simp{ 125154065Simp return (EINVAL); 126154065Simp} 127154065Simp 128223020Smckusickvoid 129223020Smckusickffs_sync_snap(mp, waitfor) 130223020Smckusick struct mount *mp; 131223020Smckusick int waitfor; 132223020Smckusick{ 133223020Smckusick} 134223020Smckusick 135154065Simp#else 136218485SnetchildFEATURE(ffs_snapshot, "FFS snapshot support"); 137154065Simp 138177778SjeffLIST_HEAD(, snapdata) snapfree; 139177778Sjeffstatic struct mtx snapfree_lock; 140177778SjeffMTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); 141177778Sjeff 14292728Salfredstatic int cgaccount(int, struct vnode *, struct buf *, int); 14398542Smckusickstatic int expunge_ufs1(struct vnode *, 
struct inode *, struct fs *, 14498542Smckusick int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 145207141Sjeff ufs_lbn_t, int), int, int); 14698542Smckusickstatic int indiracct_ufs1(struct vnode *, struct vnode *, int, 14798542Smckusick ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 14898542Smckusick int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 14998542Smckusick ufs_lbn_t, int), int); 15098542Smckusickstatic int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 15198542Smckusick struct fs *, ufs_lbn_t, int); 15298542Smckusickstatic int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 15398542Smckusick struct fs *, ufs_lbn_t, int); 15498542Smckusickstatic int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 15598542Smckusick struct fs *, ufs_lbn_t, int); 15698542Smckusickstatic int expunge_ufs2(struct vnode *, struct inode *, struct fs *, 15798542Smckusick int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 158207141Sjeff ufs_lbn_t, int), int, int); 15998542Smckusickstatic int indiracct_ufs2(struct vnode *, struct vnode *, int, 16098542Smckusick ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 16198542Smckusick int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 16298542Smckusick ufs_lbn_t, int), int); 16398542Smckusickstatic int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 16498542Smckusick struct fs *, ufs_lbn_t, int); 16598542Smckusickstatic int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 16698542Smckusick struct fs *, ufs_lbn_t, int); 16798542Smckusickstatic int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 16898542Smckusick struct fs *, ufs_lbn_t, int); 169135138Sphkstatic int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t); 170177778Sjeffstatic void try_free_snapdata(struct vnode *devvp); 171177778Sjeffstatic struct snapdata 
*ffs_snapdata_acquire(struct vnode *devvp); 172166193Skibstatic int ffs_bp_snapblk(struct vnode *, struct buf *); 17362976Smckusick 17476580Smckusick/* 17576580Smckusick * To ensure the consistency of snapshots across crashes, we must 17676580Smckusick * synchronously write out copied blocks before allowing the 17776580Smckusick * originals to be modified. Because of the rather severe speed 178223127Smckusick * penalty that this imposes, the code normally only ensures 179223127Smckusick * persistence for the filesystem metadata contained within a 180223127Smckusick * snapshot. Setting the following flag allows this crash 181223127Smckusick * persistence to be enabled for file contents. 18276580Smckusick */ 18376580Smckusickint dopersistence = 0; 18476580Smckusick 18562976Smckusick#ifdef DEBUG 18662976Smckusick#include <sys/sysctl.h> 18776580SmckusickSYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); 188114293Smarkmstatic int snapdebug = 0; 18962976SmckusickSYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 19087827Smckusickint collectsnapstats = 0; 19187827SmckusickSYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 19287827Smckusick 0, ""); 19362976Smckusick#endif /* DEBUG */ 19462976Smckusick 19562976Smckusick/* 19662976Smckusick * Create a snapshot file and initialize it for the filesystem. 
19762976Smckusick */ 19862976Smckusickint 19962976Smckusickffs_snapshot(mp, snapfile) 20062976Smckusick struct mount *mp; 20162976Smckusick char *snapfile; 20262976Smckusick{ 203111240Smckusick ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; 20498542Smckusick int error, cg, snaploc; 20590098Smckusick int i, size, len, loc; 206233630Smckusick ufs2_daddr_t blockno; 207225850Smckusick uint64_t flag; 20887827Smckusick struct timespec starttime = {0, 0}, endtime; 20987827Smckusick char saved_nice = 0; 210111240Smckusick long redo = 0, snaplistsize = 0; 21176269Smckusick int32_t *lp; 21271073Siedowse void *space; 213140706Sjeff struct fs *copy_fs = NULL, *fs; 21483366Sjulian struct thread *td = curthread; 21573942Smckusick struct inode *ip, *xp; 216225851Smckusick struct buf *bp, *nbp, *ibp; 21762976Smckusick struct nameidata nd; 21862976Smckusick struct mount *wrtmp; 21962976Smckusick struct vattr vat; 220154152Stegge struct vnode *vp, *xvp, *mvp, *devvp; 221104698Smckusick struct uio auio; 222104698Smckusick struct iovec aiov; 223135138Sphk struct snapdata *sn; 224140706Sjeff struct ufsmount *ump; 22562976Smckusick 226140706Sjeff ump = VFSTOUFS(mp); 227140706Sjeff fs = ump->um_fs; 228158632Stegge sn = NULL; 229230725Smckusick /* 230230725Smckusick * At the moment, journaled soft updates cannot support 231230725Smckusick * taking snapshots. 232230725Smckusick */ 233230725Smckusick if (MOUNTEDSUJ(mp)) { 234230725Smckusick vfs_mount_error(mp, "%s: Snapshots are not yet supported when " 235230725Smckusick "running with journaled soft updates", fs->fs_fsmnt); 236230725Smckusick return (EOPNOTSUPP); 237230725Smckusick } 238162647Stegge MNT_ILOCK(mp); 239162647Stegge flag = mp->mnt_flag; 240162647Stegge MNT_IUNLOCK(mp); 241135138Sphk /* 24262976Smckusick * Need to serialize access to snapshot code per filesystem. 24362976Smckusick */ 24462976Smckusick /* 24562976Smckusick * Assign a snapshot slot in the superblock. 
24662976Smckusick */ 247140706Sjeff UFS_LOCK(ump); 24862976Smckusick for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 24962976Smckusick if (fs->fs_snapinum[snaploc] == 0) 25062976Smckusick break; 251140706Sjeff UFS_UNLOCK(ump); 25262976Smckusick if (snaploc == FSMAXSNAP) 25362976Smckusick return (ENOSPC); 25462976Smckusick /* 25562976Smckusick * Create the snapshot file. 25662976Smckusick */ 25762976Smckusickrestart: 258138557Sphk NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td); 25962976Smckusick if ((error = namei(&nd)) != 0) 26062976Smckusick return (error); 26162976Smckusick if (nd.ni_vp != NULL) { 26262976Smckusick vput(nd.ni_vp); 26362976Smckusick error = EEXIST; 26462976Smckusick } 26562976Smckusick if (nd.ni_dvp->v_mount != mp) 26662976Smckusick error = EXDEV; 26762976Smckusick if (error) { 26862976Smckusick NDFREE(&nd, NDF_ONLY_PNBUF); 26962976Smckusick if (nd.ni_dvp == nd.ni_vp) 27062976Smckusick vrele(nd.ni_dvp); 27162976Smckusick else 27262976Smckusick vput(nd.ni_dvp); 27362976Smckusick return (error); 27462976Smckusick } 27562976Smckusick VATTR_NULL(&vat); 27662976Smckusick vat.va_type = VREG; 27762976Smckusick vat.va_mode = S_IRUSR; 27862976Smckusick vat.va_vaflags |= VA_EXCLUSIVE; 27962976Smckusick if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 28062976Smckusick wrtmp = NULL; 28162976Smckusick if (wrtmp != mp) 28262976Smckusick panic("ffs_snapshot: mount mismatch"); 283157325Sjeff vfs_rel(wrtmp); 28462985Smckusick if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 28562976Smckusick NDFREE(&nd, NDF_ONLY_PNBUF); 28662976Smckusick vput(nd.ni_dvp); 28762985Smckusick if ((error = vn_start_write(NULL, &wrtmp, 28862985Smckusick V_XSLEEP | PCATCH)) != 0) 28962976Smckusick return (error); 29062976Smckusick goto restart; 29162976Smckusick } 29262976Smckusick error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 293175294Sattilio VOP_UNLOCK(nd.ni_dvp, 0); 29462976Smckusick if (error) { 29562976Smckusick NDFREE(&nd, NDF_ONLY_PNBUF); 
29662976Smckusick vn_finished_write(wrtmp); 297156895Stegge vrele(nd.ni_dvp); 29862976Smckusick return (error); 29962976Smckusick } 30062976Smckusick vp = nd.ni_vp; 301166142Smpp vp->v_vflag |= VV_SYSTEM; 30262976Smckusick ip = VTOI(vp); 303107414Smckusick devvp = ip->i_devvp; 30462976Smckusick /* 30562976Smckusick * Allocate and copy the last block contents so as to be able 30662976Smckusick * to set size to that of the filesystem. 30762976Smckusick */ 30862976Smckusick numblks = howmany(fs->fs_size, fs->fs_frag); 30976132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 31098658Sdillon fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 31162976Smckusick if (error) 31262976Smckusick goto out; 31362976Smckusick ip->i_size = lblktosize(fs, (off_t)numblks); 314132775Skan DIP_SET(ip, i_size, ip->i_size); 31562976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 316158633Stegge error = readblock(vp, bp, numblks - 1); 317158633Stegge bawrite(bp); 318158633Stegge if (error != 0) 31962976Smckusick goto out; 32062976Smckusick /* 32162976Smckusick * Preallocate critical data structures so that we can copy 32262976Smckusick * them in without further allocation after we suspend all 32362976Smckusick * operations on the filesystem. We would like to just release 32462976Smckusick * the allocated buffers without writing them since they will 32562976Smckusick * be filled in below once we are ready to go, but this upsets 32662976Smckusick * the soft update code, so we go ahead and write the new buffers. 32762976Smckusick * 32875993Smckusick * Allocate all indirect blocks and mark all of them as not 32975993Smckusick * needing to be copied. 
33062976Smckusick */ 33162976Smckusick for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 33276132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 33398658Sdillon fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 33462976Smckusick if (error) 33562976Smckusick goto out; 336107406Smckusick bawrite(ibp); 33762976Smckusick } 33862976Smckusick /* 33962976Smckusick * Allocate copies for the superblock and its summary information. 34062976Smckusick */ 341107294Smckusick error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 342107294Smckusick 0, &nbp); 34376269Smckusick if (error) 34462976Smckusick goto out; 34562976Smckusick bawrite(nbp); 34662976Smckusick blkno = fragstoblks(fs, fs->fs_csaddr); 34762976Smckusick len = howmany(fs->fs_cssize, fs->fs_bsize); 34862976Smckusick for (loc = 0; loc < len; loc++) { 34976132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 35062976Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 35162976Smckusick if (error) 35262976Smckusick goto out; 35362976Smckusick bawrite(nbp); 35462976Smckusick } 35562976Smckusick /* 35687827Smckusick * Allocate all cylinder group blocks. 35787827Smckusick */ 35887827Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 359111238Smckusick error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 36087827Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 36187827Smckusick if (error) 36287827Smckusick goto out; 363107406Smckusick bawrite(nbp); 364184934Sambrisko if (cg % 10 == 0) 365233630Smckusick ffs_syncvnode(vp, MNT_WAIT, 0); 36687827Smckusick } 36787827Smckusick /* 36887827Smckusick * Copy all the cylinder group maps. Although the 36987827Smckusick * filesystem is still active, we hope that only a few 37087827Smckusick * cylinder groups will change between now and when we 37187827Smckusick * suspend operations. Thus, we will be able to quickly 37287827Smckusick * touch up the few cylinder groups that changed during 37387827Smckusick * the suspension period. 
37487827Smckusick */ 37589450Smckusick len = howmany(fs->fs_ncg, NBBY); 376184205Sdes space = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 377140706Sjeff UFS_LOCK(ump); 378140706Sjeff fs->fs_active = space; 379140706Sjeff UFS_UNLOCK(ump); 38087827Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 381111238Smckusick error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 382107558Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 383107558Smckusick if (error) 38487827Smckusick goto out; 38587827Smckusick error = cgaccount(cg, vp, nbp, 1); 38687827Smckusick bawrite(nbp); 387184934Sambrisko if (cg % 10 == 0) 388233630Smckusick ffs_syncvnode(vp, MNT_WAIT, 0); 38987827Smckusick if (error) 39087827Smckusick goto out; 39187827Smckusick } 39287827Smckusick /* 39362976Smckusick * Change inode to snapshot type file. 39462976Smckusick */ 39563897Smckusick ip->i_flags |= SF_SNAPSHOT; 396132775Skan DIP_SET(ip, i_flags, ip->i_flags); 39762976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 39862976Smckusick /* 39962976Smckusick * Ensure that the snapshot is completely on disk. 400107406Smckusick * Since we have marked it as a snapshot it is safe to 401107406Smckusick * unlock it as no process will be allowed to write to it. 40262976Smckusick */ 403233630Smckusick if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) 40462976Smckusick goto out; 405175294Sattilio VOP_UNLOCK(vp, 0); 40662976Smckusick /* 40762976Smckusick * All allocations are done, so we can now snapshot the system. 40862976Smckusick * 40987827Smckusick * Rescind nice scheduling while running with the filesystem suspended. 41087827Smckusick */ 411130551Sjulian if (td->td_proc->p_nice > 0) { 412170307Sjeff struct proc *p; 413170307Sjeff 414170307Sjeff p = td->td_proc; 415170307Sjeff PROC_LOCK(p); 416170307Sjeff saved_nice = p->p_nice; 417170307Sjeff sched_nice(p, 0); 418170307Sjeff PROC_UNLOCK(p); 41987827Smckusick } 42087827Smckusick /* 42162976Smckusick * Suspend operation on filesystem. 
42262976Smckusick */ 42362976Smckusick for (;;) { 42462976Smckusick vn_finished_write(wrtmp); 425105902Smckusick if ((error = vfs_write_suspend(vp->v_mount)) != 0) { 426105902Smckusick vn_start_write(NULL, &wrtmp, V_WAIT); 427175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 428105902Smckusick goto out; 429105902Smckusick } 43062976Smckusick if (mp->mnt_kern_flag & MNTK_SUSPENDED) 43162976Smckusick break; 43262985Smckusick vn_start_write(NULL, &wrtmp, V_WAIT); 43362976Smckusick } 434175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 435158262Stegge if (ip->i_effnlink == 0) { 436158262Stegge error = ENOENT; /* Snapshot file unlinked */ 437158262Stegge goto out1; 438158262Stegge } 43990098Smckusick if (collectsnapstats) 44090098Smckusick nanotime(&starttime); 441158634Stegge 442158634Stegge /* The last block might have changed. Copy it again to be sure. */ 443158634Stegge error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 444158634Stegge fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 445158634Stegge if (error != 0) 446158634Stegge goto out1; 447158634Stegge error = readblock(vp, bp, numblks - 1); 448158634Stegge bp->b_flags |= B_VALIDSUSPWRT; 449158634Stegge bawrite(bp); 450158634Stegge if (error != 0) 451158634Stegge goto out1; 45262976Smckusick /* 45387827Smckusick * First, copy all the cylinder group maps that have changed. 
45462976Smckusick */ 45562976Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 45688138Smckusick if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 45787827Smckusick continue; 45887827Smckusick redo++; 459111238Smckusick error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 460107558Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 461107558Smckusick if (error) 46262976Smckusick goto out1; 46387827Smckusick error = cgaccount(cg, vp, nbp, 2); 46489450Smckusick bawrite(nbp); 46587827Smckusick if (error) 46662976Smckusick goto out1; 46762976Smckusick } 46862976Smckusick /* 46976269Smckusick * Grab a copy of the superblock and its summary information. 47076269Smckusick * We delay writing it until the suspension is released below. 47176269Smckusick */ 472225851Smckusick copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK); 47376269Smckusick bcopy(fs, copy_fs, fs->fs_sbsize); 47476269Smckusick if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) 47576269Smckusick copy_fs->fs_clean = 1; 476111972Smckusick size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; 477111972Smckusick if (fs->fs_sbsize < size) 478225851Smckusick bzero(&((char *)copy_fs)[fs->fs_sbsize], 479225851Smckusick size - fs->fs_sbsize); 48076269Smckusick size = blkroundup(fs, fs->fs_cssize); 48176269Smckusick if (fs->fs_contigsumsize > 0) 48276269Smckusick size += fs->fs_ncg * sizeof(int32_t); 483111119Simp space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 48476269Smckusick copy_fs->fs_csp = space; 48576269Smckusick bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 486130246Sstefanf space = (char *)space + fs->fs_cssize; 48776269Smckusick loc = howmany(fs->fs_cssize, fs->fs_fsize); 48876356Smckusick i = fs->fs_frag - loc % fs->fs_frag; 48976356Smckusick len = (i == fs->fs_frag) ? 
0 : i * fs->fs_fsize; 49076356Smckusick if (len > 0) { 491107414Smckusick if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 49276269Smckusick len, KERNCRED, &bp)) != 0) { 49390098Smckusick brelse(bp); 49476269Smckusick free(copy_fs->fs_csp, M_UFSMNT); 495225851Smckusick free(copy_fs, M_UFSMNT); 496225851Smckusick copy_fs = NULL; 49776269Smckusick goto out1; 49862976Smckusick } 49976269Smckusick bcopy(bp->b_data, space, (u_int)len); 500130246Sstefanf space = (char *)space + len; 50176269Smckusick bp->b_flags |= B_INVAL | B_NOCACHE; 50276269Smckusick brelse(bp); 50362976Smckusick } 50476269Smckusick if (fs->fs_contigsumsize > 0) { 50576269Smckusick copy_fs->fs_maxcluster = lp = space; 50676269Smckusick for (i = 0; i < fs->fs_ncg; i++) 50776269Smckusick *lp++ = fs->fs_contigsumsize; 50876269Smckusick } 50962976Smckusick /* 51090098Smckusick * We must check for active files that have been unlinked 51190098Smckusick * (e.g., with a zero link count). We have to expunge all 51290098Smckusick * trace of these files from the snapshot so that they are 51390098Smckusick * not reclaimed prematurely by fsck or unnecessarily dumped. 51490098Smckusick * We turn off the MNTK_SUSPENDED flag to avoid a panic from 51590098Smckusick * spec_strategy about writing on a suspended filesystem. 516104698Smckusick * Note that we skip unlinked snapshot files as they will 517104698Smckusick * be handled separately below. 518111240Smckusick * 519111240Smckusick * We also calculate the needed size for the snapshot list. 
52090098Smckusick */ 521111240Smckusick snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 522111240Smckusick FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; 523140706Sjeff MNT_ILOCK(mp); 52490098Smckusick mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 525235626Smckusick MNT_IUNLOCK(mp); 52690098Smckusickloop: 527235626Smckusick MNT_VNODE_FOREACH_ALL(xvp, mp, mvp) { 528235626Smckusick if ((xvp->v_usecount == 0 && 529156560Stegge (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) || 530156560Stegge xvp->v_type == VNON || 531233630Smckusick IS_SNAPSHOT(VTOI(xvp))) { 532120740Sjeff VI_UNLOCK(xvp); 53390098Smckusick continue; 53490098Smckusick } 535130690Skuriyama /* 536130690Skuriyama * We can skip parent directory vnode because it must have 537130690Skuriyama * this snapshot file in it. 538130690Skuriyama */ 539130690Skuriyama if (xvp == nd.ni_dvp) { 540130690Skuriyama VI_UNLOCK(xvp); 541130690Skuriyama continue; 542130690Skuriyama } 543156560Stegge vholdl(xvp); 544175202Sattilio if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) { 545235626Smckusick MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 546156560Stegge vdrop(xvp); 54790098Smckusick goto loop; 548120740Sjeff } 549156560Stegge VI_LOCK(xvp); 550156560Stegge if (xvp->v_usecount == 0 && 551156560Stegge (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) { 552156560Stegge VI_UNLOCK(xvp); 553175294Sattilio VOP_UNLOCK(xvp, 0); 554156560Stegge vdrop(xvp); 555156560Stegge continue; 556156560Stegge } 557156560Stegge VI_UNLOCK(xvp); 558124119Skan if (snapdebug) 559124119Skan vprint("ffs_snapshot: busy vnode", xvp); 560182371Sattilio if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 && 561120740Sjeff vat.va_nlink > 0) { 562175294Sattilio VOP_UNLOCK(xvp, 0); 563156560Stegge vdrop(xvp); 564120740Sjeff continue; 565120740Sjeff } 56690098Smckusick xp = VTOI(xvp); 567111239Smckusick if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 568175294Sattilio VOP_UNLOCK(xvp, 0); 569156560Stegge vdrop(xvp); 
570111239Smckusick continue; 571111239Smckusick } 57290098Smckusick /* 57390098Smckusick * If there is a fragment, clear it here. 57490098Smckusick */ 57590098Smckusick blkno = 0; 57690098Smckusick loc = howmany(xp->i_size, fs->fs_bsize) - 1; 57790098Smckusick if (loc < NDADDR) { 57890098Smckusick len = fragroundup(fs, blkoff(fs, xp->i_size)); 579142074Sdelphij if (len != 0 && len < fs->fs_bsize) { 580140706Sjeff ffs_blkfree(ump, copy_fs, vp, 581207141Sjeff DIP(xp, i_db[loc]), len, xp->i_number, 582223127Smckusick xvp->v_type, NULL); 58398542Smckusick blkno = DIP(xp, i_db[loc]); 584132775Skan DIP_SET(xp, i_db[loc], 0); 58590098Smckusick } 58690098Smckusick } 587111240Smckusick snaplistsize += 1; 58898542Smckusick if (xp->i_ump->um_fstype == UFS1) 58998542Smckusick error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 590207141Sjeff BLK_NOCOPY, 1); 59198542Smckusick else 59298542Smckusick error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 593207141Sjeff BLK_NOCOPY, 1); 59490098Smckusick if (blkno) 595132775Skan DIP_SET(xp, i_db[loc], blkno); 59690098Smckusick if (!error) 597140706Sjeff error = ffs_freefile(ump, copy_fs, vp, xp->i_number, 598207141Sjeff xp->i_mode, NULL); 599175294Sattilio VOP_UNLOCK(xvp, 0); 600156560Stegge vdrop(xvp); 60190098Smckusick if (error) { 60290098Smckusick free(copy_fs->fs_csp, M_UFSMNT); 603225851Smckusick free(copy_fs, M_UFSMNT); 604225851Smckusick copy_fs = NULL; 605235626Smckusick MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 60690098Smckusick goto out1; 60790098Smckusick } 60890098Smckusick } 60990098Smckusick /* 610207141Sjeff * Erase the journal file from the snapshot. 
611207141Sjeff */ 612207141Sjeff if (fs->fs_flags & FS_SUJ) { 613207141Sjeff error = softdep_journal_lookup(mp, &xvp); 614207141Sjeff if (error) { 615207141Sjeff free(copy_fs->fs_csp, M_UFSMNT); 616225851Smckusick free(copy_fs, M_UFSMNT); 617225851Smckusick copy_fs = NULL; 618207141Sjeff goto out1; 619207141Sjeff } 620207141Sjeff xp = VTOI(xvp); 621207141Sjeff if (xp->i_ump->um_fstype == UFS1) 622207141Sjeff error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 623207141Sjeff BLK_NOCOPY, 0); 624207141Sjeff else 625207141Sjeff error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 626207141Sjeff BLK_NOCOPY, 0); 627207141Sjeff vput(xvp); 628207141Sjeff } 629207141Sjeff /* 630177778Sjeff * Acquire a lock on the snapdata structure, creating it if necessary. 631105191Smckusick */ 632177778Sjeff sn = ffs_snapdata_acquire(devvp); 633177778Sjeff /* 634177778Sjeff * Change vnode to use shared snapshot lock instead of the original 635177778Sjeff * private lock. 636177778Sjeff */ 637177778Sjeff vp->v_vnlock = &sn->sn_lock; 638175635Sattilio lockmgr(&vp->v_lock, LK_RELEASE, NULL); 639177778Sjeff xp = TAILQ_FIRST(&sn->sn_head); 640105191Smckusick /* 641111240Smckusick * If this is the first snapshot on this filesystem, then we need 642111240Smckusick * to allocate the space for the list of preallocated snapshot blocks. 643111240Smckusick * This list will be refined below, but this preliminary one will 644111240Smckusick * keep us out of deadlock until the full one is ready. 
645111240Smckusick */ 646111240Smckusick if (xp == NULL) { 647184205Sdes snapblklist = malloc(snaplistsize * sizeof(daddr_t), 648111240Smckusick M_UFSMNT, M_WAITOK); 649111240Smckusick blkp = &snapblklist[1]; 650111240Smckusick *blkp++ = lblkno(fs, fs->fs_sblockloc); 651111240Smckusick blkno = fragstoblks(fs, fs->fs_csaddr); 652111240Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 653111240Smckusick if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 654111240Smckusick break; 655111240Smckusick *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 656111240Smckusick } 657111240Smckusick len = howmany(fs->fs_cssize, fs->fs_bsize); 658111240Smckusick for (loc = 0; loc < len; loc++) 659111240Smckusick *blkp++ = blkno + loc; 660111240Smckusick for (; cg < fs->fs_ncg; cg++) 661111240Smckusick *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 662111240Smckusick snapblklist[0] = blkp - snapblklist; 663111240Smckusick VI_LOCK(devvp); 664135138Sphk if (sn->sn_blklist != NULL) 665111240Smckusick panic("ffs_snapshot: non-empty list"); 666135138Sphk sn->sn_blklist = snapblklist; 667135138Sphk sn->sn_listsize = blkp - snapblklist; 668111240Smckusick VI_UNLOCK(devvp); 669111240Smckusick } 670111240Smckusick /* 67162976Smckusick * Record snapshot inode. Since this is the newest snapshot, 67262976Smckusick * it must be placed at the end of the list. 
67362976Smckusick */ 674107414Smckusick VI_LOCK(devvp); 67562976Smckusick fs->fs_snapinum[snaploc] = ip->i_number; 67673942Smckusick if (ip->i_nextsnap.tqe_prev != 0) 67762976Smckusick panic("ffs_snapshot: %d already on list", ip->i_number); 678135138Sphk TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 679107414Smckusick devvp->v_vflag |= VV_COPYONWRITE; 680107414Smckusick VI_UNLOCK(devvp); 681101308Sjeff ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 68287827Smckusickout1: 683225851Smckusick KASSERT((sn != NULL && copy_fs != NULL && error == 0) || 684225851Smckusick (sn == NULL && copy_fs == NULL && error != 0), 685158632Stegge ("email phk@ and mckusick@")); 68662976Smckusick /* 68762976Smckusick * Resume operation on filesystem. 68862976Smckusick */ 689245283Skib vfs_write_resume_flags(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR); 69087827Smckusick if (collectsnapstats && starttime.tv_sec > 0) { 69187827Smckusick nanotime(&endtime); 69287827Smckusick timespecsub(&endtime, &starttime); 693106965Speter printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 694106965Speter vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 69587827Smckusick endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 69687827Smckusick } 697225851Smckusick if (copy_fs == NULL) 69890098Smckusick goto out; 69990098Smckusick /* 70090098Smckusick * Copy allocation information from all the snapshots in 70190098Smckusick * this snapshot and then expunge them from its view. 
70290098Smckusick */ 703135138Sphk TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) { 70490098Smckusick if (xp == ip) 70590098Smckusick break; 70698542Smckusick if (xp->i_ump->um_fstype == UFS1) 70798542Smckusick error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 708207141Sjeff BLK_SNAP, 0); 70998542Smckusick else 71098542Smckusick error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 711207141Sjeff BLK_SNAP, 0); 712158527Stegge if (error == 0 && xp->i_effnlink == 0) { 713158527Stegge error = ffs_freefile(ump, 714158527Stegge copy_fs, 715158527Stegge vp, 716158527Stegge xp->i_number, 717207141Sjeff xp->i_mode, NULL); 718158527Stegge } 71998542Smckusick if (error) { 72090098Smckusick fs->fs_snapinum[snaploc] = 0; 72190098Smckusick goto done; 72287827Smckusick } 72390098Smckusick } 72490098Smckusick /* 725111240Smckusick * Allocate space for the full list of preallocated snapshot blocks. 726104698Smckusick */ 727184205Sdes snapblklist = malloc(snaplistsize * sizeof(daddr_t), 728111119Simp M_UFSMNT, M_WAITOK); 729107915Smckusick ip->i_snapblklist = &snapblklist[1]; 730104698Smckusick /* 73190098Smckusick * Expunge the blocks used by the snapshots from the set of 732104698Smckusick * blocks marked as used in the snapshot bitmaps. Also, collect 733107915Smckusick * the list of allocated blocks in i_snapblklist. 
73490098Smckusick */ 73598542Smckusick if (ip->i_ump->um_fstype == UFS1) 736207141Sjeff error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, 737207141Sjeff BLK_SNAP, 0); 73898542Smckusick else 739207141Sjeff error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, 740207141Sjeff BLK_SNAP, 0); 74198542Smckusick if (error) { 74290098Smckusick fs->fs_snapinum[snaploc] = 0; 743184205Sdes free(snapblklist, M_UFSMNT); 74490098Smckusick goto done; 74590098Smckusick } 746111240Smckusick if (snaplistsize < ip->i_snapblklist - snapblklist) 747111240Smckusick panic("ffs_snapshot: list too small"); 748107915Smckusick snaplistsize = ip->i_snapblklist - snapblklist; 749107848Smckusick snapblklist[0] = snaplistsize; 750107915Smckusick ip->i_snapblklist = 0; 75190098Smckusick /* 752104698Smckusick * Write out the list of allocated blocks to the end of the snapshot. 753104698Smckusick */ 754104698Smckusick auio.uio_iov = &aiov; 755104698Smckusick auio.uio_iovcnt = 1; 756107848Smckusick aiov.iov_base = (void *)snapblklist; 757107848Smckusick aiov.iov_len = snaplistsize * sizeof(daddr_t); 758201758Smbr auio.uio_resid = aiov.iov_len; 759104698Smckusick auio.uio_offset = ip->i_size; 760104698Smckusick auio.uio_segflg = UIO_SYSSPACE; 761104698Smckusick auio.uio_rw = UIO_WRITE; 762104698Smckusick auio.uio_td = td; 763104698Smckusick if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 764104698Smckusick fs->fs_snapinum[snaploc] = 0; 765184205Sdes free(snapblklist, M_UFSMNT); 766104698Smckusick goto done; 767104698Smckusick } 768104698Smckusick /* 76990098Smckusick * Write the superblock and its summary information 77090098Smckusick * to the snapshot. 
77190098Smckusick */ 77290098Smckusick blkno = fragstoblks(fs, fs->fs_csaddr); 77390098Smckusick len = howmany(fs->fs_cssize, fs->fs_bsize); 77490098Smckusick space = copy_fs->fs_csp; 77590098Smckusick for (loc = 0; loc < len; loc++) { 77690098Smckusick error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 77790098Smckusick if (error) { 77890098Smckusick brelse(nbp); 77990098Smckusick fs->fs_snapinum[snaploc] = 0; 780184205Sdes free(snapblklist, M_UFSMNT); 78190098Smckusick goto done; 78276269Smckusick } 78390098Smckusick bcopy(space, nbp->b_data, fs->fs_bsize); 78490098Smckusick space = (char *)space + fs->fs_bsize; 78590098Smckusick bawrite(nbp); 78676269Smckusick } 787225851Smckusick error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 788225851Smckusick KERNCRED, &nbp); 789225851Smckusick if (error) { 790225851Smckusick brelse(nbp); 791225851Smckusick } else { 792225851Smckusick loc = blkoff(fs, fs->fs_sblockloc); 793253465Skib bcopy((char *)copy_fs, &nbp->b_data[loc], (u_int)fs->fs_sbsize); 794225851Smckusick bawrite(nbp); 795225851Smckusick } 796107848Smckusick /* 797107848Smckusick * As this is the newest list, it is the most inclusive, so 798107848Smckusick * should replace the previous list. 799107848Smckusick */ 800107848Smckusick VI_LOCK(devvp); 801135138Sphk space = sn->sn_blklist; 802135138Sphk sn->sn_blklist = snapblklist; 803135138Sphk sn->sn_listsize = snaplistsize; 804122596Salc VI_UNLOCK(devvp); 805111240Smckusick if (space != NULL) 806184205Sdes free(space, M_UFSMNT); 807151180Stegge /* 808233630Smckusick * Preallocate all the direct blocks in the snapshot inode so 809233630Smckusick * that we never have to write the inode itself to commit an 810233630Smckusick * update to the contents of the snapshot. 
Note that once 811233630Smckusick * created, the size of the snapshot will never change, so 812233630Smckusick * there will never be a need to write the inode except to 813233630Smckusick * update the non-integrity-critical time fields and 814233630Smckusick * allocated-block count. 815151180Stegge */ 816233630Smckusick for (blockno = 0; blockno < NDADDR; blockno++) { 817233630Smckusick if (DIP(ip, i_db[blockno]) != 0) 818233630Smckusick continue; 819233630Smckusick error = UFS_BALLOC(vp, lblktosize(fs, blockno), 820233630Smckusick fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 821233630Smckusick if (error) 822233630Smckusick break; 823233630Smckusick error = readblock(vp, bp, blockno); 824233630Smckusick bawrite(bp); 825233630Smckusick if (error != 0) 826233630Smckusick break; 827233630Smckusick } 82890098Smckusickdone: 829184205Sdes free(copy_fs->fs_csp, M_UFSMNT); 830225851Smckusick free(copy_fs, M_UFSMNT); 831225851Smckusick copy_fs = NULL; 83262976Smckusickout: 833168576Skib NDFREE(&nd, NDF_ONLY_PNBUF); 834113872Sjhb if (saved_nice > 0) { 835170307Sjeff struct proc *p; 836170307Sjeff 837170307Sjeff p = td->td_proc; 838170307Sjeff PROC_LOCK(p); 839130551Sjulian sched_nice(td->td_proc, saved_nice); 840113872Sjhb PROC_UNLOCK(td->td_proc); 841113872Sjhb } 842140706Sjeff UFS_LOCK(ump); 84387827Smckusick if (fs->fs_active != 0) { 844184205Sdes free(fs->fs_active, M_DEVBUF); 84587827Smckusick fs->fs_active = 0; 84687827Smckusick } 847140706Sjeff UFS_UNLOCK(ump); 848162647Stegge MNT_ILOCK(mp); 849162652Stegge mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); 850162647Stegge MNT_IUNLOCK(mp); 85176269Smckusick if (error) 852141526Sphk (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 853233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, 0); 85462976Smckusick if (error) 85562976Smckusick vput(vp); 85662976Smckusick else 857175294Sattilio VOP_UNLOCK(vp, 0); 858156895Stegge vrele(nd.ni_dvp); 85962976Smckusick vn_finished_write(wrtmp); 860156560Stegge 
process_deferred_inactive(mp); 86162976Smckusick return (error); 86262976Smckusick} 86362976Smckusick 86462976Smckusick/* 86587827Smckusick * Copy a cylinder group map. All the unallocated blocks are marked 86687827Smckusick * BLK_NOCOPY so that the snapshot knows that it need not copy them 86792363Smckusick * if they are later written. If passno is one, then this is a first 86892363Smckusick * pass, so only setting needs to be done. If passno is 2, then this 86987827Smckusick * is a revision to a previous pass which must be undone as the 87087827Smckusick * replacement pass is done. 87187827Smckusick */ 87287827Smckusickstatic int 87387827Smckusickcgaccount(cg, vp, nbp, passno) 87487827Smckusick int cg; 87587827Smckusick struct vnode *vp; 87687827Smckusick struct buf *nbp; 87787827Smckusick int passno; 87887827Smckusick{ 87987827Smckusick struct buf *bp, *ibp; 88087827Smckusick struct inode *ip; 88187827Smckusick struct cg *cgp; 88287827Smckusick struct fs *fs; 88398542Smckusick ufs2_daddr_t base, numblks; 88498542Smckusick int error, len, loc, indiroff; 88587827Smckusick 88687827Smckusick ip = VTOI(vp); 88787827Smckusick fs = ip->i_fs; 88887827Smckusick error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 88987827Smckusick (int)fs->fs_cgsize, KERNCRED, &bp); 89087827Smckusick if (error) { 89187827Smckusick brelse(bp); 89287827Smckusick return (error); 89387827Smckusick } 89487827Smckusick cgp = (struct cg *)bp->b_data; 89587827Smckusick if (!cg_chkmagic(cgp)) { 89687827Smckusick brelse(bp); 89787827Smckusick return (EIO); 89887827Smckusick } 899140706Sjeff UFS_LOCK(ip->i_ump); 900142879Sjeff ACTIVESET(fs, cg); 901183822Skib /* 902183822Skib * Recomputation of summary information might not have been performed 903183822Skib * at mount time. Sync up summary information for current cylinder 904183822Skib * group while data is in memory to ensure that result of background 905183822Skib * fsck is slightly more consistent. 
906183822Skib */ 907183822Skib fs->fs_cs(fs, cg) = cgp->cg_cs; 908140706Sjeff UFS_UNLOCK(ip->i_ump); 90987827Smckusick bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); 91087827Smckusick if (fs->fs_cgsize < fs->fs_bsize) 91187827Smckusick bzero(&nbp->b_data[fs->fs_cgsize], 91287827Smckusick fs->fs_bsize - fs->fs_cgsize); 913151178Stegge cgp = (struct cg *)nbp->b_data; 914151178Stegge bqrelse(bp); 91587827Smckusick if (passno == 2) 91687827Smckusick nbp->b_flags |= B_VALIDSUSPWRT; 91787827Smckusick numblks = howmany(fs->fs_size, fs->fs_frag); 91887827Smckusick len = howmany(fs->fs_fpg, fs->fs_frag); 919138634Smckusick base = cgbase(fs, cg) / fs->fs_frag; 92087827Smckusick if (base + len >= numblks) 92187827Smckusick len = numblks - base - 1; 92287827Smckusick loc = 0; 92387827Smckusick if (base < NDADDR) { 92487827Smckusick for ( ; loc < NDADDR; loc++) { 92587827Smckusick if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 926132775Skan DIP_SET(ip, i_db[loc], BLK_NOCOPY); 92798542Smckusick else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) 928132775Skan DIP_SET(ip, i_db[loc], 0); 92998542Smckusick else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) 93087827Smckusick panic("ffs_snapshot: lost direct block"); 93187827Smckusick } 93287827Smckusick } 93387827Smckusick error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), 93498658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 93587827Smckusick if (error) { 93687827Smckusick return (error); 93787827Smckusick } 93887827Smckusick indiroff = (base + loc - NDADDR) % NINDIR(fs); 93987827Smckusick for ( ; loc < len; loc++, indiroff++) { 94087827Smckusick if (indiroff >= NINDIR(fs)) { 94187827Smckusick if (passno == 2) 94287827Smckusick ibp->b_flags |= B_VALIDSUSPWRT; 94387827Smckusick bawrite(ibp); 94487827Smckusick error = UFS_BALLOC(vp, 94587827Smckusick lblktosize(fs, (off_t)(base + loc)), 94698658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 94787827Smckusick if (error) { 94887827Smckusick return (error); 
94987827Smckusick } 95087827Smckusick indiroff = 0; 95187827Smckusick } 95298542Smckusick if (ip->i_ump->um_fstype == UFS1) { 95398542Smckusick if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 95498542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 95598542Smckusick BLK_NOCOPY; 95698542Smckusick else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data)) 95798542Smckusick [indiroff] == BLK_NOCOPY) 95898542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0; 95998542Smckusick else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data)) 96098542Smckusick [indiroff] == BLK_NOCOPY) 96198542Smckusick panic("ffs_snapshot: lost indirect block"); 96298542Smckusick continue; 96398542Smckusick } 96487827Smckusick if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 96598542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; 96687827Smckusick else if (passno == 2 && 96798542Smckusick ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 96898542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0; 96987827Smckusick else if (passno == 1 && 97098542Smckusick ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 97187827Smckusick panic("ffs_snapshot: lost indirect block"); 97287827Smckusick } 97387827Smckusick if (passno == 2) 97487827Smckusick ibp->b_flags |= B_VALIDSUSPWRT; 97587827Smckusick bdwrite(ibp); 97687827Smckusick return (0); 97787827Smckusick} 97887827Smckusick 97987827Smckusick/* 98076269Smckusick * Before expunging a snapshot inode, note all the 98176269Smckusick * blocks that it claims with BLK_SNAP so that fsck will 98276269Smckusick * be able to account for those blocks properly and so 98376269Smckusick * that this snapshot knows that it need not copy them 98498542Smckusick * if the other snapshot holding them is freed. This code 98598542Smckusick * is reproduced once each for UFS1 and UFS2. 
98676269Smckusick */ 98776269Smckusickstatic int 988207141Sjeffexpunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) 98990098Smckusick struct vnode *snapvp; 99090098Smckusick struct inode *cancelip; 99176269Smckusick struct fs *fs; 99298542Smckusick int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 99398542Smckusick struct fs *, ufs_lbn_t, int); 99490098Smckusick int expungetype; 995207141Sjeff int clearmode; 99676269Smckusick{ 99798542Smckusick int i, error, indiroff; 99898542Smckusick ufs_lbn_t lbn, rlbn; 99998542Smckusick ufs2_daddr_t len, blkno, numblks, blksperindir; 100098542Smckusick struct ufs1_dinode *dip; 100190098Smckusick struct thread *td = curthread; 100276269Smckusick struct buf *bp; 100376269Smckusick 100476269Smckusick /* 100590098Smckusick * Prepare to expunge the inode. If its inode block has not 100690098Smckusick * yet been copied, then allocate and fill the copy. 100776269Smckusick */ 100890098Smckusick lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 100990098Smckusick blkno = 0; 101090098Smckusick if (lbn < NDADDR) { 1011107558Smckusick blkno = VTOI(snapvp)->i_din1->di_db[lbn]; 101290098Smckusick } else { 1013207742Sjeff if (DOINGSOFTDEP(snapvp)) 1014207742Sjeff softdep_prealloc(snapvp, MNT_WAIT); 1015121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 1016141526Sphk error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 101798658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1018121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 101990098Smckusick if (error) 102090098Smckusick return (error); 102190098Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 102298542Smckusick blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; 102390098Smckusick bqrelse(bp); 102490098Smckusick } 1025107558Smckusick if (blkno != 0) { 1026107558Smckusick if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1027107558Smckusick return (error); 1028107558Smckusick } else { 1029141526Sphk error = ffs_balloc_ufs1(snapvp, 
lblktosize(fs, (off_t)lbn), 1030107558Smckusick fs->fs_bsize, KERNCRED, 0, &bp); 1031107558Smckusick if (error) 1032107558Smckusick return (error); 1033135138Sphk if ((error = readblock(snapvp, bp, lbn)) != 0) 1034107558Smckusick return (error); 1035107558Smckusick } 103690098Smckusick /* 103790098Smckusick * Set a snapshot inode to be a zero length file, regular files 1038158527Stegge * or unlinked snapshots to be completely unallocated. 103990098Smckusick */ 104098542Smckusick dip = (struct ufs1_dinode *)bp->b_data + 104198542Smckusick ino_to_fsbo(fs, cancelip->i_number); 1042207141Sjeff if (clearmode || cancelip->i_effnlink == 0) 104390098Smckusick dip->di_mode = 0; 104476269Smckusick dip->di_size = 0; 104576269Smckusick dip->di_blocks = 0; 104676269Smckusick dip->di_flags &= ~SF_SNAPSHOT; 104798542Smckusick bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); 104876269Smckusick bdwrite(bp); 1049107848Smckusick /* 1050107848Smckusick * Now go through and expunge all the blocks in the file 1051107848Smckusick * using the function requested. 
1052107848Smckusick */ 1053107848Smckusick numblks = howmany(cancelip->i_size, fs->fs_bsize); 1054107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], 1055107848Smckusick &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) 1056107848Smckusick return (error); 1057107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], 1058107848Smckusick &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) 1059107848Smckusick return (error); 1060107848Smckusick blksperindir = 1; 1061107848Smckusick lbn = -NDADDR; 1062107848Smckusick len = numblks - NDADDR; 1063107848Smckusick rlbn = NDADDR; 1064107848Smckusick for (i = 0; len > 0 && i < NIADDR; i++) { 1065107848Smckusick error = indiracct_ufs1(snapvp, ITOV(cancelip), i, 1066107848Smckusick cancelip->i_din1->di_ib[i], lbn, rlbn, len, 1067107848Smckusick blksperindir, fs, acctfunc, expungetype); 1068107848Smckusick if (error) 1069107848Smckusick return (error); 1070107848Smckusick blksperindir *= NINDIR(fs); 1071107848Smckusick lbn -= blksperindir + 1; 1072107848Smckusick len -= blksperindir; 1073107848Smckusick rlbn += blksperindir; 1074107848Smckusick } 107576269Smckusick return (0); 107676269Smckusick} 107776269Smckusick 107876269Smckusick/* 107962976Smckusick * Descend an indirect block chain for vnode cancelvp accounting for all 108062976Smckusick * its indirect blocks in snapvp. 
108162976Smckusick */ 108262976Smckusickstatic int 108398542Smckusickindiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 108498542Smckusick blksperindir, fs, acctfunc, expungetype) 108562976Smckusick struct vnode *snapvp; 108662976Smckusick struct vnode *cancelvp; 108762976Smckusick int level; 108898542Smckusick ufs1_daddr_t blkno; 108998542Smckusick ufs_lbn_t lbn; 109098542Smckusick ufs_lbn_t rlbn; 109198542Smckusick ufs_lbn_t remblks; 109298542Smckusick ufs_lbn_t blksperindir; 109376269Smckusick struct fs *fs; 109498542Smckusick int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 109598542Smckusick struct fs *, ufs_lbn_t, int); 109690098Smckusick int expungetype; 109762976Smckusick{ 109898542Smckusick int error, num, i; 109998542Smckusick ufs_lbn_t subblksperindir; 110062976Smckusick struct indir indirs[NIADDR + 2]; 110198542Smckusick ufs1_daddr_t last, *bap; 110262976Smckusick struct buf *bp; 110362976Smckusick 1104121158Smckusick if (blkno == 0) { 1105121158Smckusick if (expungetype == BLK_NOCOPY) 1106121158Smckusick return (0); 1107121158Smckusick panic("indiracct_ufs1: missing indir"); 1108121158Smckusick } 110962976Smckusick if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 111062976Smckusick return (error); 1111121158Smckusick if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1112121158Smckusick panic("indiracct_ufs1: botched params"); 111362976Smckusick /* 111462976Smckusick * We have to expand bread here since it will deadlock looking 111562976Smckusick * up the block number for any blocks that are not in the cache. 
111662976Smckusick */ 1117111856Sjeff bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 111862976Smckusick bp->b_blkno = fsbtodb(fs, blkno); 111962976Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1120135138Sphk (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 112162976Smckusick brelse(bp); 112262976Smckusick return (error); 112362976Smckusick } 112462976Smckusick /* 112562976Smckusick * Account for the block pointers in this indirect block. 112662976Smckusick */ 112762976Smckusick last = howmany(remblks, blksperindir); 112862976Smckusick if (last > NINDIR(fs)) 112962976Smckusick last = NINDIR(fs); 1130184205Sdes bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); 113176269Smckusick bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 113276269Smckusick bqrelse(bp); 1133107848Smckusick error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1134107848Smckusick level == 0 ? rlbn : -1, expungetype); 113562976Smckusick if (error || level == 0) 113662976Smckusick goto out; 113762976Smckusick /* 113862976Smckusick * Account for the block pointers in each of the indirect blocks 113962976Smckusick * in the levels below us. 114062976Smckusick */ 114162976Smckusick subblksperindir = blksperindir / NINDIR(fs); 114262976Smckusick for (lbn++, level--, i = 0; i < last; i++) { 114398542Smckusick error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, 114490098Smckusick rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 114562976Smckusick if (error) 114662976Smckusick goto out; 114762976Smckusick rlbn += blksperindir; 114862976Smckusick lbn -= blksperindir; 114962976Smckusick remblks -= blksperindir; 115062976Smckusick } 115162976Smckusickout: 1152184205Sdes free(bap, M_DEVBUF); 115362976Smckusick return (error); 115462976Smckusick} 115562976Smckusick 115662976Smckusick/* 115790098Smckusick * Do both snap accounting and map accounting. 
115890098Smckusick */ 115990098Smckusickstatic int 116098542Smckusickfullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) 116190098Smckusick struct vnode *vp; 116298542Smckusick ufs1_daddr_t *oldblkp, *lastblkp; 116390098Smckusick struct fs *fs; 116498542Smckusick ufs_lbn_t lblkno; 116598542Smckusick int exptype; /* BLK_SNAP or BLK_NOCOPY */ 116698542Smckusick{ 116798542Smckusick int error; 116898542Smckusick 116998542Smckusick if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 117098542Smckusick return (error); 117198542Smckusick return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 117298542Smckusick} 117398542Smckusick 117498542Smckusick/* 117598542Smckusick * Identify a set of blocks allocated in a snapshot inode. 117698542Smckusick */ 117798542Smckusickstatic int 117898542Smckusicksnapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 117998542Smckusick struct vnode *vp; 118098542Smckusick ufs1_daddr_t *oldblkp, *lastblkp; 118198542Smckusick struct fs *fs; 118298542Smckusick ufs_lbn_t lblkno; 118390098Smckusick int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 118490098Smckusick{ 118598542Smckusick struct inode *ip = VTOI(vp); 118698542Smckusick ufs1_daddr_t blkno, *blkp; 118798542Smckusick ufs_lbn_t lbn; 118898542Smckusick struct buf *ibp; 118990098Smckusick int error; 119090098Smckusick 119198542Smckusick for ( ; oldblkp < lastblkp; oldblkp++) { 119298542Smckusick blkno = *oldblkp; 119398542Smckusick if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 119498542Smckusick continue; 119598542Smckusick lbn = fragstoblks(fs, blkno); 119698542Smckusick if (lbn < NDADDR) { 119798542Smckusick blkp = &ip->i_din1->di_db[lbn]; 119898542Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 119998542Smckusick } else { 1200141526Sphk error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn), 120198658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 120298542Smckusick if (error) 120398542Smckusick return (error); 
120498542Smckusick blkp = &((ufs1_daddr_t *)(ibp->b_data)) 120598542Smckusick [(lbn - NDADDR) % NINDIR(fs)]; 120698542Smckusick } 120798542Smckusick /* 120898542Smckusick * If we are expunging a snapshot vnode and we 120998542Smckusick * find a block marked BLK_NOCOPY, then it is 121098542Smckusick * one that has been allocated to this snapshot after 121198542Smckusick * we took our current snapshot and can be ignored. 121298542Smckusick */ 121398542Smckusick if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 121498542Smckusick if (lbn >= NDADDR) 121598542Smckusick brelse(ibp); 121698542Smckusick } else { 121798542Smckusick if (*blkp != 0) 1218121158Smckusick panic("snapacct_ufs1: bad block"); 121998542Smckusick *blkp = expungetype; 122098542Smckusick if (lbn >= NDADDR) 122198542Smckusick bdwrite(ibp); 122298542Smckusick } 122398542Smckusick } 122498542Smckusick return (0); 122598542Smckusick} 122698542Smckusick 122798542Smckusick/* 122898542Smckusick * Account for a set of blocks allocated in a snapshot inode. 
122998542Smckusick */ 123098542Smckusickstatic int 123198542Smckusickmapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 123298542Smckusick struct vnode *vp; 123398542Smckusick ufs1_daddr_t *oldblkp, *lastblkp; 123498542Smckusick struct fs *fs; 123598542Smckusick ufs_lbn_t lblkno; 123698542Smckusick int expungetype; 123798542Smckusick{ 123898542Smckusick ufs1_daddr_t blkno; 1239104698Smckusick struct inode *ip; 124098542Smckusick ino_t inum; 1241108050Smckusick int acctit; 124298542Smckusick 1243104698Smckusick ip = VTOI(vp); 1244104698Smckusick inum = ip->i_number; 1245108050Smckusick if (lblkno == -1) 1246108050Smckusick acctit = 0; 1247108050Smckusick else 1248108050Smckusick acctit = 1; 124998542Smckusick for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 125098542Smckusick blkno = *oldblkp; 125198542Smckusick if (blkno == 0 || blkno == BLK_NOCOPY) 125298542Smckusick continue; 1253108050Smckusick if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1254107915Smckusick *ip->i_snapblklist++ = lblkno; 125598542Smckusick if (blkno == BLK_SNAP) 125698542Smckusick blkno = blkstofrags(fs, lblkno); 1257223127Smckusick ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, 1258223127Smckusick vp->v_type, NULL); 125998542Smckusick } 126098542Smckusick return (0); 126198542Smckusick} 126298542Smckusick 126398542Smckusick/* 126498542Smckusick * Before expunging a snapshot inode, note all the 126598542Smckusick * blocks that it claims with BLK_SNAP so that fsck will 126698542Smckusick * be able to account for those blocks properly and so 126798542Smckusick * that this snapshot knows that it need not copy them 126898542Smckusick * if the other snapshot holding them is freed. This code 126998542Smckusick * is reproduced once each for UFS1 and UFS2. 
127098542Smckusick */ 127198542Smckusickstatic int 1272207141Sjeffexpunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) 127398542Smckusick struct vnode *snapvp; 127498542Smckusick struct inode *cancelip; 127598542Smckusick struct fs *fs; 127698542Smckusick int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 127798542Smckusick struct fs *, ufs_lbn_t, int); 127898542Smckusick int expungetype; 1279207141Sjeff int clearmode; 128098542Smckusick{ 128198542Smckusick int i, error, indiroff; 128298542Smckusick ufs_lbn_t lbn, rlbn; 128398542Smckusick ufs2_daddr_t len, blkno, numblks, blksperindir; 128498542Smckusick struct ufs2_dinode *dip; 128598542Smckusick struct thread *td = curthread; 128698542Smckusick struct buf *bp; 128798542Smckusick 128898542Smckusick /* 128998542Smckusick * Prepare to expunge the inode. If its inode block has not 129098542Smckusick * yet been copied, then allocate and fill the copy. 129198542Smckusick */ 129298542Smckusick lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 129398542Smckusick blkno = 0; 129498542Smckusick if (lbn < NDADDR) { 1295107558Smckusick blkno = VTOI(snapvp)->i_din2->di_db[lbn]; 129698542Smckusick } else { 1297207742Sjeff if (DOINGSOFTDEP(snapvp)) 1298207742Sjeff softdep_prealloc(snapvp, MNT_WAIT); 1299121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 1300141526Sphk error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 130198658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1302121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 130398542Smckusick if (error) 130498542Smckusick return (error); 130598542Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 130698542Smckusick blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; 130798542Smckusick bqrelse(bp); 130898542Smckusick } 1309107558Smckusick if (blkno != 0) { 1310107558Smckusick if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1311107558Smckusick return (error); 1312107558Smckusick } else { 1313141526Sphk error = 
ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1314107558Smckusick fs->fs_bsize, KERNCRED, 0, &bp); 1315107558Smckusick if (error) 1316107558Smckusick return (error); 1317135138Sphk if ((error = readblock(snapvp, bp, lbn)) != 0) 1318107558Smckusick return (error); 1319107558Smckusick } 132098542Smckusick /* 132198542Smckusick * Set a snapshot inode to be a zero length file, regular files 132298542Smckusick * to be completely unallocated. 132398542Smckusick */ 132498542Smckusick dip = (struct ufs2_dinode *)bp->b_data + 132598542Smckusick ino_to_fsbo(fs, cancelip->i_number); 1326207141Sjeff if (clearmode || cancelip->i_effnlink == 0) 132798542Smckusick dip->di_mode = 0; 132898542Smckusick dip->di_size = 0; 132998542Smckusick dip->di_blocks = 0; 133098542Smckusick dip->di_flags &= ~SF_SNAPSHOT; 133198542Smckusick bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); 133298542Smckusick bdwrite(bp); 1333107848Smckusick /* 1334107848Smckusick * Now go through and expunge all the blocks in the file 1335107848Smckusick * using the function requested. 
1336107848Smckusick */ 1337107848Smckusick numblks = howmany(cancelip->i_size, fs->fs_bsize); 1338107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], 1339107848Smckusick &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) 1340107848Smckusick return (error); 1341107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], 1342107848Smckusick &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) 1343107848Smckusick return (error); 1344107848Smckusick blksperindir = 1; 1345107848Smckusick lbn = -NDADDR; 1346107848Smckusick len = numblks - NDADDR; 1347107848Smckusick rlbn = NDADDR; 1348107848Smckusick for (i = 0; len > 0 && i < NIADDR; i++) { 1349107848Smckusick error = indiracct_ufs2(snapvp, ITOV(cancelip), i, 1350107848Smckusick cancelip->i_din2->di_ib[i], lbn, rlbn, len, 1351107848Smckusick blksperindir, fs, acctfunc, expungetype); 1352107848Smckusick if (error) 1353107848Smckusick return (error); 1354107848Smckusick blksperindir *= NINDIR(fs); 1355107848Smckusick lbn -= blksperindir + 1; 1356107848Smckusick len -= blksperindir; 1357107848Smckusick rlbn += blksperindir; 1358107848Smckusick } 135998542Smckusick return (0); 136090098Smckusick} 136190098Smckusick 136290098Smckusick/* 136398542Smckusick * Descend an indirect block chain for vnode cancelvp accounting for all 136498542Smckusick * its indirect blocks in snapvp. 
136598542Smckusick */ 136698542Smckusickstatic int 136798542Smckusickindiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 136898542Smckusick blksperindir, fs, acctfunc, expungetype) 136998542Smckusick struct vnode *snapvp; 137098542Smckusick struct vnode *cancelvp; 137198542Smckusick int level; 137298542Smckusick ufs2_daddr_t blkno; 137398542Smckusick ufs_lbn_t lbn; 137498542Smckusick ufs_lbn_t rlbn; 137598542Smckusick ufs_lbn_t remblks; 137698542Smckusick ufs_lbn_t blksperindir; 137798542Smckusick struct fs *fs; 137898542Smckusick int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 137998542Smckusick struct fs *, ufs_lbn_t, int); 138098542Smckusick int expungetype; 138198542Smckusick{ 138298542Smckusick int error, num, i; 138398542Smckusick ufs_lbn_t subblksperindir; 138498542Smckusick struct indir indirs[NIADDR + 2]; 138598542Smckusick ufs2_daddr_t last, *bap; 138698542Smckusick struct buf *bp; 138798542Smckusick 1388121158Smckusick if (blkno == 0) { 1389121158Smckusick if (expungetype == BLK_NOCOPY) 1390121158Smckusick return (0); 1391121158Smckusick panic("indiracct_ufs2: missing indir"); 1392121158Smckusick } 139398542Smckusick if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 139498542Smckusick return (error); 1395121158Smckusick if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1396121158Smckusick panic("indiracct_ufs2: botched params"); 139798542Smckusick /* 139898542Smckusick * We have to expand bread here since it will deadlock looking 139998542Smckusick * up the block number for any blocks that are not in the cache. 
140098542Smckusick */ 1401111856Sjeff bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 140298542Smckusick bp->b_blkno = fsbtodb(fs, blkno); 140398542Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1404135138Sphk (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 140598542Smckusick brelse(bp); 140698542Smckusick return (error); 140798542Smckusick } 140898542Smckusick /* 140998542Smckusick * Account for the block pointers in this indirect block. 141098542Smckusick */ 141198542Smckusick last = howmany(remblks, blksperindir); 141298542Smckusick if (last > NINDIR(fs)) 141398542Smckusick last = NINDIR(fs); 1414184205Sdes bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); 141598542Smckusick bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 141698542Smckusick bqrelse(bp); 1417107848Smckusick error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1418107848Smckusick level == 0 ? rlbn : -1, expungetype); 141998542Smckusick if (error || level == 0) 142098542Smckusick goto out; 142198542Smckusick /* 142298542Smckusick * Account for the block pointers in each of the indirect blocks 142398542Smckusick * in the levels below us. 142498542Smckusick */ 142598542Smckusick subblksperindir = blksperindir / NINDIR(fs); 142698542Smckusick for (lbn++, level--, i = 0; i < last; i++) { 142798542Smckusick error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, 142898542Smckusick rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 142998542Smckusick if (error) 143098542Smckusick goto out; 143198542Smckusick rlbn += blksperindir; 143298542Smckusick lbn -= blksperindir; 143398542Smckusick remblks -= blksperindir; 143498542Smckusick } 143598542Smckusickout: 1436184205Sdes free(bap, M_DEVBUF); 143798542Smckusick return (error); 143898542Smckusick} 143998542Smckusick 144098542Smckusick/* 144198542Smckusick * Do both snap accounting and map accounting. 
144298542Smckusick */ 144398542Smckusickstatic int 144498542Smckusickfullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) 144598542Smckusick struct vnode *vp; 144698542Smckusick ufs2_daddr_t *oldblkp, *lastblkp; 144798542Smckusick struct fs *fs; 144898542Smckusick ufs_lbn_t lblkno; 144998542Smckusick int exptype; /* BLK_SNAP or BLK_NOCOPY */ 145098542Smckusick{ 145198542Smckusick int error; 145298542Smckusick 145398542Smckusick if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 145498542Smckusick return (error); 145598542Smckusick return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 145698542Smckusick} 145798542Smckusick 145898542Smckusick/* 145987827Smckusick * Identify a set of blocks allocated in a snapshot inode. 146062976Smckusick */ 146162976Smckusickstatic int 146298542Smckusicksnapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 146362976Smckusick struct vnode *vp; 146498542Smckusick ufs2_daddr_t *oldblkp, *lastblkp; 146576269Smckusick struct fs *fs; 146698542Smckusick ufs_lbn_t lblkno; 146790098Smckusick int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 146862976Smckusick{ 146962976Smckusick struct inode *ip = VTOI(vp); 147098542Smckusick ufs2_daddr_t blkno, *blkp; 147198542Smckusick ufs_lbn_t lbn; 147262976Smckusick struct buf *ibp; 147362976Smckusick int error; 147462976Smckusick 147562976Smckusick for ( ; oldblkp < lastblkp; oldblkp++) { 147662976Smckusick blkno = *oldblkp; 147762976Smckusick if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 147862976Smckusick continue; 147962976Smckusick lbn = fragstoblks(fs, blkno); 148062976Smckusick if (lbn < NDADDR) { 148198542Smckusick blkp = &ip->i_din2->di_db[lbn]; 148262976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 148362976Smckusick } else { 1484141526Sphk error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn), 148598658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 148662976Smckusick if (error) 148762976Smckusick return (error); 
148898542Smckusick blkp = &((ufs2_daddr_t *)(ibp->b_data)) 148962976Smckusick [(lbn - NDADDR) % NINDIR(fs)]; 149062976Smckusick } 149187827Smckusick /* 149290098Smckusick * If we are expunging a snapshot vnode and we 149390098Smckusick * find a block marked BLK_NOCOPY, then it is 149487827Smckusick * one that has been allocated to this snapshot after 149587827Smckusick * we took our current snapshot and can be ignored. 149687827Smckusick */ 149790098Smckusick if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 149887827Smckusick if (lbn >= NDADDR) 149987827Smckusick brelse(ibp); 150087827Smckusick } else { 150187827Smckusick if (*blkp != 0) 1502121158Smckusick panic("snapacct_ufs2: bad block"); 150390098Smckusick *blkp = expungetype; 150487827Smckusick if (lbn >= NDADDR) 150587827Smckusick bdwrite(ibp); 150663788Smckusick } 150762976Smckusick } 150862976Smckusick return (0); 150962976Smckusick} 151062976Smckusick 151162976Smckusick/* 151276269Smckusick * Account for a set of blocks allocated in a snapshot inode. 
151376269Smckusick */ 151476269Smckusickstatic int 151598542Smckusickmapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 151676269Smckusick struct vnode *vp; 151798542Smckusick ufs2_daddr_t *oldblkp, *lastblkp; 151876269Smckusick struct fs *fs; 151998542Smckusick ufs_lbn_t lblkno; 152090098Smckusick int expungetype; 152176269Smckusick{ 152298542Smckusick ufs2_daddr_t blkno; 1523104698Smckusick struct inode *ip; 152490098Smckusick ino_t inum; 1525108050Smckusick int acctit; 152676269Smckusick 1527104698Smckusick ip = VTOI(vp); 1528104698Smckusick inum = ip->i_number; 1529108050Smckusick if (lblkno == -1) 1530108050Smckusick acctit = 0; 1531108050Smckusick else 1532108050Smckusick acctit = 1; 153376269Smckusick for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 153476269Smckusick blkno = *oldblkp; 153576269Smckusick if (blkno == 0 || blkno == BLK_NOCOPY) 153676269Smckusick continue; 1537108050Smckusick if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1538107915Smckusick *ip->i_snapblklist++ = lblkno; 153976269Smckusick if (blkno == BLK_SNAP) 154076269Smckusick blkno = blkstofrags(fs, lblkno); 1541223127Smckusick ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, 1542223127Smckusick vp->v_type, NULL); 154376269Smckusick } 154476269Smckusick return (0); 154576269Smckusick} 154676269Smckusick 154776269Smckusick/* 154870183Smckusick * Decrement extra reference on snapshot when last name is removed. 154970183Smckusick * It will not be freed until the last open reference goes away. 155070183Smckusick */ 155170183Smckusickvoid 155270183Smckusickffs_snapgone(ip) 155370183Smckusick struct inode *ip; 155470183Smckusick{ 155570183Smckusick struct inode *xp; 155674547Smckusick struct fs *fs; 155774547Smckusick int snaploc; 1558135138Sphk struct snapdata *sn; 1559140706Sjeff struct ufsmount *ump; 156070183Smckusick 156170183Smckusick /* 156270183Smckusick * Find snapshot in incore list. 
156370183Smckusick */ 1564135138Sphk xp = NULL; 1565135138Sphk sn = ip->i_devvp->v_rdev->si_snapdata; 1566135138Sphk if (sn != NULL) 1567135138Sphk TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) 1568135138Sphk if (xp == ip) 1569135138Sphk break; 1570107848Smckusick if (xp != NULL) 1571107848Smckusick vrele(ITOV(ip)); 1572107848Smckusick else if (snapdebug) 157370183Smckusick printf("ffs_snapgone: lost snapshot vnode %d\n", 157470183Smckusick ip->i_number); 157574547Smckusick /* 157674547Smckusick * Delete snapshot inode from superblock. Keep list dense. 157774547Smckusick */ 157874547Smckusick fs = ip->i_fs; 1579140706Sjeff ump = ip->i_ump; 1580140706Sjeff UFS_LOCK(ump); 158174547Smckusick for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 158274547Smckusick if (fs->fs_snapinum[snaploc] == ip->i_number) 158374547Smckusick break; 158474547Smckusick if (snaploc < FSMAXSNAP) { 158574547Smckusick for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 158674547Smckusick if (fs->fs_snapinum[snaploc] == 0) 158774547Smckusick break; 158874547Smckusick fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 158974547Smckusick } 159074547Smckusick fs->fs_snapinum[snaploc - 1] = 0; 159174547Smckusick } 1592140706Sjeff UFS_UNLOCK(ump); 159370183Smckusick} 159470183Smckusick 159570183Smckusick/* 159662976Smckusick * Prepare a snapshot file for being removed. 
159762976Smckusick */ 159862976Smckusickvoid 159962976Smckusickffs_snapremove(vp) 160062976Smckusick struct vnode *vp; 160162976Smckusick{ 160273942Smckusick struct inode *ip; 160362976Smckusick struct vnode *devvp; 160462976Smckusick struct buf *ibp; 160562976Smckusick struct fs *fs; 1606158259Stegge ufs2_daddr_t numblks, blkno, dblk; 160798542Smckusick int error, loc, last; 1608135138Sphk struct snapdata *sn; 160962976Smckusick 161062976Smckusick ip = VTOI(vp); 161162976Smckusick fs = ip->i_fs; 1612107414Smckusick devvp = ip->i_devvp; 161362976Smckusick /* 161475943Smckusick * If active, delete from incore list (this snapshot may 161575943Smckusick * already have been in the process of being deleted, so 161675943Smckusick * would not have been active). 161775943Smckusick * 161862976Smckusick * Clear copy-on-write flag if last snapshot. 161962976Smckusick */ 1620158259Stegge VI_LOCK(devvp); 162175943Smckusick if (ip->i_nextsnap.tqe_prev != 0) { 1622158259Stegge sn = devvp->v_rdev->si_snapdata; 1623135138Sphk TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap); 1624107414Smckusick ip->i_nextsnap.tqe_prev = 0; 1625158259Stegge VI_UNLOCK(devvp); 1626175635Sattilio lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 1627158259Stegge KASSERT(vp->v_vnlock == &sn->sn_lock, 1628158259Stegge ("ffs_snapremove: lost lock mutation")); 1629105191Smckusick vp->v_vnlock = &vp->v_lock; 1630158259Stegge VI_LOCK(devvp); 1631175635Sattilio lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 1632177778Sjeff try_free_snapdata(devvp); 1633158259Stegge } else 1634158259Stegge VI_UNLOCK(devvp); 163562976Smckusick /* 163662976Smckusick * Clear all BLK_NOCOPY fields. Pass any block claims to other 163762976Smckusick * snapshots that want them (see ffs_snapblkfree below). 
163862976Smckusick */ 163962976Smckusick for (blkno = 1; blkno < NDADDR; blkno++) { 164098542Smckusick dblk = DIP(ip, i_db[blkno]); 1641151177Stegge if (dblk == 0) 1642151177Stegge continue; 164376356Smckusick if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1644132775Skan DIP_SET(ip, i_db[blkno], 0); 164576356Smckusick else if ((dblk == blkstofrags(fs, blkno) && 164690098Smckusick ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, 1647223127Smckusick ip->i_number, vp->v_type, NULL))) { 1648132775Skan DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - 1649132775Skan btodb(fs->fs_bsize)); 1650132775Skan DIP_SET(ip, i_db[blkno], 0); 165176356Smckusick } 165262976Smckusick } 165376356Smckusick numblks = howmany(ip->i_size, fs->fs_bsize); 165476356Smckusick for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 165576132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 165698658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 165762976Smckusick if (error) 165862976Smckusick continue; 165998542Smckusick if (fs->fs_size - blkno > NINDIR(fs)) 166062976Smckusick last = NINDIR(fs); 166198542Smckusick else 166298542Smckusick last = fs->fs_size - blkno; 166362976Smckusick for (loc = 0; loc < last; loc++) { 166498542Smckusick if (ip->i_ump->um_fstype == UFS1) { 166598542Smckusick dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; 1666151177Stegge if (dblk == 0) 1667151177Stegge continue; 166898542Smckusick if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 166998542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 167098542Smckusick else if ((dblk == blkstofrags(fs, blkno) && 167198542Smckusick ffs_snapblkfree(fs, ip->i_devvp, dblk, 1672223127Smckusick fs->fs_bsize, ip->i_number, vp->v_type, 1673223127Smckusick NULL))) { 167498542Smckusick ip->i_din1->di_blocks -= 167598542Smckusick btodb(fs->fs_bsize); 167698542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 167798542Smckusick } 167898542Smckusick continue; 167998542Smckusick } 168098542Smckusick dblk = ((ufs2_daddr_t 
*)(ibp->b_data))[loc]; 1681151177Stegge if (dblk == 0) 1682151177Stegge continue; 168376356Smckusick if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 168498542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 168576356Smckusick else if ((dblk == blkstofrags(fs, blkno) && 168690098Smckusick ffs_snapblkfree(fs, ip->i_devvp, dblk, 1687223127Smckusick fs->fs_bsize, ip->i_number, vp->v_type, NULL))) { 168898542Smckusick ip->i_din2->di_blocks -= btodb(fs->fs_bsize); 168998542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 169076356Smckusick } 169162976Smckusick } 169262976Smckusick bawrite(ibp); 169362976Smckusick } 169462976Smckusick /* 169562976Smckusick * Clear snapshot flag and drop reference. 169662976Smckusick */ 169763897Smckusick ip->i_flags &= ~SF_SNAPSHOT; 1698132775Skan DIP_SET(ip, i_flags, ip->i_flags); 169962976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 1700207141Sjeff /* 1701207141Sjeff * The dirtied indirects must be written out before 1702207141Sjeff * softdep_setup_freeblocks() is called. Otherwise indir_trunc() 1703207141Sjeff * may find indirect pointers using the magic BLK_* values. 1704207141Sjeff */ 1705207141Sjeff if (DOINGSOFTDEP(vp)) 1706233630Smckusick ffs_syncvnode(vp, MNT_WAIT, 0); 1707158322Stegge#ifdef QUOTA 1708158322Stegge /* 1709158322Stegge * Reenable disk quotas for ex-snapshot file. 1710158322Stegge */ 1711158322Stegge if (!getinoquota(ip)) 1712158322Stegge (void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE); 1713158322Stegge#endif 171462976Smckusick} 171562976Smckusick 171662976Smckusick/* 171762976Smckusick * Notification that a block is being freed. Return zero if the free 171862976Smckusick * should be allowed to proceed. Return non-zero if the snapshot file 171962976Smckusick * wants to claim the block. The block will be claimed if it is an 172062976Smckusick * uncopied part of one of the snapshots. It will be freed if it is 172162976Smckusick * either a BLK_NOCOPY or has already been copied in all of the snapshots. 
172262976Smckusick * If a fragment is being freed, then all snapshots that care about 172362976Smckusick * it must make a copy since a snapshot file can only claim full sized 172462976Smckusick * blocks. Note that if more than one snapshot file maps the block, 172562976Smckusick * we can pick one at random to claim it. Since none of the snapshots 172662976Smckusick * can change, we are assurred that they will all see the same unmodified 172762976Smckusick * image. When deleting a snapshot file (see ffs_snapremove above), we 172862976Smckusick * must push any of these claimed blocks to one of the other snapshots 172962976Smckusick * that maps it. These claimed blocks are easily identified as they will 173062976Smckusick * have a block number equal to their logical block number within the 173162976Smckusick * snapshot. A copied block can never have this property because they 173262976Smckusick * must always have been allocated from a BLK_NOCOPY location. 173362976Smckusick */ 173462976Smckusickint 1735223127Smckusickffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) 173690098Smckusick struct fs *fs; 173790098Smckusick struct vnode *devvp; 173898542Smckusick ufs2_daddr_t bno; 173962976Smckusick long size; 174090098Smckusick ino_t inum; 1741223127Smckusick enum vtype vtype; 1742223020Smckusick struct workhead *wkhd; 174362976Smckusick{ 174462976Smckusick struct buf *ibp, *cbp, *savedcbp = 0; 174583366Sjulian struct thread *td = curthread; 174662976Smckusick struct inode *ip; 1747107414Smckusick struct vnode *vp = NULL; 174898542Smckusick ufs_lbn_t lbn; 174998542Smckusick ufs2_daddr_t blkno; 1750151177Stegge int indiroff = 0, error = 0, claimedblk = 0; 1751135138Sphk struct snapdata *sn; 175262976Smckusick 175362976Smckusick lbn = fragstoblks(fs, bno); 1754107414Smckusickretry: 1755107414Smckusick VI_LOCK(devvp); 1756135138Sphk sn = devvp->v_rdev->si_snapdata; 1757135312Sphk if (sn == NULL) { 1758135312Sphk VI_UNLOCK(devvp); 1759135312Sphk return (0); 
1760135312Sphk } 1761175635Sattilio if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 1762175635Sattilio VI_MTX(devvp)) != 0) 1763151177Stegge goto retry; 1764135138Sphk TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 176562976Smckusick vp = ITOV(ip); 1766207742Sjeff if (DOINGSOFTDEP(vp)) 1767207742Sjeff softdep_prealloc(vp, MNT_WAIT); 176862976Smckusick /* 176962976Smckusick * Lookup block being written. 177062976Smckusick */ 177162976Smckusick if (lbn < NDADDR) { 177298542Smckusick blkno = DIP(ip, i_db[lbn]); 177362976Smckusick } else { 1774121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 177576132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 177698658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1777121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 177862976Smckusick if (error) 177962976Smckusick break; 178062976Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 178198542Smckusick if (ip->i_ump->um_fstype == UFS1) 178298542Smckusick blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 178398542Smckusick else 178498542Smckusick blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 178562976Smckusick } 178662976Smckusick /* 178762976Smckusick * Check to see if block needs to be copied. 178862976Smckusick */ 178998542Smckusick if (blkno == 0) { 179098542Smckusick /* 179198542Smckusick * A block that we map is being freed. If it has not 179298542Smckusick * been claimed yet, we will claim or copy it (below). 179398542Smckusick */ 179498542Smckusick claimedblk = 1; 179598542Smckusick } else if (blkno == BLK_SNAP) { 179698542Smckusick /* 179798542Smckusick * No previous snapshot claimed the block, 1798107414Smckusick * so it will be freed and become a BLK_NOCOPY 179998542Smckusick * (don't care) for us. 
180098542Smckusick */ 180162976Smckusick if (claimedblk) 180262976Smckusick panic("snapblkfree: inconsistent block type"); 180362976Smckusick if (lbn < NDADDR) { 1804132775Skan DIP_SET(ip, i_db[lbn], BLK_NOCOPY); 180562976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 180698542Smckusick } else if (ip->i_ump->um_fstype == UFS1) { 180798542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 180898542Smckusick BLK_NOCOPY; 180998542Smckusick bdwrite(ibp); 181062976Smckusick } else { 181198542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 181262976Smckusick BLK_NOCOPY; 181362976Smckusick bdwrite(ibp); 181462976Smckusick } 181562976Smckusick continue; 181698542Smckusick } else /* BLK_NOCOPY or default */ { 181798542Smckusick /* 181898542Smckusick * If the snapshot has already copied the block 181998542Smckusick * (default), or does not care about the block, 182098542Smckusick * it is not needed. 182198542Smckusick */ 182298542Smckusick if (lbn >= NDADDR) 182398542Smckusick bqrelse(ibp); 182498542Smckusick continue; 182562976Smckusick } 182662976Smckusick /* 182762976Smckusick * If this is a full size block, we will just grab it 182862976Smckusick * and assign it to the snapshot inode. Otherwise we 182962976Smckusick * will proceed to copy it. See explanation for this 183062976Smckusick * routine as to why only a single snapshot needs to 183162976Smckusick * claim this block. 183262976Smckusick */ 183362976Smckusick if (size == fs->fs_bsize) { 183462976Smckusick#ifdef DEBUG 183562976Smckusick if (snapdebug) 183698687Smux printf("%s %d lbn %jd from inum %d\n", 183798542Smckusick "Grabonremove: snapino", ip->i_number, 183898542Smckusick (intmax_t)lbn, inum); 183962976Smckusick#endif 1840223020Smckusick /* 1841223020Smckusick * If journaling is tracking this write we must add 1842223020Smckusick * the work to the inode or indirect being written. 
1843223020Smckusick */ 1844223020Smckusick if (wkhd != NULL) { 1845223020Smckusick if (lbn < NDADDR) 1846223020Smckusick softdep_inode_append(ip, 1847223020Smckusick curthread->td_ucred, wkhd); 1848223020Smckusick else 1849223020Smckusick softdep_buf_append(ibp, wkhd); 1850223020Smckusick } 185162976Smckusick if (lbn < NDADDR) { 1852132775Skan DIP_SET(ip, i_db[lbn], bno); 185398542Smckusick } else if (ip->i_ump->um_fstype == UFS1) { 185498542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; 185598542Smckusick bdwrite(ibp); 185662976Smckusick } else { 185798542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; 185862976Smckusick bdwrite(ibp); 185962976Smckusick } 1860132775Skan DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); 186162976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 1862175635Sattilio lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 186362976Smckusick return (1); 186462976Smckusick } 186562976Smckusick if (lbn >= NDADDR) 186663788Smckusick bqrelse(ibp); 186762976Smckusick /* 186862976Smckusick * Allocate the block into which to do the copy. Note that this 186962976Smckusick * allocation will never require any additional allocations for 187062976Smckusick * the snapshot inode. 187162976Smckusick */ 1872121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 187376132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 187462976Smckusick fs->fs_bsize, KERNCRED, 0, &cbp); 1875121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 1876107414Smckusick if (error) 187762976Smckusick break; 187862976Smckusick#ifdef DEBUG 187962976Smckusick if (snapdebug) 188098687Smux printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", 188198542Smckusick "Copyonremove: snapino ", ip->i_number, 188298542Smckusick (intmax_t)lbn, "for inum", inum, size, 188398542Smckusick (intmax_t)cbp->b_blkno); 188462976Smckusick#endif 188562976Smckusick /* 188662976Smckusick * If we have already read the old block contents, then 188775943Smckusick * simply copy them to the new block. 
Note that we need 188875943Smckusick * to synchronously write snapshots that have not been 188975943Smckusick * unlinked, and hence will be visible after a crash, 1890223127Smckusick * to ensure their integrity. At a minimum we ensure the 1891223127Smckusick * integrity of the filesystem metadata, but use the 1892223127Smckusick * dopersistence sysctl-setable flag to decide on the 1893223127Smckusick * persistence needed for file content data. 189462976Smckusick */ 189562976Smckusick if (savedcbp != 0) { 189662976Smckusick bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 189762976Smckusick bawrite(cbp); 1898223127Smckusick if ((vtype == VDIR || dopersistence) && 1899223127Smckusick ip->i_effnlink > 0) 1900233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 190162976Smckusick continue; 190262976Smckusick } 190362976Smckusick /* 190462976Smckusick * Otherwise, read the old block contents into the buffer. 190562976Smckusick */ 1906135138Sphk if ((error = readblock(vp, cbp, lbn)) != 0) { 190775943Smckusick bzero(cbp->b_data, fs->fs_bsize); 190875943Smckusick bawrite(cbp); 1909223127Smckusick if ((vtype == VDIR || dopersistence) && 1910223127Smckusick ip->i_effnlink > 0) 1911233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 191262976Smckusick break; 191375943Smckusick } 191462976Smckusick savedcbp = cbp; 191562976Smckusick } 191675943Smckusick /* 191775943Smckusick * Note that we need to synchronously write snapshots that 191875943Smckusick * have not been unlinked, and hence will be visible after 1919223127Smckusick * a crash, to ensure their integrity. At a minimum we 1920223127Smckusick * ensure the integrity of the filesystem metadata, but 1921223127Smckusick * use the dopersistence sysctl-setable flag to decide on 1922223127Smckusick * the persistence needed for file content data. 
192375943Smckusick */ 192475943Smckusick if (savedcbp) { 192575943Smckusick vp = savedcbp->b_vp; 192662976Smckusick bawrite(savedcbp); 1927223268Smckusick if ((vtype == VDIR || dopersistence) && 1928223268Smckusick VTOI(vp)->i_effnlink > 0) 1929233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 193075943Smckusick } 193162976Smckusick /* 193262976Smckusick * If we have been unable to allocate a block in which to do 193362976Smckusick * the copy, then return non-zero so that the fragment will 193462976Smckusick * not be freed. Although space will be lost, the snapshot 193562976Smckusick * will stay consistent. 193662976Smckusick */ 1937223020Smckusick if (error != 0 && wkhd != NULL) 1938223020Smckusick softdep_freework(wkhd); 1939175635Sattilio lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 194062976Smckusick return (error); 194162976Smckusick} 194262976Smckusick 194362976Smckusick/* 194462976Smckusick * Associate snapshot files when mounting. 194562976Smckusick */ 194662976Smckusickvoid 194762976Smckusickffs_snapshot_mount(mp) 194862976Smckusick struct mount *mp; 194962976Smckusick{ 195062976Smckusick struct ufsmount *ump = VFSTOUFS(mp); 1951107414Smckusick struct vnode *devvp = ump->um_devvp; 195262976Smckusick struct fs *fs = ump->um_fs; 195383366Sjulian struct thread *td = curthread; 1954135138Sphk struct snapdata *sn; 195562976Smckusick struct vnode *vp; 1956158636Stegge struct vnode *lastvp; 1957135303Sphk struct inode *ip; 1958104698Smckusick struct uio auio; 1959104698Smckusick struct iovec aiov; 1960107848Smckusick void *snapblklist; 1961104698Smckusick char *reason; 1962107848Smckusick daddr_t snaplistsize; 196362976Smckusick int error, snaploc, loc; 196462976Smckusick 1965104698Smckusick /* 1966141526Sphk * XXX The following needs to be set before ffs_truncate or 1967104698Smckusick * VOP_READ can be called. 
1968104698Smckusick */ 1969104698Smckusick mp->mnt_stat.f_iosize = fs->fs_bsize; 1970104698Smckusick /* 1971104698Smckusick * Process each snapshot listed in the superblock. 1972104698Smckusick */ 1973107848Smckusick vp = NULL; 1974158636Stegge lastvp = NULL; 1975177778Sjeff sn = NULL; 197662976Smckusick for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 197762976Smckusick if (fs->fs_snapinum[snaploc] == 0) 1978107848Smckusick break; 1979141526Sphk if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc], 198092462Smckusick LK_EXCLUSIVE, &vp)) != 0){ 198162976Smckusick printf("ffs_snapshot_mount: vget failed %d\n", error); 198262976Smckusick continue; 198362976Smckusick } 198462976Smckusick ip = VTOI(vp); 1985233630Smckusick if (!IS_SNAPSHOT(ip) || ip->i_size == 1986104698Smckusick lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1987233630Smckusick if (!IS_SNAPSHOT(ip)) { 1988104698Smckusick reason = "non-snapshot"; 1989104698Smckusick } else { 1990104698Smckusick reason = "old format snapshot"; 1991141526Sphk (void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td); 1992233630Smckusick (void)ffs_syncvnode(vp, MNT_WAIT, 0); 1993104698Smckusick } 1994104698Smckusick printf("ffs_snapshot_mount: %s inode %d\n", 1995104698Smckusick reason, fs->fs_snapinum[snaploc]); 199662976Smckusick vput(vp); 1997107848Smckusick vp = NULL; 199862976Smckusick for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 199962976Smckusick if (fs->fs_snapinum[loc] == 0) 200062976Smckusick break; 200162976Smckusick fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 200262976Smckusick } 200362976Smckusick fs->fs_snapinum[loc - 1] = 0; 200462976Smckusick snaploc--; 200562976Smckusick continue; 200662976Smckusick } 2007104698Smckusick /* 2008177778Sjeff * Acquire a lock on the snapdata structure, creating it if 2009177778Sjeff * necessary. 
2010105191Smckusick */ 2011177778Sjeff sn = ffs_snapdata_acquire(devvp); 2012177778Sjeff /* 2013177778Sjeff * Change vnode to use shared snapshot lock instead of the 2014177778Sjeff * original private lock. 2015177778Sjeff */ 2016177778Sjeff vp->v_vnlock = &sn->sn_lock; 2017175635Sattilio lockmgr(&vp->v_lock, LK_RELEASE, NULL); 2018105191Smckusick /* 2019104698Smckusick * Link it onto the active snapshot list. 2020104698Smckusick */ 2021107414Smckusick VI_LOCK(devvp); 202273942Smckusick if (ip->i_nextsnap.tqe_prev != 0) 202362976Smckusick panic("ffs_snapshot_mount: %d already on list", 202462976Smckusick ip->i_number); 202573942Smckusick else 2026135138Sphk TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 2027101308Sjeff vp->v_vflag |= VV_SYSTEM; 2028107414Smckusick VI_UNLOCK(devvp); 2029175294Sattilio VOP_UNLOCK(vp, 0); 2030158636Stegge lastvp = vp; 203162976Smckusick } 2032158636Stegge vp = lastvp; 2033107848Smckusick /* 2034107848Smckusick * No usable snapshots found. 2035107848Smckusick */ 2036177778Sjeff if (sn == NULL || vp == NULL) 2037107848Smckusick return; 2038107848Smckusick /* 2039107848Smckusick * Allocate the space for the block hints list. We always want to 2040107848Smckusick * use the list from the newest snapshot. 
2041107848Smckusick */ 2042107848Smckusick auio.uio_iov = &aiov; 2043107848Smckusick auio.uio_iovcnt = 1; 2044107848Smckusick aiov.iov_base = (void *)&snaplistsize; 2045107848Smckusick aiov.iov_len = sizeof(snaplistsize); 2046107848Smckusick auio.uio_resid = aiov.iov_len; 2047107848Smckusick auio.uio_offset = 2048107848Smckusick lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 2049107848Smckusick auio.uio_segflg = UIO_SYSSPACE; 2050107848Smckusick auio.uio_rw = UIO_READ; 2051107848Smckusick auio.uio_td = td; 2052175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2053107848Smckusick if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2054107848Smckusick printf("ffs_snapshot_mount: read_1 failed %d\n", error); 2055175294Sattilio VOP_UNLOCK(vp, 0); 2056107848Smckusick return; 2057107848Smckusick } 2058184205Sdes snapblklist = malloc(snaplistsize * sizeof(daddr_t), 2059111119Simp M_UFSMNT, M_WAITOK); 2060107848Smckusick auio.uio_iovcnt = 1; 2061107848Smckusick aiov.iov_base = snapblklist; 2062107848Smckusick aiov.iov_len = snaplistsize * sizeof (daddr_t); 2063107848Smckusick auio.uio_resid = aiov.iov_len; 2064107848Smckusick auio.uio_offset -= sizeof(snaplistsize); 2065107848Smckusick if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2066107848Smckusick printf("ffs_snapshot_mount: read_2 failed %d\n", error); 2067175294Sattilio VOP_UNLOCK(vp, 0); 2068184205Sdes free(snapblklist, M_UFSMNT); 2069107848Smckusick return; 2070107848Smckusick } 2071175294Sattilio VOP_UNLOCK(vp, 0); 2072107848Smckusick VI_LOCK(devvp); 2073107848Smckusick ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 2074135138Sphk sn->sn_listsize = snaplistsize; 2075135138Sphk sn->sn_blklist = (daddr_t *)snapblklist; 2076107848Smckusick devvp->v_vflag |= VV_COPYONWRITE; 2077107848Smckusick VI_UNLOCK(devvp); 207862976Smckusick} 207962976Smckusick 208062976Smckusick/* 208162976Smckusick * Disassociate snapshot files when unmounting. 
208262976Smckusick */ 208362976Smckusickvoid 208462976Smckusickffs_snapshot_unmount(mp) 208562976Smckusick struct mount *mp; 208662976Smckusick{ 2087107414Smckusick struct vnode *devvp = VFSTOUFS(mp)->um_devvp; 2088135138Sphk struct snapdata *sn; 208962976Smckusick struct inode *xp; 2090105191Smckusick struct vnode *vp; 209162976Smckusick 2092158259Stegge VI_LOCK(devvp); 2093135138Sphk sn = devvp->v_rdev->si_snapdata; 2094158259Stegge while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) { 2095105191Smckusick vp = ITOV(xp); 2096135138Sphk TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap); 209773942Smckusick xp->i_nextsnap.tqe_prev = 0; 2098175635Sattilio lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE, 2099175635Sattilio VI_MTX(devvp)); 2100177778Sjeff lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 2101158259Stegge KASSERT(vp->v_vnlock == &sn->sn_lock, 2102158259Stegge ("ffs_snapshot_unmount: lost lock mutation")); 2103158259Stegge vp->v_vnlock = &vp->v_lock; 2104175635Sattilio lockmgr(&vp->v_lock, LK_RELEASE, NULL); 2105175635Sattilio lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2106158259Stegge if (xp->i_effnlink > 0) 2107105191Smckusick vrele(vp); 2108158259Stegge VI_LOCK(devvp); 2109158259Stegge sn = devvp->v_rdev->si_snapdata; 211062976Smckusick } 2111177778Sjeff try_free_snapdata(devvp); 2112107414Smckusick ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); 211362976Smckusick} 211462976Smckusick 211562976Smckusick/* 2116166193Skib * Check the buffer block to be belong to device buffer that shall be 2117166193Skib * locked after snaplk. devvp shall be locked on entry, and will be 2118166193Skib * leaved locked upon exit. 
2119166193Skib */ 2120166193Skibstatic int 2121166193Skibffs_bp_snapblk(devvp, bp) 2122166193Skib struct vnode *devvp; 2123166193Skib struct buf *bp; 2124166193Skib{ 2125166193Skib struct snapdata *sn; 2126166193Skib struct fs *fs; 2127166193Skib ufs2_daddr_t lbn, *snapblklist; 2128166193Skib int lower, upper, mid; 2129166193Skib 2130166193Skib ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk"); 2131166193Skib KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp)); 2132166193Skib sn = devvp->v_rdev->si_snapdata; 2133166193Skib if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL) 2134166193Skib return (0); 2135166193Skib fs = TAILQ_FIRST(&sn->sn_head)->i_fs; 2136166193Skib lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2137166193Skib snapblklist = sn->sn_blklist; 2138166193Skib upper = sn->sn_listsize - 1; 2139166193Skib lower = 1; 2140166193Skib while (lower <= upper) { 2141166193Skib mid = (lower + upper) / 2; 2142166193Skib if (snapblklist[mid] == lbn) 2143166193Skib break; 2144166193Skib if (snapblklist[mid] < lbn) 2145166193Skib lower = mid + 1; 2146166193Skib else 2147166193Skib upper = mid - 1; 2148166193Skib } 2149166193Skib if (lower <= upper) 2150166193Skib return (1); 2151166193Skib return (0); 2152166193Skib} 2153166193Skib 2154166193Skibvoid 2155166193Skibffs_bdflush(bo, bp) 2156166193Skib struct bufobj *bo; 2157166193Skib struct buf *bp; 2158166193Skib{ 2159166193Skib struct thread *td; 2160166193Skib struct vnode *vp, *devvp; 2161166193Skib struct buf *nbp; 2162166193Skib int bp_bdskip; 2163166193Skib 2164166193Skib if (bo->bo_dirty.bv_cnt <= dirtybufthresh) 2165166193Skib return; 2166166193Skib 2167166193Skib td = curthread; 2168166193Skib vp = bp->b_vp; 2169166193Skib devvp = bo->__bo_vnode; 2170166193Skib KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp)); 2171166193Skib 2172166193Skib VI_LOCK(devvp); 2173166193Skib bp_bdskip = ffs_bp_snapblk(devvp, bp); 2174166193Skib if (bp_bdskip) 2175166193Skib bdwriteskip++; 2176166193Skib VI_UNLOCK(devvp); 
2177166193Skib if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) { 2178166193Skib (void) VOP_FSYNC(vp, MNT_NOWAIT, td); 2179166193Skib altbufferflushes++; 2180166193Skib } else { 2181166193Skib BO_LOCK(bo); 2182166193Skib /* 2183166193Skib * Try to find a buffer to flush. 2184166193Skib */ 2185166193Skib TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { 2186166193Skib if ((nbp->b_vflags & BV_BKGRDINPROG) || 2187166193Skib BUF_LOCK(nbp, 2188166193Skib LK_EXCLUSIVE | LK_NOWAIT, NULL)) 2189166193Skib continue; 2190166193Skib if (bp == nbp) 2191166193Skib panic("bdwrite: found ourselves"); 2192166193Skib BO_UNLOCK(bo); 2193166193Skib /* 2194166193Skib * Don't countdeps with the bo lock 2195166193Skib * held. 2196166193Skib */ 2197166193Skib if (buf_countdeps(nbp, 0)) { 2198166193Skib BO_LOCK(bo); 2199166193Skib BUF_UNLOCK(nbp); 2200166193Skib continue; 2201166193Skib } 2202166193Skib if (bp_bdskip) { 2203166193Skib VI_LOCK(devvp); 2204166193Skib if (!ffs_bp_snapblk(vp, nbp)) { 2205166193Skib if (BO_MTX(bo) != VI_MTX(vp)) { 2206166193Skib VI_UNLOCK(devvp); 2207166193Skib BO_LOCK(bo); 2208166193Skib } 2209166193Skib BUF_UNLOCK(nbp); 2210166193Skib continue; 2211166193Skib } 2212166193Skib VI_UNLOCK(devvp); 2213166193Skib } 2214166193Skib if (nbp->b_flags & B_CLUSTEROK) { 2215166193Skib vfs_bio_awrite(nbp); 2216166193Skib } else { 2217166193Skib bremfree(nbp); 2218166193Skib bawrite(nbp); 2219166193Skib } 2220166193Skib dirtybufferflushes++; 2221166193Skib break; 2222166193Skib } 2223166193Skib if (nbp == NULL) 2224166193Skib BO_UNLOCK(bo); 2225166193Skib } 2226166193Skib} 2227166193Skib 2228166193Skib/* 222962976Smckusick * Check for need to copy block that is about to be written, 223062976Smckusick * copying the block if necessary. 
223162976Smckusick */ 2232136963Sphkint 223373942Smckusickffs_copyonwrite(devvp, bp) 223473942Smckusick struct vnode *devvp; 223573942Smckusick struct buf *bp; 223662976Smckusick{ 2237135138Sphk struct snapdata *sn; 223873942Smckusick struct buf *ibp, *cbp, *savedcbp = 0; 223983366Sjulian struct thread *td = curthread; 224073942Smckusick struct fs *fs; 224162976Smckusick struct inode *ip; 2242105670Smckusick struct vnode *vp = 0; 2243107848Smckusick ufs2_daddr_t lbn, blkno, *snapblklist; 2244151177Stegge int lower, upper, mid, indiroff, error = 0; 2245150760Struckman int launched_async_io, prev_norunningbuf; 2246158260Stegge long saved_runningbufspace; 224762976Smckusick 2248233630Smckusick if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp))) 2249151179Stegge return (0); /* Update on a snapshot file */ 2250121443Sjhb if (td->td_pflags & TDP_COWINPROGRESS) 225162976Smckusick panic("ffs_copyonwrite: recursive call"); 2252107848Smckusick /* 2253107848Smckusick * First check to see if it is in the preallocated list. 2254107848Smckusick * By doing this check we avoid several potential deadlocks. 
2255107848Smckusick */ 2256107414Smckusick VI_LOCK(devvp); 2257135138Sphk sn = devvp->v_rdev->si_snapdata; 2258151177Stegge if (sn == NULL || 2259168353Sdelphij TAILQ_EMPTY(&sn->sn_head)) { 2260151177Stegge VI_UNLOCK(devvp); 2261151177Stegge return (0); /* No snapshot */ 2262151177Stegge } 2263135138Sphk ip = TAILQ_FIRST(&sn->sn_head); 2264105191Smckusick fs = ip->i_fs; 2265105191Smckusick lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2266135138Sphk snapblklist = sn->sn_blklist; 2267135138Sphk upper = sn->sn_listsize - 1; 2268107848Smckusick lower = 1; 2269107848Smckusick while (lower <= upper) { 2270107848Smckusick mid = (lower + upper) / 2; 2271107848Smckusick if (snapblklist[mid] == lbn) 2272107848Smckusick break; 2273107848Smckusick if (snapblklist[mid] < lbn) 2274107848Smckusick lower = mid + 1; 2275107848Smckusick else 2276107848Smckusick upper = mid - 1; 2277107848Smckusick } 2278107848Smckusick if (lower <= upper) { 2279107848Smckusick VI_UNLOCK(devvp); 2280107848Smckusick return (0); 2281107848Smckusick } 2282150760Struckman launched_async_io = 0; 2283150760Struckman prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF; 2284107848Smckusick /* 2285150741Struckman * Since I/O on bp isn't yet in progress and it may be blocked 2286150741Struckman * for a long time waiting on snaplk, back it out of 2287150741Struckman * runningbufspace, possibly waking other threads waiting for space. 2288150741Struckman */ 2289158260Stegge saved_runningbufspace = bp->b_runningbufspace; 2290158260Stegge if (saved_runningbufspace != 0) 2291158260Stegge runningbufwakeup(bp); 2292150741Struckman /* 2293107848Smckusick * Not in the precomputed list, so check the snapshots. 
2294107848Smckusick */ 2295175635Sattilio while (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 2296175635Sattilio VI_MTX(devvp)) != 0) { 2297151177Stegge VI_LOCK(devvp); 2298151177Stegge sn = devvp->v_rdev->si_snapdata; 2299151177Stegge if (sn == NULL || 2300168353Sdelphij TAILQ_EMPTY(&sn->sn_head)) { 2301151177Stegge VI_UNLOCK(devvp); 2302158260Stegge if (saved_runningbufspace != 0) { 2303158260Stegge bp->b_runningbufspace = saved_runningbufspace; 2304189595Sjhb atomic_add_long(&runningbufspace, 2305151177Stegge bp->b_runningbufspace); 2306158260Stegge } 2307151177Stegge return (0); /* Snapshot gone */ 2308151177Stegge } 2309151177Stegge } 2310135138Sphk TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 231162976Smckusick vp = ITOV(ip); 2312207742Sjeff if (DOINGSOFTDEP(vp)) 2313207742Sjeff softdep_prealloc(vp, MNT_WAIT); 231462976Smckusick /* 231562976Smckusick * We ensure that everything of our own that needs to be 231662976Smckusick * copied will be done at the time that ffs_snapshot is 231762976Smckusick * called. Thus we can skip the check here which can 231876132Sphk * deadlock in doing the lookup in UFS_BALLOC. 231962976Smckusick */ 232062976Smckusick if (bp->b_vp == vp) 232162976Smckusick continue; 232262976Smckusick /* 2323105670Smckusick * Check to see if block needs to be copied. We do not have 2324105670Smckusick * to hold the snapshot lock while doing this lookup as it 2325105670Smckusick * will never require any additional allocations for the 2326105670Smckusick * snapshot inode. 
232762976Smckusick */ 232862976Smckusick if (lbn < NDADDR) { 232998542Smckusick blkno = DIP(ip, i_db[lbn]); 233062976Smckusick } else { 2331150741Struckman td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 233276132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2333105191Smckusick fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 2334121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 2335105191Smckusick if (error) 2336105191Smckusick break; 233762976Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 233898542Smckusick if (ip->i_ump->um_fstype == UFS1) 233998542Smckusick blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 234098542Smckusick else 234198542Smckusick blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 234263788Smckusick bqrelse(ibp); 234362976Smckusick } 2344173464Sobrien#ifdef INVARIANTS 234562976Smckusick if (blkno == BLK_SNAP && bp->b_lblkno >= 0) 234662976Smckusick panic("ffs_copyonwrite: bad copy block"); 234762976Smckusick#endif 2348105191Smckusick if (blkno != 0) 234962976Smckusick continue; 235062976Smckusick /* 2351105670Smckusick * Allocate the block into which to do the copy. Since 2352105670Smckusick * multiple processes may all try to copy the same block, 2353105670Smckusick * we have to recheck our need to do a copy if we sleep 2354105670Smckusick * waiting for the lock. 2355105670Smckusick * 2356105670Smckusick * Because all snapshots on a filesystem share a single 2357105670Smckusick * lock, we ensure that we will never be in competition 2358105670Smckusick * with another process to allocate a block. 
235962976Smckusick */ 2360150741Struckman td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 236176132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2362105191Smckusick fs->fs_bsize, KERNCRED, 0, &cbp); 2363121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 2364105191Smckusick if (error) 2365105191Smckusick break; 236662976Smckusick#ifdef DEBUG 236762976Smckusick if (snapdebug) { 236898687Smux printf("Copyonwrite: snapino %d lbn %jd for ", 236998542Smckusick ip->i_number, (intmax_t)lbn); 237073942Smckusick if (bp->b_vp == devvp) 237162976Smckusick printf("fs metadata"); 237262976Smckusick else 237362976Smckusick printf("inum %d", VTOI(bp->b_vp)->i_number); 237498687Smux printf(" lblkno %jd to blkno %jd\n", 237598542Smckusick (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); 237662976Smckusick } 237762976Smckusick#endif 237862976Smckusick /* 237962976Smckusick * If we have already read the old block contents, then 238075943Smckusick * simply copy them to the new block. Note that we need 238175943Smckusick * to synchronously write snapshots that have not been 238275943Smckusick * unlinked, and hence will be visible after a crash, 2383223127Smckusick * to ensure their integrity. At a minimum we ensure the 2384223127Smckusick * integrity of the filesystem metadata, but use the 2385223127Smckusick * dopersistence sysctl-setable flag to decide on the 2386223127Smckusick * persistence needed for file content data. 
238762976Smckusick */ 238862976Smckusick if (savedcbp != 0) { 238962976Smckusick bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 239062976Smckusick bawrite(cbp); 2391223127Smckusick if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || 2392223127Smckusick dopersistence) && ip->i_effnlink > 0) 2393233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 2394150760Struckman else 2395150760Struckman launched_async_io = 1; 239662976Smckusick continue; 239762976Smckusick } 239862976Smckusick /* 239962976Smckusick * Otherwise, read the old block contents into the buffer. 240062976Smckusick */ 2401135138Sphk if ((error = readblock(vp, cbp, lbn)) != 0) { 240275943Smckusick bzero(cbp->b_data, fs->fs_bsize); 240375943Smckusick bawrite(cbp); 2404223127Smckusick if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || 2405223127Smckusick dopersistence) && ip->i_effnlink > 0) 2406233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 2407150760Struckman else 2408150760Struckman launched_async_io = 1; 240962976Smckusick break; 241075943Smckusick } 241162976Smckusick savedcbp = cbp; 241262976Smckusick } 241375943Smckusick /* 241475943Smckusick * Note that we need to synchronously write snapshots that 241575943Smckusick * have not been unlinked, and hence will be visible after 2416223127Smckusick * a crash, to ensure their integrity. At a minimum we 2417223127Smckusick * ensure the integrity of the filesystem metadata, but 2418223127Smckusick * use the dopersistence sysctl-setable flag to decide on 2419223127Smckusick * the persistence needed for file content data. 
242075943Smckusick */ 242175943Smckusick if (savedcbp) { 242275943Smckusick vp = savedcbp->b_vp; 242362976Smckusick bawrite(savedcbp); 2424223127Smckusick if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || 2425223127Smckusick dopersistence) && VTOI(vp)->i_effnlink > 0) 2426233630Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 2427150760Struckman else 2428150760Struckman launched_async_io = 1; 242975943Smckusick } 2430175635Sattilio lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 2431151177Stegge td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) | 2432151177Stegge prev_norunningbuf; 2433150791Struckman if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0) 2434150760Struckman waitrunningbufspace(); 2435150741Struckman /* 2436150741Struckman * I/O on bp will now be started, so count it in runningbufspace. 2437150741Struckman */ 2438158260Stegge if (saved_runningbufspace != 0) { 2439158260Stegge bp->b_runningbufspace = saved_runningbufspace; 2440189595Sjhb atomic_add_long(&runningbufspace, bp->b_runningbufspace); 2441158260Stegge } 244262976Smckusick return (error); 244362976Smckusick} 244462976Smckusick 244562976Smckusick/* 2446223020Smckusick * sync snapshots to force freework records waiting on snapshots to claim 2447223020Smckusick * blocks to free. 
2448223020Smckusick */ 2449223020Smckusickvoid 2450223020Smckusickffs_sync_snap(mp, waitfor) 2451223020Smckusick struct mount *mp; 2452223020Smckusick int waitfor; 2453223020Smckusick{ 2454223020Smckusick struct snapdata *sn; 2455223020Smckusick struct vnode *devvp; 2456223020Smckusick struct vnode *vp; 2457223020Smckusick struct inode *ip; 2458223020Smckusick 2459223020Smckusick devvp = VFSTOUFS(mp)->um_devvp; 2460223020Smckusick if ((devvp->v_vflag & VV_COPYONWRITE) == 0) 2461223020Smckusick return; 2462223020Smckusick for (;;) { 2463223020Smckusick VI_LOCK(devvp); 2464223020Smckusick sn = devvp->v_rdev->si_snapdata; 2465223020Smckusick if (sn == NULL) { 2466223020Smckusick VI_UNLOCK(devvp); 2467223020Smckusick return; 2468223020Smckusick } 2469223020Smckusick if (lockmgr(&sn->sn_lock, 2470223020Smckusick LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 2471223020Smckusick VI_MTX(devvp)) == 0) 2472223020Smckusick break; 2473223020Smckusick } 2474223020Smckusick TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 2475223020Smckusick vp = ITOV(ip); 2476233630Smckusick ffs_syncvnode(vp, waitfor, NO_INO_UPDT); 2477223020Smckusick } 2478223020Smckusick lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2479223020Smckusick} 2480223020Smckusick 2481223020Smckusick/* 248262976Smckusick * Read the specified block into the given buffer. 248362976Smckusick * Much of this boiler-plate comes from bwrite(). 
248462976Smckusick */ 248562976Smckusickstatic int 2486135138Sphkreadblock(vp, bp, lbn) 2487135138Sphk struct vnode *vp; 248862976Smckusick struct buf *bp; 248998542Smckusick ufs2_daddr_t lbn; 249062976Smckusick{ 2491135138Sphk struct inode *ip = VTOI(vp); 2492137035Sphk struct bio *bip; 249362976Smckusick 2494137035Sphk bip = g_alloc_bio(); 2495137035Sphk bip->bio_cmd = BIO_READ; 2496137035Sphk bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); 2497137035Sphk bip->bio_data = bp->b_data; 2498137035Sphk bip->bio_length = bp->b_bcount; 2499158308Spjd bip->bio_done = NULL; 2500137035Sphk 2501137035Sphk g_io_request(bip, ip->i_devvp->v_bufobj.bo_private); 2502158308Spjd bp->b_error = biowait(bip, "snaprdb"); 2503137035Sphk g_destroy_bio(bip); 2504137035Sphk return (bp->b_error); 250562976Smckusick} 2506154065Simp 2507183073Skib#endif 2508183073Skib 2509156560Stegge/* 2510156560Stegge * Process file deletes that were deferred by ufs_inactive() due to 2511163194Skib * the file system being suspended. Transfer IN_LAZYACCESS into 2512163194Skib * IN_MODIFIED for vnodes that were accessed during suspension. 2513156560Stegge */ 2514183073Skibvoid 2515156560Steggeprocess_deferred_inactive(struct mount *mp) 2516156560Stegge{ 2517156560Stegge struct vnode *vp, *mvp; 2518163194Skib struct inode *ip; 2519156560Stegge struct thread *td; 2520156560Stegge int error; 2521156560Stegge 2522156560Stegge td = curthread; 2523156560Stegge (void) vn_start_secondary_write(NULL, &mp, V_WAIT); 2524156560Stegge loop: 2525235626Smckusick MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 2526163194Skib /* 2527163194Skib * IN_LAZYACCESS is checked here without holding any 2528163194Skib * vnode lock, but this flag is set only while holding 2529163194Skib * vnode interlock. 
2530163194Skib */ 2531235626Smckusick if (vp->v_type == VNON || 2532163194Skib ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 && 2533235626Smckusick ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0))) { 2534156560Stegge VI_UNLOCK(vp); 2535156560Stegge continue; 2536156560Stegge } 2537156560Stegge vholdl(vp); 2538175202Sattilio error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); 2539156560Stegge if (error != 0) { 2540156560Stegge vdrop(vp); 2541156560Stegge if (error == ENOENT) 2542156560Stegge continue; /* vnode recycled */ 2543235626Smckusick MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 2544156560Stegge goto loop; 2545156560Stegge } 2546163194Skib ip = VTOI(vp); 2547163194Skib if ((ip->i_flag & IN_LAZYACCESS) != 0) { 2548163194Skib ip->i_flag &= ~IN_LAZYACCESS; 2549163194Skib ip->i_flag |= IN_MODIFIED; 2550163194Skib } 2551156560Stegge VI_LOCK(vp); 2552163194Skib if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) { 2553156560Stegge VI_UNLOCK(vp); 2554175294Sattilio VOP_UNLOCK(vp, 0); 2555156560Stegge vdrop(vp); 2556156560Stegge continue; 2557156560Stegge } 2558234473Smckusick vinactive(vp, td); 2559156560Stegge VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, 2560156560Stegge ("process_deferred_inactive: got VI_OWEINACT")); 2561156560Stegge VI_UNLOCK(vp); 2562175294Sattilio VOP_UNLOCK(vp, 0); 2563156560Stegge vdrop(vp); 2564156560Stegge } 2565156560Stegge vn_finished_secondary_write(mp); 2566156560Stegge} 2567158259Stegge 2568183073Skib#ifndef NO_FFS_SNAPSHOT 2569183073Skib 2570177778Sjeffstatic struct snapdata * 2571177778Sjeffffs_snapdata_alloc(void) 2572177778Sjeff{ 2573177778Sjeff struct snapdata *sn; 2574177778Sjeff 2575177778Sjeff /* 2576177778Sjeff * Fetch a snapdata from the free list if there is one available. 
2577177778Sjeff */ 2578177778Sjeff mtx_lock(&snapfree_lock); 2579177778Sjeff sn = LIST_FIRST(&snapfree); 2580177778Sjeff if (sn != NULL) 2581177778Sjeff LIST_REMOVE(sn, sn_link); 2582177778Sjeff mtx_unlock(&snapfree_lock); 2583177778Sjeff if (sn != NULL) 2584177778Sjeff return (sn); 2585177778Sjeff /* 2586177778Sjeff * If there were no free snapdatas allocate one. 2587177778Sjeff */ 2588177778Sjeff sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 2589177778Sjeff TAILQ_INIT(&sn->sn_head); 2590177778Sjeff lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 2591177778Sjeff LK_CANRECURSE | LK_NOSHARE); 2592177778Sjeff return (sn); 2593177778Sjeff} 2594177778Sjeff 2595177778Sjeff/* 2596177778Sjeff * The snapdata is never freed because we can not be certain that 2597177778Sjeff * there are no threads sleeping on the snap lock. Persisting 2598177778Sjeff * them permanently avoids costly synchronization in ffs_lock(). 2599177778Sjeff */ 2600177778Sjeffstatic void 2601177778Sjeffffs_snapdata_free(struct snapdata *sn) 2602177778Sjeff{ 2603177778Sjeff mtx_lock(&snapfree_lock); 2604177778Sjeff LIST_INSERT_HEAD(&snapfree, sn, sn_link); 2605177778Sjeff mtx_unlock(&snapfree_lock); 2606177778Sjeff} 2607177778Sjeff 2608158259Stegge/* Try to free snapdata associated with devvp */ 2609158259Steggestatic void 2610177778Sjefftry_free_snapdata(struct vnode *devvp) 2611158259Stegge{ 2612158259Stegge struct snapdata *sn; 2613158259Stegge ufs2_daddr_t *snapblklist; 2614158259Stegge 2615177778Sjeff ASSERT_VI_LOCKED(devvp, "try_free_snapdata"); 2616158259Stegge sn = devvp->v_rdev->si_snapdata; 2617158259Stegge 2618158259Stegge if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL || 2619158259Stegge (devvp->v_vflag & VV_COPYONWRITE) == 0) { 2620158259Stegge VI_UNLOCK(devvp); 2621158259Stegge return; 2622158259Stegge } 2623158259Stegge 2624158259Stegge devvp->v_rdev->si_snapdata = NULL; 2625158259Stegge devvp->v_vflag &= ~VV_COPYONWRITE; 2626177778Sjeff lockmgr(&sn->sn_lock, 
LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp)); 2627158259Stegge snapblklist = sn->sn_blklist; 2628158259Stegge sn->sn_blklist = NULL; 2629158259Stegge sn->sn_listsize = 0; 2630175635Sattilio lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2631158259Stegge if (snapblklist != NULL) 2632184205Sdes free(snapblklist, M_UFSMNT); 2633177778Sjeff ffs_snapdata_free(sn); 2634158259Stegge} 2635177778Sjeff 2636177778Sjeffstatic struct snapdata * 2637177778Sjeffffs_snapdata_acquire(struct vnode *devvp) 2638177778Sjeff{ 2639177778Sjeff struct snapdata *nsn; 2640177778Sjeff struct snapdata *sn; 2641177778Sjeff 2642177778Sjeff /* 2643262780Spfg * Allocate a free snapdata. This is done before acquiring the 2644177778Sjeff * devvp lock to avoid allocation while the devvp interlock is 2645177778Sjeff * held. 2646177778Sjeff */ 2647177778Sjeff nsn = ffs_snapdata_alloc(); 2648177778Sjeff /* 2649177778Sjeff * If there snapshots already exist on this filesystem grab a 2650177778Sjeff * reference to the shared lock. Otherwise this is the first 2651177778Sjeff * snapshot on this filesystem and we need to use our 2652177778Sjeff * pre-allocated snapdata. 2653177778Sjeff */ 2654177778Sjeff VI_LOCK(devvp); 2655177778Sjeff if (devvp->v_rdev->si_snapdata == NULL) { 2656177778Sjeff devvp->v_rdev->si_snapdata = nsn; 2657177778Sjeff nsn = NULL; 2658177778Sjeff } 2659177778Sjeff sn = devvp->v_rdev->si_snapdata; 2660177778Sjeff /* 2661177778Sjeff * Acquire the snapshot lock. 2662177778Sjeff */ 2663177778Sjeff lockmgr(&sn->sn_lock, 2664177778Sjeff LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, VI_MTX(devvp)); 2665177778Sjeff /* 2666177778Sjeff * Free any unused snapdata. 2667177778Sjeff */ 2668177778Sjeff if (nsn != NULL) 2669177778Sjeff ffs_snapdata_free(nsn); 2670177778Sjeff 2671177778Sjeff return (sn); 2672177778Sjeff} 2673177778Sjeff 2674154065Simp#endif 2675