1139825Simp/*- 262976Smckusick * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 362976Smckusick * 462976Smckusick * Further information about snapshots can be obtained from: 562976Smckusick * 662976Smckusick * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 762976Smckusick * 1614 Oxford Street mckusick@mckusick.com 862976Smckusick * Berkeley, CA 94709-1608 +1-510-843-9542 962976Smckusick * USA 1062976Smckusick * 1162976Smckusick * Redistribution and use in source and binary forms, with or without 1262976Smckusick * modification, are permitted provided that the following conditions 1362976Smckusick * are met: 1462976Smckusick * 1562976Smckusick * 1. Redistributions of source code must retain the above copyright 1662976Smckusick * notice, this list of conditions and the following disclaimer. 1762976Smckusick * 2. Redistributions in binary form must reproduce the above copyright 1862976Smckusick * notice, this list of conditions and the following disclaimer in the 1962976Smckusick * documentation and/or other materials provided with the distribution. 2062976Smckusick * 2162976Smckusick * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 2262976Smckusick * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 2362976Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 2462976Smckusick * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 2562976Smckusick * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2662976Smckusick * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2762976Smckusick * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2862976Smckusick * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2962976Smckusick * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3062976Smckusick * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3162976Smckusick * SUCH DAMAGE. 3262976Smckusick * 3363788Smckusick * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 3462976Smckusick */ 3562976Smckusick 36116192Sobrien#include <sys/cdefs.h> 37116192Sobrien__FBSDID("$FreeBSD: releng/11.0/sys/ufs/ffs/ffs_snapshot.c 297791 2016-04-10 21:48:11Z pfg $"); 38116192Sobrien 39158322Stegge#include "opt_quota.h" 40158322Stegge 4162976Smckusick#include <sys/param.h> 42105191Smckusick#include <sys/kernel.h> 4362976Smckusick#include <sys/systm.h> 4473942Smckusick#include <sys/conf.h> 4562976Smckusick#include <sys/bio.h> 4662976Smckusick#include <sys/buf.h> 47177785Skib#include <sys/fcntl.h> 4862976Smckusick#include <sys/proc.h> 4962976Smckusick#include <sys/namei.h> 50113376Sjeff#include <sys/sched.h> 5162976Smckusick#include <sys/stat.h> 5262976Smckusick#include <sys/malloc.h> 5362976Smckusick#include <sys/mount.h> 5462976Smckusick#include <sys/resource.h> 5562976Smckusick#include <sys/resourcevar.h> 56251171Sjeff#include <sys/rwlock.h> 5762976Smckusick#include <sys/vnode.h> 5862976Smckusick 59137035Sphk#include <geom/geom.h> 60137035Sphk 6162976Smckusick#include <ufs/ufs/extattr.h> 6262976Smckusick#include <ufs/ufs/quota.h> 6362976Smckusick#include <ufs/ufs/ufsmount.h> 6462976Smckusick#include <ufs/ufs/inode.h> 6562976Smckusick#include <ufs/ufs/ufs_extern.h> 6662976Smckusick 6762976Smckusick#include <ufs/ffs/fs.h> 6862976Smckusick#include <ufs/ffs/ffs_extern.h> 6962976Smckusick 7091420Sjhb#define KERNCRED thread0.td_ucred 7165998Sdes#define DEBUG 1 7262976Smckusick 73154065Simp#include "opt_ffs.h" 74154065Simp 75154065Simp#ifdef NO_FFS_SNAPSHOT 76154065Simpint 77154065Simpffs_snapshot(mp, snapfile) 78154065Simp struct mount *mp; 79154065Simp char *snapfile; 80154065Simp{ 81154065Simp return (EINVAL); 82154065Simp} 83154065Simp 84154065Simpint 85223127Smckusickffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) 86154065Simp struct fs *fs; 87154065Simp struct vnode *devvp; 88154065Simp ufs2_daddr_t bno; 89154065Simp long size; 90154065Simp ino_t inum; 91223127Smckusick enum vtype vtype; 92223020Smckusick struct workhead *wkhd; 93154065Simp{ 94154065Simp return (EINVAL); 95154065Simp} 96154065Simp 97154065Simpvoid 98154065Simpffs_snapremove(vp) 99154065Simp struct vnode *vp; 100154065Simp{ 101154065Simp} 102154065Simp 103154065Simpvoid 104154065Simpffs_snapshot_mount(mp) 105154065Simp struct mount *mp; 106154065Simp{ 107154065Simp} 108154065Simp 109154065Simpvoid 110154065Simpffs_snapshot_unmount(mp) 111154065Simp struct mount *mp; 112154065Simp{ 113154065Simp} 114154065Simp 115154065Simpvoid 116154065Simpffs_snapgone(ip) 117154065Simp struct inode *ip; 118154065Simp{ 119154065Simp} 120154065Simp 121154065Simpint 122154065Simpffs_copyonwrite(devvp, bp) 123154065Simp struct vnode *devvp; 124154065Simp struct buf *bp; 125154065Simp{ 126154065Simp return (EINVAL); 127154065Simp} 128154065Simp 129223020Smckusickvoid 130223020Smckusickffs_sync_snap(mp, waitfor) 131223020Smckusick struct mount *mp; 132223020Smckusick int waitfor; 133223020Smckusick{ 134223020Smckusick} 135223020Smckusick 136154065Simp#else 137218485SnetchildFEATURE(ffs_snapshot, "FFS snapshot support"); 138154065Simp 139177778SjeffLIST_HEAD(, snapdata) snapfree; 140177778Sjeffstatic struct mtx snapfree_lock; 141177778SjeffMTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); 142177778Sjeff 14392728Salfredstatic int cgaccount(int, struct vnode *, struct buf *, int); 14498542Smckusickstatic int expunge_ufs1(struct vnode *, struct inode *, struct fs *, 14598542Smckusick int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 146207141Sjeff ufs_lbn_t, int), int, int); 14798542Smckusickstatic int indiracct_ufs1(struct vnode *, struct vnode *, int, 14898542Smckusick ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 14998542Smckusick int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, 15098542Smckusick ufs_lbn_t, int), int); 15198542Smckusickstatic int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 15298542Smckusick struct fs *, ufs_lbn_t, int); 15398542Smckusickstatic int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 15498542Smckusick struct fs *, ufs_lbn_t, int); 15598542Smckusickstatic int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 15698542Smckusick struct fs *, ufs_lbn_t, int); 15798542Smckusickstatic int expunge_ufs2(struct vnode *, struct inode *, struct fs *, 15898542Smckusick int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 159207141Sjeff ufs_lbn_t, int), int, int); 16098542Smckusickstatic int indiracct_ufs2(struct vnode *, struct vnode *, int, 16198542Smckusick ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, 16298542Smckusick int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, 16398542Smckusick ufs_lbn_t, int), int); 16498542Smckusickstatic int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 16598542Smckusick struct fs *, ufs_lbn_t, int); 16698542Smckusickstatic int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 16798542Smckusick struct fs *, ufs_lbn_t, int); 16898542Smckusickstatic int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 16998542Smckusick struct fs *, ufs_lbn_t, int); 170135138Sphkstatic int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t); 171177778Sjeffstatic void try_free_snapdata(struct vnode *devvp); 172177778Sjeffstatic struct snapdata *ffs_snapdata_acquire(struct vnode *devvp); 173166193Skibstatic int ffs_bp_snapblk(struct vnode *, struct buf *); 17462976Smckusick 17576580Smckusick/* 17676580Smckusick * To ensure the consistency of snapshots across crashes, we must 17776580Smckusick * synchronously write out copied blocks before allowing the 17876580Smckusick * originals to be modified. Because of the rather severe speed 179223127Smckusick * penalty that this imposes, the code normally only ensures 180223127Smckusick * persistence for the filesystem metadata contained within a 181223127Smckusick * snapshot. Setting the following flag allows this crash 182223127Smckusick * persistence to be enabled for file contents. 18376580Smckusick */ 18476580Smckusickint dopersistence = 0; 18576580Smckusick 18662976Smckusick#ifdef DEBUG 18762976Smckusick#include <sys/sysctl.h> 18876580SmckusickSYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); 189114293Smarkmstatic int snapdebug = 0; 19062976SmckusickSYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 19187827Smckusickint collectsnapstats = 0; 19287827SmckusickSYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 19387827Smckusick 0, ""); 19462976Smckusick#endif /* DEBUG */ 19562976Smckusick 19662976Smckusick/* 19762976Smckusick * Create a snapshot file and initialize it for the filesystem. 19862976Smckusick */ 19962976Smckusickint 20062976Smckusickffs_snapshot(mp, snapfile) 20162976Smckusick struct mount *mp; 20262976Smckusick char *snapfile; 20362976Smckusick{ 204111240Smckusick ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; 20598542Smckusick int error, cg, snaploc; 20690098Smckusick int i, size, len, loc; 207232351Smckusick ufs2_daddr_t blockno; 208225806Smckusick uint64_t flag; 20987827Smckusick struct timespec starttime = {0, 0}, endtime; 21087827Smckusick char saved_nice = 0; 211111240Smckusick long redo = 0, snaplistsize = 0; 21276269Smckusick int32_t *lp; 21371073Siedowse void *space; 214140706Sjeff struct fs *copy_fs = NULL, *fs; 21583366Sjulian struct thread *td = curthread; 21673942Smckusick struct inode *ip, *xp; 217225807Smckusick struct buf *bp, *nbp, *ibp; 21862976Smckusick struct nameidata nd; 21962976Smckusick struct mount *wrtmp; 22062976Smckusick struct vattr vat; 221154152Stegge struct vnode *vp, *xvp, *mvp, *devvp; 222104698Smckusick struct uio auio; 223104698Smckusick struct iovec aiov; 224135138Sphk struct snapdata *sn; 225140706Sjeff struct ufsmount *ump; 22662976Smckusick 227140706Sjeff ump = VFSTOUFS(mp); 228140706Sjeff fs = ump->um_fs; 229158632Stegge sn = NULL; 230230250Smckusick /* 231230250Smckusick * At the moment, journaled soft updates cannot support 232230250Smckusick * taking snapshots. 233230250Smckusick */ 234230250Smckusick if (MOUNTEDSUJ(mp)) { 235230250Smckusick vfs_mount_error(mp, "%s: Snapshots are not yet supported when " 236230250Smckusick "running with journaled soft updates", fs->fs_fsmnt); 237230250Smckusick return (EOPNOTSUPP); 238230250Smckusick } 239162647Stegge MNT_ILOCK(mp); 240162647Stegge flag = mp->mnt_flag; 241162647Stegge MNT_IUNLOCK(mp); 242135138Sphk /* 24362976Smckusick * Need to serialize access to snapshot code per filesystem. 24462976Smckusick */ 24562976Smckusick /* 24662976Smckusick * Assign a snapshot slot in the superblock. 24762976Smckusick */ 248140706Sjeff UFS_LOCK(ump); 24962976Smckusick for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 25062976Smckusick if (fs->fs_snapinum[snaploc] == 0) 25162976Smckusick break; 252140706Sjeff UFS_UNLOCK(ump); 25362976Smckusick if (snaploc == FSMAXSNAP) 25462976Smckusick return (ENOSPC); 25562976Smckusick /* 25662976Smckusick * Create the snapshot file. 25762976Smckusick */ 25862976Smckusickrestart: 259275897Skib NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE, UIO_SYSSPACE, 260275897Skib snapfile, td); 26162976Smckusick if ((error = namei(&nd)) != 0) 26262976Smckusick return (error); 26362976Smckusick if (nd.ni_vp != NULL) { 26462976Smckusick vput(nd.ni_vp); 26562976Smckusick error = EEXIST; 26662976Smckusick } 26762976Smckusick if (nd.ni_dvp->v_mount != mp) 26862976Smckusick error = EXDEV; 26962976Smckusick if (error) { 27062976Smckusick NDFREE(&nd, NDF_ONLY_PNBUF); 27162976Smckusick if (nd.ni_dvp == nd.ni_vp) 27262976Smckusick vrele(nd.ni_dvp); 27362976Smckusick else 27462976Smckusick vput(nd.ni_dvp); 27562976Smckusick return (error); 27662976Smckusick } 27762976Smckusick VATTR_NULL(&vat); 27862976Smckusick vat.va_type = VREG; 27962976Smckusick vat.va_mode = S_IRUSR; 28062976Smckusick vat.va_vaflags |= VA_EXCLUSIVE; 28162976Smckusick if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 28262976Smckusick wrtmp = NULL; 28362976Smckusick if (wrtmp != mp) 28462976Smckusick panic("ffs_snapshot: mount mismatch"); 285157325Sjeff vfs_rel(wrtmp); 28662985Smckusick if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 28762976Smckusick NDFREE(&nd, NDF_ONLY_PNBUF); 28862976Smckusick vput(nd.ni_dvp); 28962985Smckusick if ((error = vn_start_write(NULL, &wrtmp, 29062985Smckusick V_XSLEEP | PCATCH)) != 0) 29162976Smckusick return (error); 29262976Smckusick goto restart; 29362976Smckusick } 29462976Smckusick error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 295175294Sattilio VOP_UNLOCK(nd.ni_dvp, 0); 29662976Smckusick if (error) { 29762976Smckusick NDFREE(&nd, NDF_ONLY_PNBUF); 29862976Smckusick vn_finished_write(wrtmp); 299156895Stegge vrele(nd.ni_dvp); 30062976Smckusick return (error); 30162976Smckusick } 30262976Smckusick vp = nd.ni_vp; 303166142Smpp vp->v_vflag |= VV_SYSTEM; 30462976Smckusick ip = VTOI(vp); 305107414Smckusick devvp = ip->i_devvp; 30662976Smckusick /* 30762976Smckusick * Allocate and copy the last block contents so as to be able 30862976Smckusick * to set size to that of the filesystem. 30962976Smckusick */ 31062976Smckusick numblks = howmany(fs->fs_size, fs->fs_frag); 31176132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 31298658Sdillon fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 31362976Smckusick if (error) 31462976Smckusick goto out; 31562976Smckusick ip->i_size = lblktosize(fs, (off_t)numblks); 316132775Skan DIP_SET(ip, i_size, ip->i_size); 31762976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 318158633Stegge error = readblock(vp, bp, numblks - 1); 319158633Stegge bawrite(bp); 320158633Stegge if (error != 0) 32162976Smckusick goto out; 32262976Smckusick /* 32362976Smckusick * Preallocate critical data structures so that we can copy 32462976Smckusick * them in without further allocation after we suspend all 32562976Smckusick * operations on the filesystem. We would like to just release 32662976Smckusick * the allocated buffers without writing them since they will 32762976Smckusick * be filled in below once we are ready to go, but this upsets 32862976Smckusick * the soft update code, so we go ahead and write the new buffers. 32962976Smckusick * 33075993Smckusick * Allocate all indirect blocks and mark all of them as not 33175993Smckusick * needing to be copied. 33262976Smckusick */ 33362976Smckusick for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 33476132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 33598658Sdillon fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 33662976Smckusick if (error) 33762976Smckusick goto out; 338107406Smckusick bawrite(ibp); 33962976Smckusick } 34062976Smckusick /* 34162976Smckusick * Allocate copies for the superblock and its summary information. 34262976Smckusick */ 343107294Smckusick error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 344107294Smckusick 0, &nbp); 34576269Smckusick if (error) 34662976Smckusick goto out; 34762976Smckusick bawrite(nbp); 34862976Smckusick blkno = fragstoblks(fs, fs->fs_csaddr); 34962976Smckusick len = howmany(fs->fs_cssize, fs->fs_bsize); 35062976Smckusick for (loc = 0; loc < len; loc++) { 35176132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 35262976Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 35362976Smckusick if (error) 35462976Smckusick goto out; 35562976Smckusick bawrite(nbp); 35662976Smckusick } 35762976Smckusick /* 35887827Smckusick * Allocate all cylinder group blocks. 35987827Smckusick */ 36087827Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 361111238Smckusick error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 36287827Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 36387827Smckusick if (error) 36487827Smckusick goto out; 365107406Smckusick bawrite(nbp); 366184934Sambrisko if (cg % 10 == 0) 367233438Smckusick ffs_syncvnode(vp, MNT_WAIT, 0); 36887827Smckusick } 36987827Smckusick /* 37087827Smckusick * Copy all the cylinder group maps. Although the 37187827Smckusick * filesystem is still active, we hope that only a few 37287827Smckusick * cylinder groups will change between now and when we 37387827Smckusick * suspend operations. Thus, we will be able to quickly 37487827Smckusick * touch up the few cylinder groups that changed during 37587827Smckusick * the suspension period. 37687827Smckusick */ 37789450Smckusick len = howmany(fs->fs_ncg, NBBY); 378184205Sdes space = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 379140706Sjeff UFS_LOCK(ump); 380140706Sjeff fs->fs_active = space; 381140706Sjeff UFS_UNLOCK(ump); 38287827Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 383111238Smckusick error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 384107558Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 385107558Smckusick if (error) 38687827Smckusick goto out; 38787827Smckusick error = cgaccount(cg, vp, nbp, 1); 38887827Smckusick bawrite(nbp); 389184934Sambrisko if (cg % 10 == 0) 390233438Smckusick ffs_syncvnode(vp, MNT_WAIT, 0); 39187827Smckusick if (error) 39287827Smckusick goto out; 39387827Smckusick } 39487827Smckusick /* 39562976Smckusick * Change inode to snapshot type file. 39662976Smckusick */ 39763897Smckusick ip->i_flags |= SF_SNAPSHOT; 398132775Skan DIP_SET(ip, i_flags, ip->i_flags); 39962976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 40062976Smckusick /* 40162976Smckusick * Ensure that the snapshot is completely on disk. 402107406Smckusick * Since we have marked it as a snapshot it is safe to 403107406Smckusick * unlock it as no process will be allowed to write to it. 40462976Smckusick */ 405233438Smckusick if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) 40662976Smckusick goto out; 407175294Sattilio VOP_UNLOCK(vp, 0); 40862976Smckusick /* 40962976Smckusick * All allocations are done, so we can now snapshot the system. 41062976Smckusick * 41187827Smckusick * Recind nice scheduling while running with the filesystem suspended. 41287827Smckusick */ 413130551Sjulian if (td->td_proc->p_nice > 0) { 414170307Sjeff struct proc *p; 415170307Sjeff 416170307Sjeff p = td->td_proc; 417170307Sjeff PROC_LOCK(p); 418170307Sjeff saved_nice = p->p_nice; 419170307Sjeff sched_nice(p, 0); 420170307Sjeff PROC_UNLOCK(p); 42187827Smckusick } 42287827Smckusick /* 42362976Smckusick * Suspend operation on filesystem. 42462976Smckusick */ 42562976Smckusick for (;;) { 42662976Smckusick vn_finished_write(wrtmp); 427253106Skib if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) { 428105902Smckusick vn_start_write(NULL, &wrtmp, V_WAIT); 429175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 430105902Smckusick goto out; 431105902Smckusick } 43262976Smckusick if (mp->mnt_kern_flag & MNTK_SUSPENDED) 43362976Smckusick break; 43462985Smckusick vn_start_write(NULL, &wrtmp, V_WAIT); 43562976Smckusick } 436175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 437158262Stegge if (ip->i_effnlink == 0) { 438158262Stegge error = ENOENT; /* Snapshot file unlinked */ 439158262Stegge goto out1; 440158262Stegge } 44190098Smckusick if (collectsnapstats) 44290098Smckusick nanotime(&starttime); 443158634Stegge 444158634Stegge /* The last block might have changed. Copy it again to be sure. */ 445158634Stegge error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 446158634Stegge fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 447158634Stegge if (error != 0) 448158634Stegge goto out1; 449158634Stegge error = readblock(vp, bp, numblks - 1); 450158634Stegge bp->b_flags |= B_VALIDSUSPWRT; 451158634Stegge bawrite(bp); 452158634Stegge if (error != 0) 453158634Stegge goto out1; 45462976Smckusick /* 45587827Smckusick * First, copy all the cylinder group maps that have changed. 45662976Smckusick */ 45762976Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 45888138Smckusick if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 45987827Smckusick continue; 46087827Smckusick redo++; 461111238Smckusick error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 462107558Smckusick fs->fs_bsize, KERNCRED, 0, &nbp); 463107558Smckusick if (error) 46462976Smckusick goto out1; 46587827Smckusick error = cgaccount(cg, vp, nbp, 2); 46689450Smckusick bawrite(nbp); 46787827Smckusick if (error) 46862976Smckusick goto out1; 46962976Smckusick } 47062976Smckusick /* 47176269Smckusick * Grab a copy of the superblock and its summary information. 47276269Smckusick * We delay writing it until the suspension is released below. 47376269Smckusick */ 474225807Smckusick copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK); 47576269Smckusick bcopy(fs, copy_fs, fs->fs_sbsize); 47676269Smckusick if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) 47776269Smckusick copy_fs->fs_clean = 1; 478111972Smckusick size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; 479111972Smckusick if (fs->fs_sbsize < size) 480225807Smckusick bzero(&((char *)copy_fs)[fs->fs_sbsize], 481225807Smckusick size - fs->fs_sbsize); 48276269Smckusick size = blkroundup(fs, fs->fs_cssize); 48376269Smckusick if (fs->fs_contigsumsize > 0) 48476269Smckusick size += fs->fs_ncg * sizeof(int32_t); 485111119Simp space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 48676269Smckusick copy_fs->fs_csp = space; 48776269Smckusick bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 488130246Sstefanf space = (char *)space + fs->fs_cssize; 48976269Smckusick loc = howmany(fs->fs_cssize, fs->fs_fsize); 49076356Smckusick i = fs->fs_frag - loc % fs->fs_frag; 49176356Smckusick len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize; 49276356Smckusick if (len > 0) { 493107414Smckusick if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 49476269Smckusick len, KERNCRED, &bp)) != 0) { 49590098Smckusick brelse(bp); 49676269Smckusick free(copy_fs->fs_csp, M_UFSMNT); 497225807Smckusick free(copy_fs, M_UFSMNT); 498225807Smckusick copy_fs = NULL; 49976269Smckusick goto out1; 50062976Smckusick } 50176269Smckusick bcopy(bp->b_data, space, (u_int)len); 502130246Sstefanf space = (char *)space + len; 50376269Smckusick bp->b_flags |= B_INVAL | B_NOCACHE; 50476269Smckusick brelse(bp); 50562976Smckusick } 50676269Smckusick if (fs->fs_contigsumsize > 0) { 50776269Smckusick copy_fs->fs_maxcluster = lp = space; 50876269Smckusick for (i = 0; i < fs->fs_ncg; i++) 50976269Smckusick *lp++ = fs->fs_contigsumsize; 51076269Smckusick } 51162976Smckusick /* 51290098Smckusick * We must check for active files that have been unlinked 51390098Smckusick * (e.g., with a zero link count). We have to expunge all 51490098Smckusick * trace of these files from the snapshot so that they are 51590098Smckusick * not reclaimed prematurely by fsck or unnecessarily dumped. 51690098Smckusick * We turn off the MNTK_SUSPENDED flag to avoid a panic from 51790098Smckusick * spec_strategy about writing on a suspended filesystem. 518104698Smckusick * Note that we skip unlinked snapshot files as they will 519104698Smckusick * be handled separately below. 520111240Smckusick * 521111240Smckusick * We also calculate the needed size for the snapshot list. 52290098Smckusick */ 523111240Smckusick snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 524111240Smckusick FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; 525140706Sjeff MNT_ILOCK(mp); 52690098Smckusick mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 527234386Smckusick MNT_IUNLOCK(mp); 52890098Smckusickloop: 529234386Smckusick MNT_VNODE_FOREACH_ALL(xvp, mp, mvp) { 530234386Smckusick if ((xvp->v_usecount == 0 && 531156560Stegge (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) || 532156560Stegge xvp->v_type == VNON || 533232351Smckusick IS_SNAPSHOT(VTOI(xvp))) { 534120740Sjeff VI_UNLOCK(xvp); 53590098Smckusick continue; 53690098Smckusick } 537130690Skuriyama /* 538130690Skuriyama * We can skip parent directory vnode because it must have 539130690Skuriyama * this snapshot file in it. 540130690Skuriyama */ 541130690Skuriyama if (xvp == nd.ni_dvp) { 542130690Skuriyama VI_UNLOCK(xvp); 543130690Skuriyama continue; 544130690Skuriyama } 545156560Stegge vholdl(xvp); 546175202Sattilio if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) { 547234386Smckusick MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 548156560Stegge vdrop(xvp); 54990098Smckusick goto loop; 550120740Sjeff } 551156560Stegge VI_LOCK(xvp); 552156560Stegge if (xvp->v_usecount == 0 && 553156560Stegge (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) { 554156560Stegge VI_UNLOCK(xvp); 555175294Sattilio VOP_UNLOCK(xvp, 0); 556156560Stegge vdrop(xvp); 557156560Stegge continue; 558156560Stegge } 559156560Stegge VI_UNLOCK(xvp); 560124119Skan if (snapdebug) 561124119Skan vprint("ffs_snapshot: busy vnode", xvp); 562182371Sattilio if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 && 563120740Sjeff vat.va_nlink > 0) { 564175294Sattilio VOP_UNLOCK(xvp, 0); 565156560Stegge vdrop(xvp); 566120740Sjeff continue; 567120740Sjeff } 56890098Smckusick xp = VTOI(xvp); 569111239Smckusick if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 570175294Sattilio VOP_UNLOCK(xvp, 0); 571156560Stegge vdrop(xvp); 572111239Smckusick continue; 573111239Smckusick } 57490098Smckusick /* 57590098Smckusick * If there is a fragment, clear it here. 57690098Smckusick */ 57790098Smckusick blkno = 0; 57890098Smckusick loc = howmany(xp->i_size, fs->fs_bsize) - 1; 57990098Smckusick if (loc < NDADDR) { 58090098Smckusick len = fragroundup(fs, blkoff(fs, xp->i_size)); 581142074Sdelphij if (len != 0 && len < fs->fs_bsize) { 582140706Sjeff ffs_blkfree(ump, copy_fs, vp, 583207141Sjeff DIP(xp, i_db[loc]), len, xp->i_number, 584223127Smckusick xvp->v_type, NULL); 58598542Smckusick blkno = DIP(xp, i_db[loc]); 586132775Skan DIP_SET(xp, i_db[loc], 0); 58790098Smckusick } 58890098Smckusick } 589111240Smckusick snaplistsize += 1; 59098542Smckusick if (xp->i_ump->um_fstype == UFS1) 59198542Smckusick error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 592207141Sjeff BLK_NOCOPY, 1); 59398542Smckusick else 59498542Smckusick error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 595207141Sjeff BLK_NOCOPY, 1); 59690098Smckusick if (blkno) 597132775Skan DIP_SET(xp, i_db[loc], blkno); 59890098Smckusick if (!error) 599140706Sjeff error = ffs_freefile(ump, copy_fs, vp, xp->i_number, 600207141Sjeff xp->i_mode, NULL); 601175294Sattilio VOP_UNLOCK(xvp, 0); 602156560Stegge vdrop(xvp); 60390098Smckusick if (error) { 60490098Smckusick free(copy_fs->fs_csp, M_UFSMNT); 605225807Smckusick free(copy_fs, M_UFSMNT); 606225807Smckusick copy_fs = NULL; 607234386Smckusick MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 60890098Smckusick goto out1; 60990098Smckusick } 61090098Smckusick } 61190098Smckusick /* 612207141Sjeff * Erase the journal file from the snapshot. 613207141Sjeff */ 614207141Sjeff if (fs->fs_flags & FS_SUJ) { 615207141Sjeff error = softdep_journal_lookup(mp, &xvp); 616207141Sjeff if (error) { 617207141Sjeff free(copy_fs->fs_csp, M_UFSMNT); 618225807Smckusick free(copy_fs, M_UFSMNT); 619225807Smckusick copy_fs = NULL; 620207141Sjeff goto out1; 621207141Sjeff } 622207141Sjeff xp = VTOI(xvp); 623207141Sjeff if (xp->i_ump->um_fstype == UFS1) 624207141Sjeff error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 625207141Sjeff BLK_NOCOPY, 0); 626207141Sjeff else 627207141Sjeff error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 628207141Sjeff BLK_NOCOPY, 0); 629207141Sjeff vput(xvp); 630207141Sjeff } 631207141Sjeff /* 632177778Sjeff * Acquire a lock on the snapdata structure, creating it if necessary. 633105191Smckusick */ 634177778Sjeff sn = ffs_snapdata_acquire(devvp); 635177778Sjeff /* 636177778Sjeff * Change vnode to use shared snapshot lock instead of the original 637177778Sjeff * private lock. 638177778Sjeff */ 639177778Sjeff vp->v_vnlock = &sn->sn_lock; 640175635Sattilio lockmgr(&vp->v_lock, LK_RELEASE, NULL); 641177778Sjeff xp = TAILQ_FIRST(&sn->sn_head); 642105191Smckusick /* 643111240Smckusick * If this is the first snapshot on this filesystem, then we need 644111240Smckusick * to allocate the space for the list of preallocated snapshot blocks. 645111240Smckusick * This list will be refined below, but this preliminary one will 646111240Smckusick * keep us out of deadlock until the full one is ready. 647111240Smckusick */ 648111240Smckusick if (xp == NULL) { 649184205Sdes snapblklist = malloc(snaplistsize * sizeof(daddr_t), 650111240Smckusick M_UFSMNT, M_WAITOK); 651111240Smckusick blkp = &snapblklist[1]; 652111240Smckusick *blkp++ = lblkno(fs, fs->fs_sblockloc); 653111240Smckusick blkno = fragstoblks(fs, fs->fs_csaddr); 654111240Smckusick for (cg = 0; cg < fs->fs_ncg; cg++) { 655111240Smckusick if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 656111240Smckusick break; 657111240Smckusick *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 658111240Smckusick } 659111240Smckusick len = howmany(fs->fs_cssize, fs->fs_bsize); 660111240Smckusick for (loc = 0; loc < len; loc++) 661111240Smckusick *blkp++ = blkno + loc; 662111240Smckusick for (; cg < fs->fs_ncg; cg++) 663111240Smckusick *blkp++ = fragstoblks(fs, cgtod(fs, cg)); 664111240Smckusick snapblklist[0] = blkp - snapblklist; 665111240Smckusick VI_LOCK(devvp); 666135138Sphk if (sn->sn_blklist != NULL) 667111240Smckusick panic("ffs_snapshot: non-empty list"); 668135138Sphk sn->sn_blklist = snapblklist; 669135138Sphk sn->sn_listsize = blkp - snapblklist; 670111240Smckusick VI_UNLOCK(devvp); 671111240Smckusick } 672111240Smckusick /* 67362976Smckusick * Record snapshot inode. Since this is the newest snapshot, 67462976Smckusick * it must be placed at the end of the list. 67562976Smckusick */ 676107414Smckusick VI_LOCK(devvp); 67762976Smckusick fs->fs_snapinum[snaploc] = ip->i_number; 67873942Smckusick if (ip->i_nextsnap.tqe_prev != 0) 679241011Smdf panic("ffs_snapshot: %ju already on list", 680241011Smdf (uintmax_t)ip->i_number); 681135138Sphk TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 682107414Smckusick devvp->v_vflag |= VV_COPYONWRITE; 683107414Smckusick VI_UNLOCK(devvp); 684101308Sjeff ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 68587827Smckusickout1: 686225807Smckusick KASSERT((sn != NULL && copy_fs != NULL && error == 0) || 687225807Smckusick (sn == NULL && copy_fs == NULL && error != 0), 688158632Stegge ("email phk@ and mckusick@")); 68962976Smckusick /* 69062976Smckusick * Resume operation on filesystem. 69162976Smckusick */ 692245286Skib vfs_write_resume(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR); 69387827Smckusick if (collectsnapstats && starttime.tv_sec > 0) { 69487827Smckusick nanotime(&endtime); 69587827Smckusick timespecsub(&endtime, &starttime); 696106965Speter printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 697106965Speter vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 69887827Smckusick endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 69987827Smckusick } 700225807Smckusick if (copy_fs == NULL) 70190098Smckusick goto out; 70290098Smckusick /* 70390098Smckusick * Copy allocation information from all the snapshots in 70490098Smckusick * this snapshot and then expunge them from its view. 70590098Smckusick */ 706135138Sphk TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) { 70790098Smckusick if (xp == ip) 70890098Smckusick break; 70998542Smckusick if (xp->i_ump->um_fstype == UFS1) 71098542Smckusick error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 711207141Sjeff BLK_SNAP, 0); 71298542Smckusick else 71398542Smckusick error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 714207141Sjeff BLK_SNAP, 0); 715158527Stegge if (error == 0 && xp->i_effnlink == 0) { 716158527Stegge error = ffs_freefile(ump, 717158527Stegge copy_fs, 718158527Stegge vp, 719158527Stegge xp->i_number, 720207141Sjeff xp->i_mode, NULL); 721158527Stegge } 72298542Smckusick if (error) { 72390098Smckusick fs->fs_snapinum[snaploc] = 0; 72490098Smckusick goto done; 72587827Smckusick } 72690098Smckusick } 72790098Smckusick /* 728111240Smckusick * Allocate space for the full list of preallocated snapshot blocks. 729104698Smckusick */ 730184205Sdes snapblklist = malloc(snaplistsize * sizeof(daddr_t), 731111119Simp M_UFSMNT, M_WAITOK); 732107915Smckusick ip->i_snapblklist = &snapblklist[1]; 733104698Smckusick /* 73490098Smckusick * Expunge the blocks used by the snapshots from the set of 735104698Smckusick * blocks marked as used in the snapshot bitmaps. Also, collect 736107915Smckusick * the list of allocated blocks in i_snapblklist. 73790098Smckusick */ 73898542Smckusick if (ip->i_ump->um_fstype == UFS1) 739207141Sjeff error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, 740207141Sjeff BLK_SNAP, 0); 74198542Smckusick else 742207141Sjeff error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, 743207141Sjeff BLK_SNAP, 0); 74498542Smckusick if (error) { 74590098Smckusick fs->fs_snapinum[snaploc] = 0; 746184205Sdes free(snapblklist, M_UFSMNT); 74790098Smckusick goto done; 74890098Smckusick } 749111240Smckusick if (snaplistsize < ip->i_snapblklist - snapblklist) 750111240Smckusick panic("ffs_snapshot: list too small"); 751107915Smckusick snaplistsize = ip->i_snapblklist - snapblklist; 752107848Smckusick snapblklist[0] = snaplistsize; 753107915Smckusick ip->i_snapblklist = 0; 75490098Smckusick /* 755104698Smckusick * Write out the list of allocated blocks to the end of the snapshot. 756104698Smckusick */ 757104698Smckusick auio.uio_iov = &aiov; 758104698Smckusick auio.uio_iovcnt = 1; 759107848Smckusick aiov.iov_base = (void *)snapblklist; 760107848Smckusick aiov.iov_len = snaplistsize * sizeof(daddr_t); 761201758Smbr auio.uio_resid = aiov.iov_len; 762104698Smckusick auio.uio_offset = ip->i_size; 763104698Smckusick auio.uio_segflg = UIO_SYSSPACE; 764104698Smckusick auio.uio_rw = UIO_WRITE; 765104698Smckusick auio.uio_td = td; 766104698Smckusick if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 767104698Smckusick fs->fs_snapinum[snaploc] = 0; 768184205Sdes free(snapblklist, M_UFSMNT); 769104698Smckusick goto done; 770104698Smckusick } 771104698Smckusick /* 77290098Smckusick * Write the superblock and its summary information 77390098Smckusick * to the snapshot. 77490098Smckusick */ 77590098Smckusick blkno = fragstoblks(fs, fs->fs_csaddr); 77690098Smckusick len = howmany(fs->fs_cssize, fs->fs_bsize); 77790098Smckusick space = copy_fs->fs_csp; 77890098Smckusick for (loc = 0; loc < len; loc++) { 77990098Smckusick error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 78090098Smckusick if (error) { 78190098Smckusick brelse(nbp); 78290098Smckusick fs->fs_snapinum[snaploc] = 0; 783184205Sdes free(snapblklist, M_UFSMNT); 78490098Smckusick goto done; 78576269Smckusick } 78690098Smckusick bcopy(space, nbp->b_data, fs->fs_bsize); 78790098Smckusick space = (char *)space + fs->fs_bsize; 78890098Smckusick bawrite(nbp); 78976269Smckusick } 790225807Smckusick error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 791225807Smckusick KERNCRED, &nbp); 792225807Smckusick if (error) { 793225807Smckusick brelse(nbp); 794225807Smckusick } else { 795225807Smckusick loc = blkoff(fs, fs->fs_sblockloc); 796253280Skib bcopy((char *)copy_fs, &nbp->b_data[loc], (u_int)fs->fs_sbsize); 797225807Smckusick bawrite(nbp); 798225807Smckusick } 799107848Smckusick /* 800107848Smckusick * As this is the newest list, it is the most inclusive, so 801107848Smckusick * should replace the previous list. 802107848Smckusick */ 803107848Smckusick VI_LOCK(devvp); 804135138Sphk space = sn->sn_blklist; 805135138Sphk sn->sn_blklist = snapblklist; 806135138Sphk sn->sn_listsize = snaplistsize; 807122596Salc VI_UNLOCK(devvp); 808111240Smckusick if (space != NULL) 809184205Sdes free(space, M_UFSMNT); 810151180Stegge /* 811232351Smckusick * Preallocate all the direct blocks in the snapshot inode so 812232351Smckusick * that we never have to write the inode itself to commit an 813232351Smckusick * update to the contents of the snapshot. Note that once 814232351Smckusick * created, the size of the snapshot will never change, so 815232351Smckusick * there will never be a need to write the inode except to 816232351Smckusick * update the non-integrity-critical time fields and 817232351Smckusick * allocated-block count. 818151180Stegge */ 819232351Smckusick for (blockno = 0; blockno < NDADDR; blockno++) { 820232351Smckusick if (DIP(ip, i_db[blockno]) != 0) 821232351Smckusick continue; 822232351Smckusick error = UFS_BALLOC(vp, lblktosize(fs, blockno), 823232351Smckusick fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 824232351Smckusick if (error) 825232351Smckusick break; 826232351Smckusick error = readblock(vp, bp, blockno); 827232351Smckusick bawrite(bp); 828232351Smckusick if (error != 0) 829232351Smckusick break; 830232351Smckusick } 83190098Smckusickdone: 832184205Sdes free(copy_fs->fs_csp, M_UFSMNT); 833225807Smckusick free(copy_fs, M_UFSMNT); 834225807Smckusick copy_fs = NULL; 83562976Smckusickout: 836168576Skib NDFREE(&nd, NDF_ONLY_PNBUF); 837113872Sjhb if (saved_nice > 0) { 838170307Sjeff struct proc *p; 839170307Sjeff 840170307Sjeff p = td->td_proc; 841170307Sjeff PROC_LOCK(p); 842130551Sjulian sched_nice(td->td_proc, saved_nice); 843113872Sjhb PROC_UNLOCK(td->td_proc); 844113872Sjhb } 845140706Sjeff UFS_LOCK(ump); 84687827Smckusick if (fs->fs_active != 0) { 847184205Sdes free(fs->fs_active, M_DEVBUF); 84887827Smckusick fs->fs_active = 0; 84987827Smckusick } 850140706Sjeff UFS_UNLOCK(ump); 851162647Stegge MNT_ILOCK(mp); 852162652Stegge mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); 853162647Stegge MNT_IUNLOCK(mp); 85476269Smckusick if (error) 855234605Strasz (void) ffs_truncate(vp, (off_t)0, 0, NOCRED); 856233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, 0); 85762976Smckusick if (error) 85862976Smckusick vput(vp); 85962976Smckusick else 860175294Sattilio VOP_UNLOCK(vp, 0); 861156895Stegge vrele(nd.ni_dvp); 86262976Smckusick vn_finished_write(wrtmp); 863156560Stegge process_deferred_inactive(mp); 86462976Smckusick return (error); 86562976Smckusick} 86662976Smckusick 86762976Smckusick/* 86887827Smckusick * Copy a cylinder group map. All the unallocated blocks are marked 86987827Smckusick * BLK_NOCOPY so that the snapshot knows that it need not copy them 87092363Smckusick * if they are later written. If passno is one, then this is a first 87192363Smckusick * pass, so only setting needs to be done. If passno is 2, then this 87287827Smckusick * is a revision to a previous pass which must be undone as the 87387827Smckusick * replacement pass is done. 87487827Smckusick */ 87587827Smckusickstatic int 87687827Smckusickcgaccount(cg, vp, nbp, passno) 87787827Smckusick int cg; 87887827Smckusick struct vnode *vp; 87987827Smckusick struct buf *nbp; 88087827Smckusick int passno; 88187827Smckusick{ 88287827Smckusick struct buf *bp, *ibp; 88387827Smckusick struct inode *ip; 88487827Smckusick struct cg *cgp; 88587827Smckusick struct fs *fs; 88698542Smckusick ufs2_daddr_t base, numblks; 88798542Smckusick int error, len, loc, indiroff; 88887827Smckusick 88987827Smckusick ip = VTOI(vp); 89087827Smckusick fs = ip->i_fs; 89187827Smckusick error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 89287827Smckusick (int)fs->fs_cgsize, KERNCRED, &bp); 89387827Smckusick if (error) { 89487827Smckusick brelse(bp); 89587827Smckusick return (error); 89687827Smckusick } 89787827Smckusick cgp = (struct cg *)bp->b_data; 89887827Smckusick if (!cg_chkmagic(cgp)) { 89987827Smckusick brelse(bp); 90087827Smckusick return (EIO); 90187827Smckusick } 902140706Sjeff UFS_LOCK(ip->i_ump); 903142879Sjeff ACTIVESET(fs, cg); 904183822Skib /* 905183822Skib * Recomputation of summary information might not have been performed 906183822Skib * at mount time. Sync up summary information for current cylinder 907183822Skib * group while data is in memory to ensure that result of background 908183822Skib * fsck is slightly more consistent. 909183822Skib */ 910183822Skib fs->fs_cs(fs, cg) = cgp->cg_cs; 911140706Sjeff UFS_UNLOCK(ip->i_ump); 91287827Smckusick bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); 91387827Smckusick if (fs->fs_cgsize < fs->fs_bsize) 91487827Smckusick bzero(&nbp->b_data[fs->fs_cgsize], 91587827Smckusick fs->fs_bsize - fs->fs_cgsize); 916151178Stegge cgp = (struct cg *)nbp->b_data; 917151178Stegge bqrelse(bp); 91887827Smckusick if (passno == 2) 91987827Smckusick nbp->b_flags |= B_VALIDSUSPWRT; 92087827Smckusick numblks = howmany(fs->fs_size, fs->fs_frag); 92187827Smckusick len = howmany(fs->fs_fpg, fs->fs_frag); 922138634Smckusick base = cgbase(fs, cg) / fs->fs_frag; 92387827Smckusick if (base + len >= numblks) 92487827Smckusick len = numblks - base - 1; 92587827Smckusick loc = 0; 92687827Smckusick if (base < NDADDR) { 92787827Smckusick for ( ; loc < NDADDR; loc++) { 92887827Smckusick if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 929132775Skan DIP_SET(ip, i_db[loc], BLK_NOCOPY); 93098542Smckusick else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) 931132775Skan DIP_SET(ip, i_db[loc], 0); 93298542Smckusick else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) 93387827Smckusick panic("ffs_snapshot: lost direct block"); 93487827Smckusick } 93587827Smckusick } 93687827Smckusick error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), 93798658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 93887827Smckusick if (error) { 93987827Smckusick return (error); 94087827Smckusick } 94187827Smckusick indiroff = (base + loc - NDADDR) % NINDIR(fs); 94287827Smckusick for ( ; loc < len; loc++, indiroff++) { 94387827Smckusick if (indiroff >= NINDIR(fs)) { 94487827Smckusick if (passno == 2) 94587827Smckusick ibp->b_flags |= B_VALIDSUSPWRT; 94687827Smckusick bawrite(ibp); 94787827Smckusick error = UFS_BALLOC(vp, 94887827Smckusick lblktosize(fs, (off_t)(base + loc)), 94998658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 95087827Smckusick if (error) { 95187827Smckusick return (error); 95287827Smckusick } 95387827Smckusick indiroff = 0; 95487827Smckusick } 95598542Smckusick if (ip->i_ump->um_fstype == UFS1) { 95698542Smckusick if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 95798542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 95898542Smckusick BLK_NOCOPY; 95998542Smckusick else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data)) 96098542Smckusick [indiroff] == BLK_NOCOPY) 96198542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0; 96298542Smckusick else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data)) 96398542Smckusick [indiroff] == BLK_NOCOPY) 96498542Smckusick panic("ffs_snapshot: lost indirect block"); 96598542Smckusick continue; 96698542Smckusick } 96787827Smckusick if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 96898542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; 96987827Smckusick else if (passno == 2 && 97098542Smckusick ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 97198542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0; 97287827Smckusick else if (passno == 1 && 97398542Smckusick ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) 97487827Smckusick panic("ffs_snapshot: lost indirect block"); 97587827Smckusick } 97687827Smckusick if (passno == 2) 97787827Smckusick ibp->b_flags |= B_VALIDSUSPWRT; 97887827Smckusick bdwrite(ibp); 97987827Smckusick return (0); 98087827Smckusick} 98187827Smckusick 98287827Smckusick/* 98376269Smckusick * Before expunging a snapshot inode, note all the 98476269Smckusick * blocks that it claims with BLK_SNAP so that fsck will 98576269Smckusick * be able to account for those blocks properly and so 98676269Smckusick * that this snapshot knows that it need not copy them 98798542Smckusick * if the other snapshot holding them is freed. This code 98898542Smckusick * is reproduced once each for UFS1 and UFS2. 98976269Smckusick */ 99076269Smckusickstatic int 991207141Sjeffexpunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) 99290098Smckusick struct vnode *snapvp; 99390098Smckusick struct inode *cancelip; 99476269Smckusick struct fs *fs; 99598542Smckusick int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 99698542Smckusick struct fs *, ufs_lbn_t, int); 99790098Smckusick int expungetype; 998207141Sjeff int clearmode; 99976269Smckusick{ 100098542Smckusick int i, error, indiroff; 100198542Smckusick ufs_lbn_t lbn, rlbn; 100298542Smckusick ufs2_daddr_t len, blkno, numblks, blksperindir; 100398542Smckusick struct ufs1_dinode *dip; 100490098Smckusick struct thread *td = curthread; 100576269Smckusick struct buf *bp; 100676269Smckusick 100776269Smckusick /* 100890098Smckusick * Prepare to expunge the inode. If its inode block has not 100990098Smckusick * yet been copied, then allocate and fill the copy. 101076269Smckusick */ 101190098Smckusick lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 101290098Smckusick blkno = 0; 101390098Smckusick if (lbn < NDADDR) { 1014107558Smckusick blkno = VTOI(snapvp)->i_din1->di_db[lbn]; 101590098Smckusick } else { 1016207742Sjeff if (DOINGSOFTDEP(snapvp)) 1017207742Sjeff softdep_prealloc(snapvp, MNT_WAIT); 1018121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 1019141526Sphk error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 102098658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1021121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 102290098Smckusick if (error) 102390098Smckusick return (error); 102490098Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 102598542Smckusick blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; 102690098Smckusick bqrelse(bp); 102790098Smckusick } 1028107558Smckusick if (blkno != 0) { 1029107558Smckusick if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1030107558Smckusick return (error); 1031107558Smckusick } else { 1032141526Sphk error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), 1033107558Smckusick fs->fs_bsize, KERNCRED, 0, &bp); 1034107558Smckusick if (error) 1035107558Smckusick return (error); 1036135138Sphk if ((error = readblock(snapvp, bp, lbn)) != 0) 1037107558Smckusick return (error); 1038107558Smckusick } 103990098Smckusick /* 104090098Smckusick * Set a snapshot inode to be a zero length file, regular files 1041158527Stegge * or unlinked snapshots to be completely unallocated. 104290098Smckusick */ 104398542Smckusick dip = (struct ufs1_dinode *)bp->b_data + 104498542Smckusick ino_to_fsbo(fs, cancelip->i_number); 1045207141Sjeff if (clearmode || cancelip->i_effnlink == 0) 104690098Smckusick dip->di_mode = 0; 104776269Smckusick dip->di_size = 0; 104876269Smckusick dip->di_blocks = 0; 104976269Smckusick dip->di_flags &= ~SF_SNAPSHOT; 105098542Smckusick bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); 105176269Smckusick bdwrite(bp); 1052107848Smckusick /* 1053107848Smckusick * Now go through and expunge all the blocks in the file 1054107848Smckusick * using the function requested. 1055107848Smckusick */ 1056107848Smckusick numblks = howmany(cancelip->i_size, fs->fs_bsize); 1057107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], 1058107848Smckusick &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) 1059107848Smckusick return (error); 1060107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], 1061107848Smckusick &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) 1062107848Smckusick return (error); 1063107848Smckusick blksperindir = 1; 1064107848Smckusick lbn = -NDADDR; 1065107848Smckusick len = numblks - NDADDR; 1066107848Smckusick rlbn = NDADDR; 1067107848Smckusick for (i = 0; len > 0 && i < NIADDR; i++) { 1068107848Smckusick error = indiracct_ufs1(snapvp, ITOV(cancelip), i, 1069107848Smckusick cancelip->i_din1->di_ib[i], lbn, rlbn, len, 1070107848Smckusick blksperindir, fs, acctfunc, expungetype); 1071107848Smckusick if (error) 1072107848Smckusick return (error); 1073107848Smckusick blksperindir *= NINDIR(fs); 1074107848Smckusick lbn -= blksperindir + 1; 1075107848Smckusick len -= blksperindir; 1076107848Smckusick rlbn += blksperindir; 1077107848Smckusick } 107876269Smckusick return (0); 107976269Smckusick} 108076269Smckusick 108176269Smckusick/* 108262976Smckusick * Descend an indirect block chain for vnode cancelvp accounting for all 108362976Smckusick * its indirect blocks in snapvp. 108462976Smckusick */ 108562976Smckusickstatic int 108698542Smckusickindiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 108798542Smckusick blksperindir, fs, acctfunc, expungetype) 108862976Smckusick struct vnode *snapvp; 108962976Smckusick struct vnode *cancelvp; 109062976Smckusick int level; 109198542Smckusick ufs1_daddr_t blkno; 109298542Smckusick ufs_lbn_t lbn; 109398542Smckusick ufs_lbn_t rlbn; 109498542Smckusick ufs_lbn_t remblks; 109598542Smckusick ufs_lbn_t blksperindir; 109676269Smckusick struct fs *fs; 109798542Smckusick int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, 109898542Smckusick struct fs *, ufs_lbn_t, int); 109990098Smckusick int expungetype; 110062976Smckusick{ 110198542Smckusick int error, num, i; 110298542Smckusick ufs_lbn_t subblksperindir; 110362976Smckusick struct indir indirs[NIADDR + 2]; 110498542Smckusick ufs1_daddr_t last, *bap; 110562976Smckusick struct buf *bp; 110662976Smckusick 1107121158Smckusick if (blkno == 0) { 1108121158Smckusick if (expungetype == BLK_NOCOPY) 1109121158Smckusick return (0); 1110121158Smckusick panic("indiracct_ufs1: missing indir"); 1111121158Smckusick } 111262976Smckusick if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 111362976Smckusick return (error); 1114121158Smckusick if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1115121158Smckusick panic("indiracct_ufs1: botched params"); 111662976Smckusick /* 111762976Smckusick * We have to expand bread here since it will deadlock looking 111862976Smckusick * up the block number for any blocks that are not in the cache. 111962976Smckusick */ 1120111856Sjeff bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 112162976Smckusick bp->b_blkno = fsbtodb(fs, blkno); 112262976Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1123135138Sphk (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 112462976Smckusick brelse(bp); 112562976Smckusick return (error); 112662976Smckusick } 112762976Smckusick /* 112862976Smckusick * Account for the block pointers in this indirect block. 112962976Smckusick */ 113062976Smckusick last = howmany(remblks, blksperindir); 113162976Smckusick if (last > NINDIR(fs)) 113262976Smckusick last = NINDIR(fs); 1133184205Sdes bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); 113476269Smckusick bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 113576269Smckusick bqrelse(bp); 1136107848Smckusick error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1137107848Smckusick level == 0 ? rlbn : -1, expungetype); 113862976Smckusick if (error || level == 0) 113962976Smckusick goto out; 114062976Smckusick /* 114162976Smckusick * Account for the block pointers in each of the indirect blocks 114262976Smckusick * in the levels below us. 114362976Smckusick */ 114462976Smckusick subblksperindir = blksperindir / NINDIR(fs); 114562976Smckusick for (lbn++, level--, i = 0; i < last; i++) { 114698542Smckusick error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, 114790098Smckusick rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 114862976Smckusick if (error) 114962976Smckusick goto out; 115062976Smckusick rlbn += blksperindir; 115162976Smckusick lbn -= blksperindir; 115262976Smckusick remblks -= blksperindir; 115362976Smckusick } 115462976Smckusickout: 1155184205Sdes free(bap, M_DEVBUF); 115662976Smckusick return (error); 115762976Smckusick} 115862976Smckusick 115962976Smckusick/* 116090098Smckusick * Do both snap accounting and map accounting. 116190098Smckusick */ 116290098Smckusickstatic int 116398542Smckusickfullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) 116490098Smckusick struct vnode *vp; 116598542Smckusick ufs1_daddr_t *oldblkp, *lastblkp; 116690098Smckusick struct fs *fs; 116798542Smckusick ufs_lbn_t lblkno; 116898542Smckusick int exptype; /* BLK_SNAP or BLK_NOCOPY */ 116998542Smckusick{ 117098542Smckusick int error; 117198542Smckusick 117298542Smckusick if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 117398542Smckusick return (error); 117498542Smckusick return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 117598542Smckusick} 117698542Smckusick 117798542Smckusick/* 117898542Smckusick * Identify a set of blocks allocated in a snapshot inode. 117998542Smckusick */ 118098542Smckusickstatic int 118198542Smckusicksnapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 118298542Smckusick struct vnode *vp; 118398542Smckusick ufs1_daddr_t *oldblkp, *lastblkp; 118498542Smckusick struct fs *fs; 118598542Smckusick ufs_lbn_t lblkno; 118690098Smckusick int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 118790098Smckusick{ 118898542Smckusick struct inode *ip = VTOI(vp); 118998542Smckusick ufs1_daddr_t blkno, *blkp; 119098542Smckusick ufs_lbn_t lbn; 119198542Smckusick struct buf *ibp; 119290098Smckusick int error; 119390098Smckusick 119498542Smckusick for ( ; oldblkp < lastblkp; oldblkp++) { 119598542Smckusick blkno = *oldblkp; 119698542Smckusick if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 119798542Smckusick continue; 119898542Smckusick lbn = fragstoblks(fs, blkno); 119998542Smckusick if (lbn < NDADDR) { 120098542Smckusick blkp = &ip->i_din1->di_db[lbn]; 120198542Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 120298542Smckusick } else { 1203141526Sphk error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn), 120498658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 120598542Smckusick if (error) 120698542Smckusick return (error); 120798542Smckusick blkp = &((ufs1_daddr_t *)(ibp->b_data)) 120898542Smckusick [(lbn - NDADDR) % NINDIR(fs)]; 120998542Smckusick } 121098542Smckusick /* 121198542Smckusick * If we are expunging a snapshot vnode and we 121298542Smckusick * find a block marked BLK_NOCOPY, then it is 121398542Smckusick * one that has been allocated to this snapshot after 121498542Smckusick * we took our current snapshot and can be ignored. 121598542Smckusick */ 121698542Smckusick if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 121798542Smckusick if (lbn >= NDADDR) 121898542Smckusick brelse(ibp); 121998542Smckusick } else { 122098542Smckusick if (*blkp != 0) 1221121158Smckusick panic("snapacct_ufs1: bad block"); 122298542Smckusick *blkp = expungetype; 122398542Smckusick if (lbn >= NDADDR) 122498542Smckusick bdwrite(ibp); 122598542Smckusick } 122698542Smckusick } 122798542Smckusick return (0); 122898542Smckusick} 122998542Smckusick 123098542Smckusick/* 123198542Smckusick * Account for a set of blocks allocated in a snapshot inode. 123298542Smckusick */ 123398542Smckusickstatic int 123498542Smckusickmapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 123598542Smckusick struct vnode *vp; 123698542Smckusick ufs1_daddr_t *oldblkp, *lastblkp; 123798542Smckusick struct fs *fs; 123898542Smckusick ufs_lbn_t lblkno; 123998542Smckusick int expungetype; 124098542Smckusick{ 124198542Smckusick ufs1_daddr_t blkno; 1242104698Smckusick struct inode *ip; 124398542Smckusick ino_t inum; 1244108050Smckusick int acctit; 124598542Smckusick 1246104698Smckusick ip = VTOI(vp); 1247104698Smckusick inum = ip->i_number; 1248108050Smckusick if (lblkno == -1) 1249108050Smckusick acctit = 0; 1250108050Smckusick else 1251108050Smckusick acctit = 1; 125298542Smckusick for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 125398542Smckusick blkno = *oldblkp; 125498542Smckusick if (blkno == 0 || blkno == BLK_NOCOPY) 125598542Smckusick continue; 1256108050Smckusick if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1257107915Smckusick *ip->i_snapblklist++ = lblkno; 125898542Smckusick if (blkno == BLK_SNAP) 125998542Smckusick blkno = blkstofrags(fs, lblkno); 1260223127Smckusick ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, 1261223127Smckusick vp->v_type, NULL); 126298542Smckusick } 126398542Smckusick return (0); 126498542Smckusick} 126598542Smckusick 126698542Smckusick/* 126798542Smckusick * Before expunging a snapshot inode, note all the 126898542Smckusick * blocks that it claims with BLK_SNAP so that fsck will 126998542Smckusick * be able to account for those blocks properly and so 127098542Smckusick * that this snapshot knows that it need not copy them 127198542Smckusick * if the other snapshot holding them is freed. This code 127298542Smckusick * is reproduced once each for UFS1 and UFS2. 127398542Smckusick */ 127498542Smckusickstatic int 1275207141Sjeffexpunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) 127698542Smckusick struct vnode *snapvp; 127798542Smckusick struct inode *cancelip; 127898542Smckusick struct fs *fs; 127998542Smckusick int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 128098542Smckusick struct fs *, ufs_lbn_t, int); 128198542Smckusick int expungetype; 1282207141Sjeff int clearmode; 128398542Smckusick{ 128498542Smckusick int i, error, indiroff; 128598542Smckusick ufs_lbn_t lbn, rlbn; 128698542Smckusick ufs2_daddr_t len, blkno, numblks, blksperindir; 128798542Smckusick struct ufs2_dinode *dip; 128898542Smckusick struct thread *td = curthread; 128998542Smckusick struct buf *bp; 129098542Smckusick 129198542Smckusick /* 129298542Smckusick * Prepare to expunge the inode. If its inode block has not 129398542Smckusick * yet been copied, then allocate and fill the copy. 129498542Smckusick */ 129598542Smckusick lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 129698542Smckusick blkno = 0; 129798542Smckusick if (lbn < NDADDR) { 1298107558Smckusick blkno = VTOI(snapvp)->i_din2->di_db[lbn]; 129998542Smckusick } else { 1300207742Sjeff if (DOINGSOFTDEP(snapvp)) 1301207742Sjeff softdep_prealloc(snapvp, MNT_WAIT); 1302121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 1303141526Sphk error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 130498658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1305121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 130698542Smckusick if (error) 130798542Smckusick return (error); 130898542Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 130998542Smckusick blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; 131098542Smckusick bqrelse(bp); 131198542Smckusick } 1312107558Smckusick if (blkno != 0) { 1313107558Smckusick if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1314107558Smckusick return (error); 1315107558Smckusick } else { 1316141526Sphk error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), 1317107558Smckusick fs->fs_bsize, KERNCRED, 0, &bp); 1318107558Smckusick if (error) 1319107558Smckusick return (error); 1320135138Sphk if ((error = readblock(snapvp, bp, lbn)) != 0) 1321107558Smckusick return (error); 1322107558Smckusick } 132398542Smckusick /* 132498542Smckusick * Set a snapshot inode to be a zero length file, regular files 132598542Smckusick * to be completely unallocated. 132698542Smckusick */ 132798542Smckusick dip = (struct ufs2_dinode *)bp->b_data + 132898542Smckusick ino_to_fsbo(fs, cancelip->i_number); 1329207141Sjeff if (clearmode || cancelip->i_effnlink == 0) 133098542Smckusick dip->di_mode = 0; 133198542Smckusick dip->di_size = 0; 133298542Smckusick dip->di_blocks = 0; 133398542Smckusick dip->di_flags &= ~SF_SNAPSHOT; 133498542Smckusick bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); 133598542Smckusick bdwrite(bp); 1336107848Smckusick /* 1337107848Smckusick * Now go through and expunge all the blocks in the file 1338107848Smckusick * using the function requested. 1339107848Smckusick */ 1340107848Smckusick numblks = howmany(cancelip->i_size, fs->fs_bsize); 1341107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], 1342107848Smckusick &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) 1343107848Smckusick return (error); 1344107848Smckusick if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], 1345107848Smckusick &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) 1346107848Smckusick return (error); 1347107848Smckusick blksperindir = 1; 1348107848Smckusick lbn = -NDADDR; 1349107848Smckusick len = numblks - NDADDR; 1350107848Smckusick rlbn = NDADDR; 1351107848Smckusick for (i = 0; len > 0 && i < NIADDR; i++) { 1352107848Smckusick error = indiracct_ufs2(snapvp, ITOV(cancelip), i, 1353107848Smckusick cancelip->i_din2->di_ib[i], lbn, rlbn, len, 1354107848Smckusick blksperindir, fs, acctfunc, expungetype); 1355107848Smckusick if (error) 1356107848Smckusick return (error); 1357107848Smckusick blksperindir *= NINDIR(fs); 1358107848Smckusick lbn -= blksperindir + 1; 1359107848Smckusick len -= blksperindir; 1360107848Smckusick rlbn += blksperindir; 1361107848Smckusick } 136298542Smckusick return (0); 136390098Smckusick} 136490098Smckusick 136590098Smckusick/* 136698542Smckusick * Descend an indirect block chain for vnode cancelvp accounting for all 136798542Smckusick * its indirect blocks in snapvp. 136898542Smckusick */ 136998542Smckusickstatic int 137098542Smckusickindiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 137198542Smckusick blksperindir, fs, acctfunc, expungetype) 137298542Smckusick struct vnode *snapvp; 137398542Smckusick struct vnode *cancelvp; 137498542Smckusick int level; 137598542Smckusick ufs2_daddr_t blkno; 137698542Smckusick ufs_lbn_t lbn; 137798542Smckusick ufs_lbn_t rlbn; 137898542Smckusick ufs_lbn_t remblks; 137998542Smckusick ufs_lbn_t blksperindir; 138098542Smckusick struct fs *fs; 138198542Smckusick int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, 138298542Smckusick struct fs *, ufs_lbn_t, int); 138398542Smckusick int expungetype; 138498542Smckusick{ 138598542Smckusick int error, num, i; 138698542Smckusick ufs_lbn_t subblksperindir; 138798542Smckusick struct indir indirs[NIADDR + 2]; 138898542Smckusick ufs2_daddr_t last, *bap; 138998542Smckusick struct buf *bp; 139098542Smckusick 1391121158Smckusick if (blkno == 0) { 1392121158Smckusick if (expungetype == BLK_NOCOPY) 1393121158Smckusick return (0); 1394121158Smckusick panic("indiracct_ufs2: missing indir"); 1395121158Smckusick } 139698542Smckusick if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 139798542Smckusick return (error); 1398121158Smckusick if (lbn != indirs[num - 1 - level].in_lbn || num < 2) 1399121158Smckusick panic("indiracct_ufs2: botched params"); 140098542Smckusick /* 140198542Smckusick * We have to expand bread here since it will deadlock looking 140298542Smckusick * up the block number for any blocks that are not in the cache. 140398542Smckusick */ 1404111856Sjeff bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 140598542Smckusick bp->b_blkno = fsbtodb(fs, blkno); 140698542Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && 1407135138Sphk (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { 140898542Smckusick brelse(bp); 140998542Smckusick return (error); 141098542Smckusick } 141198542Smckusick /* 141298542Smckusick * Account for the block pointers in this indirect block. 141398542Smckusick */ 141498542Smckusick last = howmany(remblks, blksperindir); 141598542Smckusick if (last > NINDIR(fs)) 141698542Smckusick last = NINDIR(fs); 1417184205Sdes bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); 141898542Smckusick bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 141998542Smckusick bqrelse(bp); 1420107848Smckusick error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, 1421107848Smckusick level == 0 ? rlbn : -1, expungetype); 142298542Smckusick if (error || level == 0) 142398542Smckusick goto out; 142498542Smckusick /* 142598542Smckusick * Account for the block pointers in each of the indirect blocks 142698542Smckusick * in the levels below us. 142798542Smckusick */ 142898542Smckusick subblksperindir = blksperindir / NINDIR(fs); 142998542Smckusick for (lbn++, level--, i = 0; i < last; i++) { 143098542Smckusick error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, 143198542Smckusick rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 143298542Smckusick if (error) 143398542Smckusick goto out; 143498542Smckusick rlbn += blksperindir; 143598542Smckusick lbn -= blksperindir; 143698542Smckusick remblks -= blksperindir; 143798542Smckusick } 143898542Smckusickout: 1439184205Sdes free(bap, M_DEVBUF); 144098542Smckusick return (error); 144198542Smckusick} 144298542Smckusick 144398542Smckusick/* 144498542Smckusick * Do both snap accounting and map accounting. 144598542Smckusick */ 144698542Smckusickstatic int 144798542Smckusickfullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) 144898542Smckusick struct vnode *vp; 144998542Smckusick ufs2_daddr_t *oldblkp, *lastblkp; 145098542Smckusick struct fs *fs; 145198542Smckusick ufs_lbn_t lblkno; 145298542Smckusick int exptype; /* BLK_SNAP or BLK_NOCOPY */ 145398542Smckusick{ 145498542Smckusick int error; 145598542Smckusick 145698542Smckusick if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 145798542Smckusick return (error); 145898542Smckusick return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 145998542Smckusick} 146098542Smckusick 146198542Smckusick/* 146287827Smckusick * Identify a set of blocks allocated in a snapshot inode. 146362976Smckusick */ 146462976Smckusickstatic int 146598542Smckusicksnapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 146662976Smckusick struct vnode *vp; 146798542Smckusick ufs2_daddr_t *oldblkp, *lastblkp; 146876269Smckusick struct fs *fs; 146998542Smckusick ufs_lbn_t lblkno; 147090098Smckusick int expungetype; /* BLK_SNAP or BLK_NOCOPY */ 147162976Smckusick{ 147262976Smckusick struct inode *ip = VTOI(vp); 147398542Smckusick ufs2_daddr_t blkno, *blkp; 147498542Smckusick ufs_lbn_t lbn; 147562976Smckusick struct buf *ibp; 147662976Smckusick int error; 147762976Smckusick 147862976Smckusick for ( ; oldblkp < lastblkp; oldblkp++) { 147962976Smckusick blkno = *oldblkp; 148062976Smckusick if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) 148162976Smckusick continue; 148262976Smckusick lbn = fragstoblks(fs, blkno); 148362976Smckusick if (lbn < NDADDR) { 148498542Smckusick blkp = &ip->i_din2->di_db[lbn]; 148562976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 148662976Smckusick } else { 1487141526Sphk error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn), 148898658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 148962976Smckusick if (error) 149062976Smckusick return (error); 149198542Smckusick blkp = &((ufs2_daddr_t *)(ibp->b_data)) 149262976Smckusick [(lbn - NDADDR) % NINDIR(fs)]; 149362976Smckusick } 149487827Smckusick /* 149590098Smckusick * If we are expunging a snapshot vnode and we 149690098Smckusick * find a block marked BLK_NOCOPY, then it is 149787827Smckusick * one that has been allocated to this snapshot after 149887827Smckusick * we took our current snapshot and can be ignored. 149987827Smckusick */ 150090098Smckusick if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { 150187827Smckusick if (lbn >= NDADDR) 150287827Smckusick brelse(ibp); 150387827Smckusick } else { 150487827Smckusick if (*blkp != 0) 1505121158Smckusick panic("snapacct_ufs2: bad block"); 150690098Smckusick *blkp = expungetype; 150787827Smckusick if (lbn >= NDADDR) 150887827Smckusick bdwrite(ibp); 150963788Smckusick } 151062976Smckusick } 151162976Smckusick return (0); 151262976Smckusick} 151362976Smckusick 151462976Smckusick/* 151576269Smckusick * Account for a set of blocks allocated in a snapshot inode. 151676269Smckusick */ 151776269Smckusickstatic int 151898542Smckusickmapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 151976269Smckusick struct vnode *vp; 152098542Smckusick ufs2_daddr_t *oldblkp, *lastblkp; 152176269Smckusick struct fs *fs; 152298542Smckusick ufs_lbn_t lblkno; 152390098Smckusick int expungetype; 152476269Smckusick{ 152598542Smckusick ufs2_daddr_t blkno; 1526104698Smckusick struct inode *ip; 152790098Smckusick ino_t inum; 1528108050Smckusick int acctit; 152976269Smckusick 1530104698Smckusick ip = VTOI(vp); 1531104698Smckusick inum = ip->i_number; 1532108050Smckusick if (lblkno == -1) 1533108050Smckusick acctit = 0; 1534108050Smckusick else 1535108050Smckusick acctit = 1; 153676269Smckusick for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 153776269Smckusick blkno = *oldblkp; 153876269Smckusick if (blkno == 0 || blkno == BLK_NOCOPY) 153976269Smckusick continue; 1540108050Smckusick if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1541107915Smckusick *ip->i_snapblklist++ = lblkno; 154276269Smckusick if (blkno == BLK_SNAP) 154376269Smckusick blkno = blkstofrags(fs, lblkno); 1544223127Smckusick ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, 1545223127Smckusick vp->v_type, NULL); 154676269Smckusick } 154776269Smckusick return (0); 154876269Smckusick} 154976269Smckusick 155076269Smckusick/* 155170183Smckusick * Decrement extra reference on snapshot when last name is removed. 155270183Smckusick * It will not be freed until the last open reference goes away. 155370183Smckusick */ 155470183Smckusickvoid 155570183Smckusickffs_snapgone(ip) 155670183Smckusick struct inode *ip; 155770183Smckusick{ 155870183Smckusick struct inode *xp; 155974547Smckusick struct fs *fs; 156074547Smckusick int snaploc; 1561135138Sphk struct snapdata *sn; 1562140706Sjeff struct ufsmount *ump; 156370183Smckusick 156470183Smckusick /* 156570183Smckusick * Find snapshot in incore list. 156670183Smckusick */ 1567135138Sphk xp = NULL; 1568135138Sphk sn = ip->i_devvp->v_rdev->si_snapdata; 1569135138Sphk if (sn != NULL) 1570135138Sphk TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) 1571135138Sphk if (xp == ip) 1572135138Sphk break; 1573107848Smckusick if (xp != NULL) 1574107848Smckusick vrele(ITOV(ip)); 1575107848Smckusick else if (snapdebug) 1576241011Smdf printf("ffs_snapgone: lost snapshot vnode %ju\n", 1577241011Smdf (uintmax_t)ip->i_number); 157874547Smckusick /* 157974547Smckusick * Delete snapshot inode from superblock. Keep list dense. 158074547Smckusick */ 158174547Smckusick fs = ip->i_fs; 1582140706Sjeff ump = ip->i_ump; 1583140706Sjeff UFS_LOCK(ump); 158474547Smckusick for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 158574547Smckusick if (fs->fs_snapinum[snaploc] == ip->i_number) 158674547Smckusick break; 158774547Smckusick if (snaploc < FSMAXSNAP) { 158874547Smckusick for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 158974547Smckusick if (fs->fs_snapinum[snaploc] == 0) 159074547Smckusick break; 159174547Smckusick fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 159274547Smckusick } 159374547Smckusick fs->fs_snapinum[snaploc - 1] = 0; 159474547Smckusick } 1595140706Sjeff UFS_UNLOCK(ump); 159670183Smckusick} 159770183Smckusick 159870183Smckusick/* 159962976Smckusick * Prepare a snapshot file for being removed. 160062976Smckusick */ 160162976Smckusickvoid 160262976Smckusickffs_snapremove(vp) 160362976Smckusick struct vnode *vp; 160462976Smckusick{ 160573942Smckusick struct inode *ip; 160662976Smckusick struct vnode *devvp; 160762976Smckusick struct buf *ibp; 160862976Smckusick struct fs *fs; 1609158259Stegge ufs2_daddr_t numblks, blkno, dblk; 161098542Smckusick int error, loc, last; 1611135138Sphk struct snapdata *sn; 161262976Smckusick 161362976Smckusick ip = VTOI(vp); 161462976Smckusick fs = ip->i_fs; 1615107414Smckusick devvp = ip->i_devvp; 161662976Smckusick /* 161775943Smckusick * If active, delete from incore list (this snapshot may 161875943Smckusick * already have been in the process of being deleted, so 161975943Smckusick * would not have been active). 162075943Smckusick * 162162976Smckusick * Clear copy-on-write flag if last snapshot. 162262976Smckusick */ 1623158259Stegge VI_LOCK(devvp); 162475943Smckusick if (ip->i_nextsnap.tqe_prev != 0) { 1625158259Stegge sn = devvp->v_rdev->si_snapdata; 1626135138Sphk TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap); 1627107414Smckusick ip->i_nextsnap.tqe_prev = 0; 1628158259Stegge VI_UNLOCK(devvp); 1629175635Sattilio lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 1630158259Stegge KASSERT(vp->v_vnlock == &sn->sn_lock, 1631158259Stegge ("ffs_snapremove: lost lock mutation")); 1632105191Smckusick vp->v_vnlock = &vp->v_lock; 1633158259Stegge VI_LOCK(devvp); 1634175635Sattilio lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 1635177778Sjeff try_free_snapdata(devvp); 1636158259Stegge } else 1637158259Stegge VI_UNLOCK(devvp); 163862976Smckusick /* 163962976Smckusick * Clear all BLK_NOCOPY fields. Pass any block claims to other 164062976Smckusick * snapshots that want them (see ffs_snapblkfree below). 164162976Smckusick */ 164262976Smckusick for (blkno = 1; blkno < NDADDR; blkno++) { 164398542Smckusick dblk = DIP(ip, i_db[blkno]); 1644151177Stegge if (dblk == 0) 1645151177Stegge continue; 164676356Smckusick if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 1647132775Skan DIP_SET(ip, i_db[blkno], 0); 164876356Smckusick else if ((dblk == blkstofrags(fs, blkno) && 164990098Smckusick ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, 1650223127Smckusick ip->i_number, vp->v_type, NULL))) { 1651132775Skan DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - 1652132775Skan btodb(fs->fs_bsize)); 1653132775Skan DIP_SET(ip, i_db[blkno], 0); 165476356Smckusick } 165562976Smckusick } 165676356Smckusick numblks = howmany(ip->i_size, fs->fs_bsize); 165776356Smckusick for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 165876132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 165998658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 166062976Smckusick if (error) 166162976Smckusick continue; 166298542Smckusick if (fs->fs_size - blkno > NINDIR(fs)) 166362976Smckusick last = NINDIR(fs); 166498542Smckusick else 166598542Smckusick last = fs->fs_size - blkno; 166662976Smckusick for (loc = 0; loc < last; loc++) { 166798542Smckusick if (ip->i_ump->um_fstype == UFS1) { 166898542Smckusick dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; 1669151177Stegge if (dblk == 0) 1670151177Stegge continue; 167198542Smckusick if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 167298542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 167398542Smckusick else if ((dblk == blkstofrags(fs, blkno) && 167498542Smckusick ffs_snapblkfree(fs, ip->i_devvp, dblk, 1675223127Smckusick fs->fs_bsize, ip->i_number, vp->v_type, 1676223127Smckusick NULL))) { 167798542Smckusick ip->i_din1->di_blocks -= 167898542Smckusick btodb(fs->fs_bsize); 167998542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; 168098542Smckusick } 168198542Smckusick continue; 168298542Smckusick } 168398542Smckusick dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc]; 1684151177Stegge if (dblk == 0) 1685151177Stegge continue; 168676356Smckusick if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) 168798542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 168876356Smckusick else if ((dblk == blkstofrags(fs, blkno) && 168990098Smckusick ffs_snapblkfree(fs, ip->i_devvp, dblk, 1690223127Smckusick fs->fs_bsize, ip->i_number, vp->v_type, NULL))) { 169198542Smckusick ip->i_din2->di_blocks -= btodb(fs->fs_bsize); 169298542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; 169376356Smckusick } 169462976Smckusick } 169562976Smckusick bawrite(ibp); 169662976Smckusick } 169762976Smckusick /* 169862976Smckusick * Clear snapshot flag and drop reference. 169962976Smckusick */ 170063897Smckusick ip->i_flags &= ~SF_SNAPSHOT; 1701132775Skan DIP_SET(ip, i_flags, ip->i_flags); 170262976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 1703207141Sjeff /* 1704207141Sjeff * The dirtied indirects must be written out before 1705207141Sjeff * softdep_setup_freeblocks() is called. Otherwise indir_trunc() 1706207141Sjeff * may find indirect pointers using the magic BLK_* values. 1707207141Sjeff */ 1708207141Sjeff if (DOINGSOFTDEP(vp)) 1709233438Smckusick ffs_syncvnode(vp, MNT_WAIT, 0); 1710158322Stegge#ifdef QUOTA 1711158322Stegge /* 1712158322Stegge * Reenable disk quotas for ex-snapshot file. 1713158322Stegge */ 1714158322Stegge if (!getinoquota(ip)) 1715158322Stegge (void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE); 1716158322Stegge#endif 171762976Smckusick} 171862976Smckusick 171962976Smckusick/* 172062976Smckusick * Notification that a block is being freed. Return zero if the free 172162976Smckusick * should be allowed to proceed. Return non-zero if the snapshot file 172262976Smckusick * wants to claim the block. The block will be claimed if it is an 172362976Smckusick * uncopied part of one of the snapshots. It will be freed if it is 172462976Smckusick * either a BLK_NOCOPY or has already been copied in all of the snapshots. 172562976Smckusick * If a fragment is being freed, then all snapshots that care about 172662976Smckusick * it must make a copy since a snapshot file can only claim full sized 172762976Smckusick * blocks. Note that if more than one snapshot file maps the block, 172862976Smckusick * we can pick one at random to claim it. Since none of the snapshots 172962976Smckusick * can change, we are assurred that they will all see the same unmodified 173062976Smckusick * image. When deleting a snapshot file (see ffs_snapremove above), we 173162976Smckusick * must push any of these claimed blocks to one of the other snapshots 173262976Smckusick * that maps it. These claimed blocks are easily identified as they will 173362976Smckusick * have a block number equal to their logical block number within the 173462976Smckusick * snapshot. A copied block can never have this property because they 173562976Smckusick * must always have been allocated from a BLK_NOCOPY location. 173662976Smckusick */ 173762976Smckusickint 1738223127Smckusickffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) 173990098Smckusick struct fs *fs; 174090098Smckusick struct vnode *devvp; 174198542Smckusick ufs2_daddr_t bno; 174262976Smckusick long size; 174390098Smckusick ino_t inum; 1744223127Smckusick enum vtype vtype; 1745223020Smckusick struct workhead *wkhd; 174662976Smckusick{ 1747238697Skevlo struct buf *ibp, *cbp, *savedcbp = NULL; 174883366Sjulian struct thread *td = curthread; 174962976Smckusick struct inode *ip; 1750107414Smckusick struct vnode *vp = NULL; 175198542Smckusick ufs_lbn_t lbn; 175298542Smckusick ufs2_daddr_t blkno; 1753151177Stegge int indiroff = 0, error = 0, claimedblk = 0; 1754135138Sphk struct snapdata *sn; 175562976Smckusick 175662976Smckusick lbn = fragstoblks(fs, bno); 1757107414Smckusickretry: 1758107414Smckusick VI_LOCK(devvp); 1759135138Sphk sn = devvp->v_rdev->si_snapdata; 1760135312Sphk if (sn == NULL) { 1761135312Sphk VI_UNLOCK(devvp); 1762135312Sphk return (0); 1763135312Sphk } 1764175635Sattilio if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 1765175635Sattilio VI_MTX(devvp)) != 0) 1766151177Stegge goto retry; 1767135138Sphk TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 176862976Smckusick vp = ITOV(ip); 1769207742Sjeff if (DOINGSOFTDEP(vp)) 1770207742Sjeff softdep_prealloc(vp, MNT_WAIT); 177162976Smckusick /* 177262976Smckusick * Lookup block being written. 177362976Smckusick */ 177462976Smckusick if (lbn < NDADDR) { 177598542Smckusick blkno = DIP(ip, i_db[lbn]); 177662976Smckusick } else { 1777121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 177876132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 177998658Sdillon fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1780121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 178162976Smckusick if (error) 178262976Smckusick break; 178362976Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 178498542Smckusick if (ip->i_ump->um_fstype == UFS1) 178598542Smckusick blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 178698542Smckusick else 178798542Smckusick blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 178862976Smckusick } 178962976Smckusick /* 179062976Smckusick * Check to see if block needs to be copied. 179162976Smckusick */ 179298542Smckusick if (blkno == 0) { 179398542Smckusick /* 179498542Smckusick * A block that we map is being freed. If it has not 179598542Smckusick * been claimed yet, we will claim or copy it (below). 179698542Smckusick */ 179798542Smckusick claimedblk = 1; 179898542Smckusick } else if (blkno == BLK_SNAP) { 179998542Smckusick /* 180098542Smckusick * No previous snapshot claimed the block, 1801107414Smckusick * so it will be freed and become a BLK_NOCOPY 180298542Smckusick * (don't care) for us. 180398542Smckusick */ 180462976Smckusick if (claimedblk) 180562976Smckusick panic("snapblkfree: inconsistent block type"); 180662976Smckusick if (lbn < NDADDR) { 1807132775Skan DIP_SET(ip, i_db[lbn], BLK_NOCOPY); 180862976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 180998542Smckusick } else if (ip->i_ump->um_fstype == UFS1) { 181098542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 181198542Smckusick BLK_NOCOPY; 181298542Smckusick bdwrite(ibp); 181362976Smckusick } else { 181498542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 181562976Smckusick BLK_NOCOPY; 181662976Smckusick bdwrite(ibp); 181762976Smckusick } 181862976Smckusick continue; 181998542Smckusick } else /* BLK_NOCOPY or default */ { 182098542Smckusick /* 182198542Smckusick * If the snapshot has already copied the block 182298542Smckusick * (default), or does not care about the block, 182398542Smckusick * it is not needed. 182498542Smckusick */ 182598542Smckusick if (lbn >= NDADDR) 182698542Smckusick bqrelse(ibp); 182798542Smckusick continue; 182862976Smckusick } 182962976Smckusick /* 183062976Smckusick * If this is a full size block, we will just grab it 183162976Smckusick * and assign it to the snapshot inode. Otherwise we 183262976Smckusick * will proceed to copy it. See explanation for this 183362976Smckusick * routine as to why only a single snapshot needs to 183462976Smckusick * claim this block. 183562976Smckusick */ 183662976Smckusick if (size == fs->fs_bsize) { 183762976Smckusick#ifdef DEBUG 183862976Smckusick if (snapdebug) 1839241011Smdf printf("%s %ju lbn %jd from inum %ju\n", 1840241011Smdf "Grabonremove: snapino", 1841241011Smdf (uintmax_t)ip->i_number, 1842241011Smdf (intmax_t)lbn, (uintmax_t)inum); 184362976Smckusick#endif 1844223020Smckusick /* 1845223020Smckusick * If journaling is tracking this write we must add 1846223020Smckusick * the work to the inode or indirect being written. 1847223020Smckusick */ 1848223020Smckusick if (wkhd != NULL) { 1849223020Smckusick if (lbn < NDADDR) 1850223020Smckusick softdep_inode_append(ip, 1851223020Smckusick curthread->td_ucred, wkhd); 1852223020Smckusick else 1853223020Smckusick softdep_buf_append(ibp, wkhd); 1854223020Smckusick } 185562976Smckusick if (lbn < NDADDR) { 1856132775Skan DIP_SET(ip, i_db[lbn], bno); 185798542Smckusick } else if (ip->i_ump->um_fstype == UFS1) { 185898542Smckusick ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; 185998542Smckusick bdwrite(ibp); 186062976Smckusick } else { 186198542Smckusick ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; 186262976Smckusick bdwrite(ibp); 186362976Smckusick } 1864132775Skan DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); 186562976Smckusick ip->i_flag |= IN_CHANGE | IN_UPDATE; 1866175635Sattilio lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 186762976Smckusick return (1); 186862976Smckusick } 186962976Smckusick if (lbn >= NDADDR) 187063788Smckusick bqrelse(ibp); 187162976Smckusick /* 187262976Smckusick * Allocate the block into which to do the copy. Note that this 187362976Smckusick * allocation will never require any additional allocations for 187462976Smckusick * the snapshot inode. 187562976Smckusick */ 1876121443Sjhb td->td_pflags |= TDP_COWINPROGRESS; 187776132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 187862976Smckusick fs->fs_bsize, KERNCRED, 0, &cbp); 1879121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 1880107414Smckusick if (error) 188162976Smckusick break; 188262976Smckusick#ifdef DEBUG 188362976Smckusick if (snapdebug) 1884241011Smdf printf("%s%ju lbn %jd %s %ju size %ld to blkno %jd\n", 1885241011Smdf "Copyonremove: snapino ", (uintmax_t)ip->i_number, 1886241011Smdf (intmax_t)lbn, "for inum", (uintmax_t)inum, size, 188798542Smckusick (intmax_t)cbp->b_blkno); 188862976Smckusick#endif 188962976Smckusick /* 189062976Smckusick * If we have already read the old block contents, then 189175943Smckusick * simply copy them to the new block. Note that we need 189275943Smckusick * to synchronously write snapshots that have not been 189375943Smckusick * unlinked, and hence will be visible after a crash, 1894223127Smckusick * to ensure their integrity. At a minimum we ensure the 1895223127Smckusick * integrity of the filesystem metadata, but use the 1896223127Smckusick * dopersistence sysctl-setable flag to decide on the 1897223127Smckusick * persistence needed for file content data. 189862976Smckusick */ 1899297791Spfg if (savedcbp != NULL) { 190062976Smckusick bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 190162976Smckusick bawrite(cbp); 1902223127Smckusick if ((vtype == VDIR || dopersistence) && 1903223127Smckusick ip->i_effnlink > 0) 1904233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 190562976Smckusick continue; 190662976Smckusick } 190762976Smckusick /* 190862976Smckusick * Otherwise, read the old block contents into the buffer. 190962976Smckusick */ 1910135138Sphk if ((error = readblock(vp, cbp, lbn)) != 0) { 191175943Smckusick bzero(cbp->b_data, fs->fs_bsize); 191275943Smckusick bawrite(cbp); 1913223127Smckusick if ((vtype == VDIR || dopersistence) && 1914223127Smckusick ip->i_effnlink > 0) 1915233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 191662976Smckusick break; 191775943Smckusick } 191862976Smckusick savedcbp = cbp; 191962976Smckusick } 192075943Smckusick /* 192175943Smckusick * Note that we need to synchronously write snapshots that 192275943Smckusick * have not been unlinked, and hence will be visible after 1923223127Smckusick * a crash, to ensure their integrity. At a minimum we 1924223127Smckusick * ensure the integrity of the filesystem metadata, but 1925223127Smckusick * use the dopersistence sysctl-setable flag to decide on 1926223127Smckusick * the persistence needed for file content data. 192775943Smckusick */ 192875943Smckusick if (savedcbp) { 192975943Smckusick vp = savedcbp->b_vp; 193062976Smckusick bawrite(savedcbp); 1931223268Smckusick if ((vtype == VDIR || dopersistence) && 1932223268Smckusick VTOI(vp)->i_effnlink > 0) 1933233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 193475943Smckusick } 193562976Smckusick /* 193662976Smckusick * If we have been unable to allocate a block in which to do 193762976Smckusick * the copy, then return non-zero so that the fragment will 193862976Smckusick * not be freed. Although space will be lost, the snapshot 193962976Smckusick * will stay consistent. 194062976Smckusick */ 1941223020Smckusick if (error != 0 && wkhd != NULL) 1942223020Smckusick softdep_freework(wkhd); 1943175635Sattilio lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 194462976Smckusick return (error); 194562976Smckusick} 194662976Smckusick 194762976Smckusick/* 194862976Smckusick * Associate snapshot files when mounting. 194962976Smckusick */ 195062976Smckusickvoid 195162976Smckusickffs_snapshot_mount(mp) 195262976Smckusick struct mount *mp; 195362976Smckusick{ 195462976Smckusick struct ufsmount *ump = VFSTOUFS(mp); 1955107414Smckusick struct vnode *devvp = ump->um_devvp; 195662976Smckusick struct fs *fs = ump->um_fs; 195783366Sjulian struct thread *td = curthread; 1958135138Sphk struct snapdata *sn; 195962976Smckusick struct vnode *vp; 1960158636Stegge struct vnode *lastvp; 1961135303Sphk struct inode *ip; 1962104698Smckusick struct uio auio; 1963104698Smckusick struct iovec aiov; 1964107848Smckusick void *snapblklist; 1965104698Smckusick char *reason; 1966107848Smckusick daddr_t snaplistsize; 196762976Smckusick int error, snaploc, loc; 196862976Smckusick 1969104698Smckusick /* 1970141526Sphk * XXX The following needs to be set before ffs_truncate or 1971104698Smckusick * VOP_READ can be called. 1972104698Smckusick */ 1973104698Smckusick mp->mnt_stat.f_iosize = fs->fs_bsize; 1974104698Smckusick /* 1975104698Smckusick * Process each snapshot listed in the superblock. 1976104698Smckusick */ 1977107848Smckusick vp = NULL; 1978158636Stegge lastvp = NULL; 1979177778Sjeff sn = NULL; 198062976Smckusick for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 198162976Smckusick if (fs->fs_snapinum[snaploc] == 0) 1982107848Smckusick break; 1983141526Sphk if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc], 198492462Smckusick LK_EXCLUSIVE, &vp)) != 0){ 198562976Smckusick printf("ffs_snapshot_mount: vget failed %d\n", error); 198662976Smckusick continue; 198762976Smckusick } 198862976Smckusick ip = VTOI(vp); 1989232351Smckusick if (!IS_SNAPSHOT(ip) || ip->i_size == 1990104698Smckusick lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1991232351Smckusick if (!IS_SNAPSHOT(ip)) { 1992104698Smckusick reason = "non-snapshot"; 1993104698Smckusick } else { 1994104698Smckusick reason = "old format snapshot"; 1995234605Strasz (void)ffs_truncate(vp, (off_t)0, 0, NOCRED); 1996233438Smckusick (void)ffs_syncvnode(vp, MNT_WAIT, 0); 1997104698Smckusick } 1998104698Smckusick printf("ffs_snapshot_mount: %s inode %d\n", 1999104698Smckusick reason, fs->fs_snapinum[snaploc]); 200062976Smckusick vput(vp); 2001107848Smckusick vp = NULL; 200262976Smckusick for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 200362976Smckusick if (fs->fs_snapinum[loc] == 0) 200462976Smckusick break; 200562976Smckusick fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 200662976Smckusick } 200762976Smckusick fs->fs_snapinum[loc - 1] = 0; 200862976Smckusick snaploc--; 200962976Smckusick continue; 201062976Smckusick } 2011104698Smckusick /* 2012177778Sjeff * Acquire a lock on the snapdata structure, creating it if 2013177778Sjeff * necessary. 2014105191Smckusick */ 2015177778Sjeff sn = ffs_snapdata_acquire(devvp); 2016177778Sjeff /* 2017177778Sjeff * Change vnode to use shared snapshot lock instead of the 2018177778Sjeff * original private lock. 2019177778Sjeff */ 2020177778Sjeff vp->v_vnlock = &sn->sn_lock; 2021175635Sattilio lockmgr(&vp->v_lock, LK_RELEASE, NULL); 2022105191Smckusick /* 2023104698Smckusick * Link it onto the active snapshot list. 2024104698Smckusick */ 2025107414Smckusick VI_LOCK(devvp); 202673942Smckusick if (ip->i_nextsnap.tqe_prev != 0) 2027241011Smdf panic("ffs_snapshot_mount: %ju already on list", 2028241011Smdf (uintmax_t)ip->i_number); 202973942Smckusick else 2030135138Sphk TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); 2031101308Sjeff vp->v_vflag |= VV_SYSTEM; 2032107414Smckusick VI_UNLOCK(devvp); 2033175294Sattilio VOP_UNLOCK(vp, 0); 2034158636Stegge lastvp = vp; 203562976Smckusick } 2036158636Stegge vp = lastvp; 2037107848Smckusick /* 2038107848Smckusick * No usable snapshots found. 2039107848Smckusick */ 2040177778Sjeff if (sn == NULL || vp == NULL) 2041107848Smckusick return; 2042107848Smckusick /* 2043107848Smckusick * Allocate the space for the block hints list. We always want to 2044107848Smckusick * use the list from the newest snapshot. 2045107848Smckusick */ 2046107848Smckusick auio.uio_iov = &aiov; 2047107848Smckusick auio.uio_iovcnt = 1; 2048107848Smckusick aiov.iov_base = (void *)&snaplistsize; 2049107848Smckusick aiov.iov_len = sizeof(snaplistsize); 2050107848Smckusick auio.uio_resid = aiov.iov_len; 2051107848Smckusick auio.uio_offset = 2052107848Smckusick lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 2053107848Smckusick auio.uio_segflg = UIO_SYSSPACE; 2054107848Smckusick auio.uio_rw = UIO_READ; 2055107848Smckusick auio.uio_td = td; 2056175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2057107848Smckusick if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2058107848Smckusick printf("ffs_snapshot_mount: read_1 failed %d\n", error); 2059175294Sattilio VOP_UNLOCK(vp, 0); 2060107848Smckusick return; 2061107848Smckusick } 2062184205Sdes snapblklist = malloc(snaplistsize * sizeof(daddr_t), 2063111119Simp M_UFSMNT, M_WAITOK); 2064107848Smckusick auio.uio_iovcnt = 1; 2065107848Smckusick aiov.iov_base = snapblklist; 2066107848Smckusick aiov.iov_len = snaplistsize * sizeof (daddr_t); 2067107848Smckusick auio.uio_resid = aiov.iov_len; 2068107848Smckusick auio.uio_offset -= sizeof(snaplistsize); 2069107848Smckusick if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 2070107848Smckusick printf("ffs_snapshot_mount: read_2 failed %d\n", error); 2071175294Sattilio VOP_UNLOCK(vp, 0); 2072184205Sdes free(snapblklist, M_UFSMNT); 2073107848Smckusick return; 2074107848Smckusick } 2075175294Sattilio VOP_UNLOCK(vp, 0); 2076107848Smckusick VI_LOCK(devvp); 2077107848Smckusick ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 2078135138Sphk sn->sn_listsize = snaplistsize; 2079135138Sphk sn->sn_blklist = (daddr_t *)snapblklist; 2080107848Smckusick devvp->v_vflag |= VV_COPYONWRITE; 2081107848Smckusick VI_UNLOCK(devvp); 208262976Smckusick} 208362976Smckusick 208462976Smckusick/* 208562976Smckusick * Disassociate snapshot files when unmounting. 208662976Smckusick */ 208762976Smckusickvoid 208862976Smckusickffs_snapshot_unmount(mp) 208962976Smckusick struct mount *mp; 209062976Smckusick{ 2091107414Smckusick struct vnode *devvp = VFSTOUFS(mp)->um_devvp; 2092135138Sphk struct snapdata *sn; 209362976Smckusick struct inode *xp; 2094105191Smckusick struct vnode *vp; 209562976Smckusick 2096158259Stegge VI_LOCK(devvp); 2097135138Sphk sn = devvp->v_rdev->si_snapdata; 2098158259Stegge while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) { 2099105191Smckusick vp = ITOV(xp); 2100135138Sphk TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap); 210173942Smckusick xp->i_nextsnap.tqe_prev = 0; 2102175635Sattilio lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE, 2103175635Sattilio VI_MTX(devvp)); 2104177778Sjeff lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); 2105158259Stegge KASSERT(vp->v_vnlock == &sn->sn_lock, 2106158259Stegge ("ffs_snapshot_unmount: lost lock mutation")); 2107158259Stegge vp->v_vnlock = &vp->v_lock; 2108175635Sattilio lockmgr(&vp->v_lock, LK_RELEASE, NULL); 2109175635Sattilio lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2110158259Stegge if (xp->i_effnlink > 0) 2111105191Smckusick vrele(vp); 2112158259Stegge VI_LOCK(devvp); 2113158259Stegge sn = devvp->v_rdev->si_snapdata; 211462976Smckusick } 2115177778Sjeff try_free_snapdata(devvp); 2116107414Smckusick ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); 211762976Smckusick} 211862976Smckusick 211962976Smckusick/* 2120166193Skib * Check the buffer block to be belong to device buffer that shall be 2121166193Skib * locked after snaplk. devvp shall be locked on entry, and will be 2122166193Skib * leaved locked upon exit. 2123166193Skib */ 2124166193Skibstatic int 2125166193Skibffs_bp_snapblk(devvp, bp) 2126166193Skib struct vnode *devvp; 2127166193Skib struct buf *bp; 2128166193Skib{ 2129166193Skib struct snapdata *sn; 2130166193Skib struct fs *fs; 2131166193Skib ufs2_daddr_t lbn, *snapblklist; 2132166193Skib int lower, upper, mid; 2133166193Skib 2134166193Skib ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk"); 2135166193Skib KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp)); 2136166193Skib sn = devvp->v_rdev->si_snapdata; 2137166193Skib if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL) 2138166193Skib return (0); 2139166193Skib fs = TAILQ_FIRST(&sn->sn_head)->i_fs; 2140166193Skib lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2141166193Skib snapblklist = sn->sn_blklist; 2142166193Skib upper = sn->sn_listsize - 1; 2143166193Skib lower = 1; 2144166193Skib while (lower <= upper) { 2145166193Skib mid = (lower + upper) / 2; 2146166193Skib if (snapblklist[mid] == lbn) 2147166193Skib break; 2148166193Skib if (snapblklist[mid] < lbn) 2149166193Skib lower = mid + 1; 2150166193Skib else 2151166193Skib upper = mid - 1; 2152166193Skib } 2153166193Skib if (lower <= upper) 2154166193Skib return (1); 2155166193Skib return (0); 2156166193Skib} 2157166193Skib 2158166193Skibvoid 2159166193Skibffs_bdflush(bo, bp) 2160166193Skib struct bufobj *bo; 2161166193Skib struct buf *bp; 2162166193Skib{ 2163166193Skib struct thread *td; 2164166193Skib struct vnode *vp, *devvp; 2165166193Skib struct buf *nbp; 2166166193Skib int bp_bdskip; 2167166193Skib 2168166193Skib if (bo->bo_dirty.bv_cnt <= dirtybufthresh) 2169166193Skib return; 2170166193Skib 2171166193Skib td = curthread; 2172166193Skib vp = bp->b_vp; 2173166193Skib devvp = bo->__bo_vnode; 2174166193Skib KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp)); 2175166193Skib 2176166193Skib VI_LOCK(devvp); 2177166193Skib bp_bdskip = ffs_bp_snapblk(devvp, bp); 2178166193Skib if (bp_bdskip) 2179166193Skib bdwriteskip++; 2180166193Skib VI_UNLOCK(devvp); 2181166193Skib if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) { 2182166193Skib (void) VOP_FSYNC(vp, MNT_NOWAIT, td); 2183166193Skib altbufferflushes++; 2184166193Skib } else { 2185166193Skib BO_LOCK(bo); 2186166193Skib /* 2187166193Skib * Try to find a buffer to flush. 2188166193Skib */ 2189166193Skib TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { 2190166193Skib if ((nbp->b_vflags & BV_BKGRDINPROG) || 2191166193Skib BUF_LOCK(nbp, 2192166193Skib LK_EXCLUSIVE | LK_NOWAIT, NULL)) 2193166193Skib continue; 2194166193Skib if (bp == nbp) 2195166193Skib panic("bdwrite: found ourselves"); 2196166193Skib BO_UNLOCK(bo); 2197166193Skib /* 2198166193Skib * Don't countdeps with the bo lock 2199166193Skib * held. 2200166193Skib */ 2201166193Skib if (buf_countdeps(nbp, 0)) { 2202166193Skib BO_LOCK(bo); 2203166193Skib BUF_UNLOCK(nbp); 2204166193Skib continue; 2205166193Skib } 2206166193Skib if (bp_bdskip) { 2207166193Skib VI_LOCK(devvp); 2208166193Skib if (!ffs_bp_snapblk(vp, nbp)) { 2209251171Sjeff VI_UNLOCK(devvp); 2210251171Sjeff BO_LOCK(bo); 2211166193Skib BUF_UNLOCK(nbp); 2212166193Skib continue; 2213166193Skib } 2214166193Skib VI_UNLOCK(devvp); 2215166193Skib } 2216166193Skib if (nbp->b_flags & B_CLUSTEROK) { 2217166193Skib vfs_bio_awrite(nbp); 2218166193Skib } else { 2219166193Skib bremfree(nbp); 2220166193Skib bawrite(nbp); 2221166193Skib } 2222166193Skib dirtybufferflushes++; 2223166193Skib break; 2224166193Skib } 2225166193Skib if (nbp == NULL) 2226166193Skib BO_UNLOCK(bo); 2227166193Skib } 2228166193Skib} 2229166193Skib 2230166193Skib/* 223162976Smckusick * Check for need to copy block that is about to be written, 223262976Smckusick * copying the block if necessary. 223362976Smckusick */ 2234136963Sphkint 223573942Smckusickffs_copyonwrite(devvp, bp) 223673942Smckusick struct vnode *devvp; 223773942Smckusick struct buf *bp; 223862976Smckusick{ 2239135138Sphk struct snapdata *sn; 2240238697Skevlo struct buf *ibp, *cbp, *savedcbp = NULL; 224183366Sjulian struct thread *td = curthread; 224273942Smckusick struct fs *fs; 224362976Smckusick struct inode *ip; 2244238697Skevlo struct vnode *vp = NULL; 2245107848Smckusick ufs2_daddr_t lbn, blkno, *snapblklist; 2246151177Stegge int lower, upper, mid, indiroff, error = 0; 2247150760Struckman int launched_async_io, prev_norunningbuf; 2248158260Stegge long saved_runningbufspace; 224962976Smckusick 2250232351Smckusick if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp))) 2251151179Stegge return (0); /* Update on a snapshot file */ 2252121443Sjhb if (td->td_pflags & TDP_COWINPROGRESS) 225362976Smckusick panic("ffs_copyonwrite: recursive call"); 2254107848Smckusick /* 2255107848Smckusick * First check to see if it is in the preallocated list. 2256107848Smckusick * By doing this check we avoid several potential deadlocks. 2257107848Smckusick */ 2258107414Smckusick VI_LOCK(devvp); 2259135138Sphk sn = devvp->v_rdev->si_snapdata; 2260151177Stegge if (sn == NULL || 2261168353Sdelphij TAILQ_EMPTY(&sn->sn_head)) { 2262151177Stegge VI_UNLOCK(devvp); 2263151177Stegge return (0); /* No snapshot */ 2264151177Stegge } 2265135138Sphk ip = TAILQ_FIRST(&sn->sn_head); 2266105191Smckusick fs = ip->i_fs; 2267105191Smckusick lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 2268135138Sphk snapblklist = sn->sn_blklist; 2269135138Sphk upper = sn->sn_listsize - 1; 2270107848Smckusick lower = 1; 2271107848Smckusick while (lower <= upper) { 2272107848Smckusick mid = (lower + upper) / 2; 2273107848Smckusick if (snapblklist[mid] == lbn) 2274107848Smckusick break; 2275107848Smckusick if (snapblklist[mid] < lbn) 2276107848Smckusick lower = mid + 1; 2277107848Smckusick else 2278107848Smckusick upper = mid - 1; 2279107848Smckusick } 2280107848Smckusick if (lower <= upper) { 2281107848Smckusick VI_UNLOCK(devvp); 2282107848Smckusick return (0); 2283107848Smckusick } 2284150760Struckman launched_async_io = 0; 2285150760Struckman prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF; 2286107848Smckusick /* 2287150741Struckman * Since I/O on bp isn't yet in progress and it may be blocked 2288150741Struckman * for a long time waiting on snaplk, back it out of 2289150741Struckman * runningbufspace, possibly waking other threads waiting for space. 2290150741Struckman */ 2291158260Stegge saved_runningbufspace = bp->b_runningbufspace; 2292158260Stegge if (saved_runningbufspace != 0) 2293158260Stegge runningbufwakeup(bp); 2294150741Struckman /* 2295107848Smckusick * Not in the precomputed list, so check the snapshots. 2296107848Smckusick */ 2297175635Sattilio while (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 2298175635Sattilio VI_MTX(devvp)) != 0) { 2299151177Stegge VI_LOCK(devvp); 2300151177Stegge sn = devvp->v_rdev->si_snapdata; 2301151177Stegge if (sn == NULL || 2302168353Sdelphij TAILQ_EMPTY(&sn->sn_head)) { 2303151177Stegge VI_UNLOCK(devvp); 2304158260Stegge if (saved_runningbufspace != 0) { 2305158260Stegge bp->b_runningbufspace = saved_runningbufspace; 2306189595Sjhb atomic_add_long(&runningbufspace, 2307151177Stegge bp->b_runningbufspace); 2308158260Stegge } 2309151177Stegge return (0); /* Snapshot gone */ 2310151177Stegge } 2311151177Stegge } 2312135138Sphk TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 231362976Smckusick vp = ITOV(ip); 2314207742Sjeff if (DOINGSOFTDEP(vp)) 2315207742Sjeff softdep_prealloc(vp, MNT_WAIT); 231662976Smckusick /* 231762976Smckusick * We ensure that everything of our own that needs to be 231862976Smckusick * copied will be done at the time that ffs_snapshot is 231962976Smckusick * called. Thus we can skip the check here which can 232076132Sphk * deadlock in doing the lookup in UFS_BALLOC. 232162976Smckusick */ 232262976Smckusick if (bp->b_vp == vp) 232362976Smckusick continue; 232462976Smckusick /* 2325105670Smckusick * Check to see if block needs to be copied. We do not have 2326105670Smckusick * to hold the snapshot lock while doing this lookup as it 2327105670Smckusick * will never require any additional allocations for the 2328105670Smckusick * snapshot inode. 232962976Smckusick */ 233062976Smckusick if (lbn < NDADDR) { 233198542Smckusick blkno = DIP(ip, i_db[lbn]); 233262976Smckusick } else { 2333150741Struckman td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 233476132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2335105191Smckusick fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 2336121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 2337105191Smckusick if (error) 2338105191Smckusick break; 233962976Smckusick indiroff = (lbn - NDADDR) % NINDIR(fs); 234098542Smckusick if (ip->i_ump->um_fstype == UFS1) 234198542Smckusick blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; 234298542Smckusick else 234398542Smckusick blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; 234463788Smckusick bqrelse(ibp); 234562976Smckusick } 2346173464Sobrien#ifdef INVARIANTS 234762976Smckusick if (blkno == BLK_SNAP && bp->b_lblkno >= 0) 234862976Smckusick panic("ffs_copyonwrite: bad copy block"); 234962976Smckusick#endif 2350105191Smckusick if (blkno != 0) 235162976Smckusick continue; 235262976Smckusick /* 2353105670Smckusick * Allocate the block into which to do the copy. Since 2354105670Smckusick * multiple processes may all try to copy the same block, 2355105670Smckusick * we have to recheck our need to do a copy if we sleep 2356105670Smckusick * waiting for the lock. 2357105670Smckusick * 2358105670Smckusick * Because all snapshots on a filesystem share a single 2359105670Smckusick * lock, we ensure that we will never be in competition 2360105670Smckusick * with another process to allocate a block. 236162976Smckusick */ 2362150741Struckman td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; 236376132Sphk error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2364105191Smckusick fs->fs_bsize, KERNCRED, 0, &cbp); 2365121443Sjhb td->td_pflags &= ~TDP_COWINPROGRESS; 2366105191Smckusick if (error) 2367105191Smckusick break; 236862976Smckusick#ifdef DEBUG 236962976Smckusick if (snapdebug) { 2370241011Smdf printf("Copyonwrite: snapino %ju lbn %jd for ", 2371241011Smdf (uintmax_t)ip->i_number, (intmax_t)lbn); 237273942Smckusick if (bp->b_vp == devvp) 237362976Smckusick printf("fs metadata"); 237462976Smckusick else 2375241011Smdf printf("inum %ju", 2376241011Smdf (uintmax_t)VTOI(bp->b_vp)->i_number); 237798687Smux printf(" lblkno %jd to blkno %jd\n", 237898542Smckusick (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); 237962976Smckusick } 238062976Smckusick#endif 238162976Smckusick /* 238262976Smckusick * If we have already read the old block contents, then 238375943Smckusick * simply copy them to the new block. Note that we need 238475943Smckusick * to synchronously write snapshots that have not been 238575943Smckusick * unlinked, and hence will be visible after a crash, 2386223127Smckusick * to ensure their integrity. At a minimum we ensure the 2387223127Smckusick * integrity of the filesystem metadata, but use the 2388223127Smckusick * dopersistence sysctl-setable flag to decide on the 2389223127Smckusick * persistence needed for file content data. 239062976Smckusick */ 2391297791Spfg if (savedcbp != NULL) { 239262976Smckusick bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 239362976Smckusick bawrite(cbp); 2394223127Smckusick if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || 2395223127Smckusick dopersistence) && ip->i_effnlink > 0) 2396233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 2397150760Struckman else 2398150760Struckman launched_async_io = 1; 239962976Smckusick continue; 240062976Smckusick } 240162976Smckusick /* 240262976Smckusick * Otherwise, read the old block contents into the buffer. 240362976Smckusick */ 2404135138Sphk if ((error = readblock(vp, cbp, lbn)) != 0) { 240575943Smckusick bzero(cbp->b_data, fs->fs_bsize); 240675943Smckusick bawrite(cbp); 2407223127Smckusick if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || 2408223127Smckusick dopersistence) && ip->i_effnlink > 0) 2409233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 2410150760Struckman else 2411150760Struckman launched_async_io = 1; 241262976Smckusick break; 241375943Smckusick } 241462976Smckusick savedcbp = cbp; 241562976Smckusick } 241675943Smckusick /* 241775943Smckusick * Note that we need to synchronously write snapshots that 241875943Smckusick * have not been unlinked, and hence will be visible after 2419223127Smckusick * a crash, to ensure their integrity. At a minimum we 2420223127Smckusick * ensure the integrity of the filesystem metadata, but 2421223127Smckusick * use the dopersistence sysctl-setable flag to decide on 2422223127Smckusick * the persistence needed for file content data. 242375943Smckusick */ 242475943Smckusick if (savedcbp) { 242575943Smckusick vp = savedcbp->b_vp; 242662976Smckusick bawrite(savedcbp); 2427223127Smckusick if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || 2428223127Smckusick dopersistence) && VTOI(vp)->i_effnlink > 0) 2429233438Smckusick (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); 2430150760Struckman else 2431150760Struckman launched_async_io = 1; 243275943Smckusick } 2433175635Sattilio lockmgr(vp->v_vnlock, LK_RELEASE, NULL); 2434151177Stegge td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) | 2435151177Stegge prev_norunningbuf; 2436150791Struckman if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0) 2437150760Struckman waitrunningbufspace(); 2438150741Struckman /* 2439150741Struckman * I/O on bp will now be started, so count it in runningbufspace. 2440150741Struckman */ 2441158260Stegge if (saved_runningbufspace != 0) { 2442158260Stegge bp->b_runningbufspace = saved_runningbufspace; 2443189595Sjhb atomic_add_long(&runningbufspace, bp->b_runningbufspace); 2444158260Stegge } 244562976Smckusick return (error); 244662976Smckusick} 244762976Smckusick 244862976Smckusick/* 2449223020Smckusick * sync snapshots to force freework records waiting on snapshots to claim 2450223020Smckusick * blocks to free. 2451223020Smckusick */ 2452223020Smckusickvoid 2453223020Smckusickffs_sync_snap(mp, waitfor) 2454223020Smckusick struct mount *mp; 2455223020Smckusick int waitfor; 2456223020Smckusick{ 2457223020Smckusick struct snapdata *sn; 2458223020Smckusick struct vnode *devvp; 2459223020Smckusick struct vnode *vp; 2460223020Smckusick struct inode *ip; 2461223020Smckusick 2462223020Smckusick devvp = VFSTOUFS(mp)->um_devvp; 2463223020Smckusick if ((devvp->v_vflag & VV_COPYONWRITE) == 0) 2464223020Smckusick return; 2465223020Smckusick for (;;) { 2466223020Smckusick VI_LOCK(devvp); 2467223020Smckusick sn = devvp->v_rdev->si_snapdata; 2468223020Smckusick if (sn == NULL) { 2469223020Smckusick VI_UNLOCK(devvp); 2470223020Smckusick return; 2471223020Smckusick } 2472223020Smckusick if (lockmgr(&sn->sn_lock, 2473223020Smckusick LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, 2474223020Smckusick VI_MTX(devvp)) == 0) 2475223020Smckusick break; 2476223020Smckusick } 2477223020Smckusick TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { 2478223020Smckusick vp = ITOV(ip); 2479233438Smckusick ffs_syncvnode(vp, waitfor, NO_INO_UPDT); 2480223020Smckusick } 2481223020Smckusick lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2482223020Smckusick} 2483223020Smckusick 2484223020Smckusick/* 248562976Smckusick * Read the specified block into the given buffer. 248662976Smckusick * Much of this boiler-plate comes from bwrite(). 248762976Smckusick */ 248862976Smckusickstatic int 2489135138Sphkreadblock(vp, bp, lbn) 2490135138Sphk struct vnode *vp; 249162976Smckusick struct buf *bp; 249298542Smckusick ufs2_daddr_t lbn; 249362976Smckusick{ 2494135138Sphk struct inode *ip = VTOI(vp); 2495137035Sphk struct bio *bip; 249662976Smckusick 2497137035Sphk bip = g_alloc_bio(); 2498137035Sphk bip->bio_cmd = BIO_READ; 2499137035Sphk bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); 2500137035Sphk bip->bio_data = bp->b_data; 2501137035Sphk bip->bio_length = bp->b_bcount; 2502158308Spjd bip->bio_done = NULL; 2503137035Sphk 2504137035Sphk g_io_request(bip, ip->i_devvp->v_bufobj.bo_private); 2505158308Spjd bp->b_error = biowait(bip, "snaprdb"); 2506137035Sphk g_destroy_bio(bip); 2507137035Sphk return (bp->b_error); 250862976Smckusick} 2509154065Simp 2510183073Skib#endif 2511183073Skib 2512156560Stegge/* 2513156560Stegge * Process file deletes that were deferred by ufs_inactive() due to 2514163194Skib * the file system being suspended. Transfer IN_LAZYACCESS into 2515163194Skib * IN_MODIFIED for vnodes that were accessed during suspension. 2516156560Stegge */ 2517183073Skibvoid 2518156560Steggeprocess_deferred_inactive(struct mount *mp) 2519156560Stegge{ 2520156560Stegge struct vnode *vp, *mvp; 2521163194Skib struct inode *ip; 2522156560Stegge struct thread *td; 2523156560Stegge int error; 2524156560Stegge 2525156560Stegge td = curthread; 2526156560Stegge (void) vn_start_secondary_write(NULL, &mp, V_WAIT); 2527156560Stegge loop: 2528234386Smckusick MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 2529163194Skib /* 2530163194Skib * IN_LAZYACCESS is checked here without holding any 2531163194Skib * vnode lock, but this flag is set only while holding 2532163194Skib * vnode interlock. 2533163194Skib */ 2534234386Smckusick if (vp->v_type == VNON || 2535163194Skib ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 && 2536234386Smckusick ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0))) { 2537156560Stegge VI_UNLOCK(vp); 2538156560Stegge continue; 2539156560Stegge } 2540156560Stegge vholdl(vp); 2541175202Sattilio error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); 2542156560Stegge if (error != 0) { 2543156560Stegge vdrop(vp); 2544156560Stegge if (error == ENOENT) 2545156560Stegge continue; /* vnode recycled */ 2546234386Smckusick MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 2547156560Stegge goto loop; 2548156560Stegge } 2549163194Skib ip = VTOI(vp); 2550163194Skib if ((ip->i_flag & IN_LAZYACCESS) != 0) { 2551163194Skib ip->i_flag &= ~IN_LAZYACCESS; 2552163194Skib ip->i_flag |= IN_MODIFIED; 2553163194Skib } 2554156560Stegge VI_LOCK(vp); 2555163194Skib if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) { 2556156560Stegge VI_UNLOCK(vp); 2557175294Sattilio VOP_UNLOCK(vp, 0); 2558156560Stegge vdrop(vp); 2559156560Stegge continue; 2560156560Stegge } 2561234158Smckusick vinactive(vp, td); 2562156560Stegge VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, 2563156560Stegge ("process_deferred_inactive: got VI_OWEINACT")); 2564156560Stegge VI_UNLOCK(vp); 2565175294Sattilio VOP_UNLOCK(vp, 0); 2566156560Stegge vdrop(vp); 2567156560Stegge } 2568156560Stegge vn_finished_secondary_write(mp); 2569156560Stegge} 2570158259Stegge 2571183073Skib#ifndef NO_FFS_SNAPSHOT 2572183073Skib 2573177778Sjeffstatic struct snapdata * 2574177778Sjeffffs_snapdata_alloc(void) 2575177778Sjeff{ 2576177778Sjeff struct snapdata *sn; 2577177778Sjeff 2578177778Sjeff /* 2579177778Sjeff * Fetch a snapdata from the free list if there is one available. 2580177778Sjeff */ 2581177778Sjeff mtx_lock(&snapfree_lock); 2582177778Sjeff sn = LIST_FIRST(&snapfree); 2583177778Sjeff if (sn != NULL) 2584177778Sjeff LIST_REMOVE(sn, sn_link); 2585177778Sjeff mtx_unlock(&snapfree_lock); 2586177778Sjeff if (sn != NULL) 2587177778Sjeff return (sn); 2588177778Sjeff /* 2589177778Sjeff * If there were no free snapdatas allocate one. 2590177778Sjeff */ 2591177778Sjeff sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); 2592177778Sjeff TAILQ_INIT(&sn->sn_head); 2593177778Sjeff lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, 2594177778Sjeff LK_CANRECURSE | LK_NOSHARE); 2595177778Sjeff return (sn); 2596177778Sjeff} 2597177778Sjeff 2598177778Sjeff/* 2599177778Sjeff * The snapdata is never freed because we can not be certain that 2600177778Sjeff * there are no threads sleeping on the snap lock. Persisting 2601177778Sjeff * them permanently avoids costly synchronization in ffs_lock(). 2602177778Sjeff */ 2603177778Sjeffstatic void 2604177778Sjeffffs_snapdata_free(struct snapdata *sn) 2605177778Sjeff{ 2606177778Sjeff mtx_lock(&snapfree_lock); 2607177778Sjeff LIST_INSERT_HEAD(&snapfree, sn, sn_link); 2608177778Sjeff mtx_unlock(&snapfree_lock); 2609177778Sjeff} 2610177778Sjeff 2611158259Stegge/* Try to free snapdata associated with devvp */ 2612158259Steggestatic void 2613177778Sjefftry_free_snapdata(struct vnode *devvp) 2614158259Stegge{ 2615158259Stegge struct snapdata *sn; 2616158259Stegge ufs2_daddr_t *snapblklist; 2617158259Stegge 2618177778Sjeff ASSERT_VI_LOCKED(devvp, "try_free_snapdata"); 2619158259Stegge sn = devvp->v_rdev->si_snapdata; 2620158259Stegge 2621158259Stegge if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL || 2622158259Stegge (devvp->v_vflag & VV_COPYONWRITE) == 0) { 2623158259Stegge VI_UNLOCK(devvp); 2624158259Stegge return; 2625158259Stegge } 2626158259Stegge 2627158259Stegge devvp->v_rdev->si_snapdata = NULL; 2628158259Stegge devvp->v_vflag &= ~VV_COPYONWRITE; 2629177778Sjeff lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp)); 2630158259Stegge snapblklist = sn->sn_blklist; 2631158259Stegge sn->sn_blklist = NULL; 2632158259Stegge sn->sn_listsize = 0; 2633175635Sattilio lockmgr(&sn->sn_lock, LK_RELEASE, NULL); 2634158259Stegge if (snapblklist != NULL) 2635184205Sdes free(snapblklist, M_UFSMNT); 2636177778Sjeff ffs_snapdata_free(sn); 2637158259Stegge} 2638177778Sjeff 2639177778Sjeffstatic struct snapdata * 2640177778Sjeffffs_snapdata_acquire(struct vnode *devvp) 2641177778Sjeff{ 2642177778Sjeff struct snapdata *nsn; 2643177778Sjeff struct snapdata *sn; 2644177778Sjeff 2645177778Sjeff /* 2646262678Spfg * Allocate a free snapdata. This is done before acquiring the 2647177778Sjeff * devvp lock to avoid allocation while the devvp interlock is 2648177778Sjeff * held. 2649177778Sjeff */ 2650177778Sjeff nsn = ffs_snapdata_alloc(); 2651177778Sjeff /* 2652177778Sjeff * If there snapshots already exist on this filesystem grab a 2653177778Sjeff * reference to the shared lock. Otherwise this is the first 2654177778Sjeff * snapshot on this filesystem and we need to use our 2655177778Sjeff * pre-allocated snapdata. 2656177778Sjeff */ 2657177778Sjeff VI_LOCK(devvp); 2658177778Sjeff if (devvp->v_rdev->si_snapdata == NULL) { 2659177778Sjeff devvp->v_rdev->si_snapdata = nsn; 2660177778Sjeff nsn = NULL; 2661177778Sjeff } 2662177778Sjeff sn = devvp->v_rdev->si_snapdata; 2663177778Sjeff /* 2664177778Sjeff * Acquire the snapshot lock. 2665177778Sjeff */ 2666177778Sjeff lockmgr(&sn->sn_lock, 2667177778Sjeff LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, VI_MTX(devvp)); 2668177778Sjeff /* 2669177778Sjeff * Free any unused snapdata. 2670177778Sjeff */ 2671177778Sjeff if (nsn != NULL) 2672177778Sjeff ffs_snapdata_free(nsn); 2673177778Sjeff 2674177778Sjeff return (sn); 2675177778Sjeff} 2676177778Sjeff 2677154065Simp#endif 2678