Cross Reference: /freebsd-9.3-release/sys/ufs/ffs/ffs

Deleted Added

sdiff udiff text old ( 113872 ) new ( 114293 )

full compact

ffs_snapshot.c (113872)	ffs_snapshot.c (114293)
1/* 2 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 3 * 4 * Further information about snapshots can be obtained from: 5 * 6 * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 7 * 1614 Oxford Street mckusick@mckusick.com 8 * Berkeley, CA 94709-1608 +1-510-843-9542 9 * USA 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00	1/* 2 * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 
3 * 4 * Further information about snapshots can be obtained from: 5 * 6 * Marshall Kirk McKusick http://www.mckusick.com/softdep/ 7 * 1614 Oxford Street mckusick@mckusick.com 8 * Berkeley, CA 94709-1608 +1-510-843-9542 9 * USA 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY 22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR 25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
34 * $FreeBSD: head/sys/ufs/ffs/ffs_snapshot.c 113872 2003-04-22 20:45:38Z jhb $	34 * $FreeBSD: head/sys/ufs/ffs/ffs_snapshot.c 114293 2003-04-30 12:57:40Z markm $
35 / 36 37#include <sys/param.h> 38#include <sys/kernel.h> 39#include <sys/systm.h> 40#include <sys/conf.h> 41#include <sys/bio.h> 42#include <sys/buf.h> 43#include <sys/proc.h> 44#include <sys/namei.h> 45#include <sys/sched.h> 46#include <sys/stat.h> 47#include <sys/malloc.h> 48#include <sys/mount.h> 49#include <sys/resource.h> 50#include <sys/resourcevar.h> 51#include <sys/vnode.h> 52 53#include <ufs/ufs/extattr.h> 54#include <ufs/ufs/quota.h> 55#include <ufs/ufs/ufsmount.h> 56#include <ufs/ufs/inode.h> 57#include <ufs/ufs/ufs_extern.h> 58 59#include <ufs/ffs/fs.h> 60#include <ufs/ffs/ffs_extern.h> 61 62#define KERNCRED thread0.td_ucred 63#define DEBUG 1 64 65static int cgaccount(int, struct vnode , struct buf , int); 66static int expunge_ufs1(struct vnode , struct inode , struct fs , 67 int ()(struct vnode , ufs1_daddr_t , ufs1_daddr_t , struct fs , 68 ufs_lbn_t, int), int); 69static int indiracct_ufs1(struct vnode , struct vnode , int, 70 ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs , 71 int ()(struct vnode , ufs1_daddr_t , ufs1_daddr_t , struct fs , 72 ufs_lbn_t, int), int); 73static int fullacct_ufs1(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 74 struct fs , ufs_lbn_t, int); 75static int snapacct_ufs1(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 76 struct fs , ufs_lbn_t, int); 77static int mapacct_ufs1(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 78 struct fs , ufs_lbn_t, int); 79static int expunge_ufs2(struct vnode , struct inode , struct fs , 80 int ()(struct vnode , ufs2_daddr_t , ufs2_daddr_t , struct fs , 81 ufs_lbn_t, int), int); 82static int indiracct_ufs2(struct vnode , struct vnode , int, 83 ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs , 84 int ()(struct vnode , ufs2_daddr_t , ufs2_daddr_t , struct fs , 85 ufs_lbn_t, int), int); 86static int fullacct_ufs2(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 87 struct fs , ufs_lbn_t, int); 88static int snapacct_ufs2(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 89 
struct fs , ufs_lbn_t, int); 90static int mapacct_ufs2(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 91 struct fs , ufs_lbn_t, int); 92static int ffs_copyonwrite(struct vnode , struct buf ); 93static int readblock(struct buf , ufs2_daddr_t); 94 95/* 96 * To ensure the consistency of snapshots across crashes, we must 97 * synchronously write out copied blocks before allowing the 98 * originals to be modified. Because of the rather severe speed 99 * penalty that this imposes, the following flag allows this 100 * crash persistence to be disabled. 101 / 102int dopersistence = 0; 103* 104#ifdef DEBUG 105#include <sys/sysctl.h> 106SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");	35 / 36 37#include <sys/param.h> 38#include <sys/kernel.h> 39#include <sys/systm.h> 40#include <sys/conf.h> 41#include <sys/bio.h> 42#include <sys/buf.h> 43#include <sys/proc.h> 44#include <sys/namei.h> 45#include <sys/sched.h> 46#include <sys/stat.h> 47#include <sys/malloc.h> 48#include <sys/mount.h> 49#include <sys/resource.h> 50#include <sys/resourcevar.h> 51#include <sys/vnode.h> 52 53#include <ufs/ufs/extattr.h> 54#include <ufs/ufs/quota.h> 55#include <ufs/ufs/ufsmount.h> 56#include <ufs/ufs/inode.h> 57#include <ufs/ufs/ufs_extern.h> 58 59#include <ufs/ffs/fs.h> 60#include <ufs/ffs/ffs_extern.h> 61 62#define KERNCRED thread0.td_ucred 63#define DEBUG 1 64 65static int cgaccount(int, struct vnode , struct buf , int); 66static int expunge_ufs1(struct vnode , struct inode , struct fs , 67 int ()(struct vnode , ufs1_daddr_t , ufs1_daddr_t , struct fs , 68 ufs_lbn_t, int), int); 69static int indiracct_ufs1(struct vnode , struct vnode , int, 70 ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs , 71 int ()(struct vnode , ufs1_daddr_t , ufs1_daddr_t , struct fs , 72 ufs_lbn_t, int), int); 73static int fullacct_ufs1(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 74 struct fs , ufs_lbn_t, int); 75static int snapacct_ufs1(struct vnode , ufs1_daddr_t , 
ufs1_daddr_t , 76 struct fs , ufs_lbn_t, int); 77static int mapacct_ufs1(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 78 struct fs , ufs_lbn_t, int); 79static int expunge_ufs2(struct vnode , struct inode , struct fs , 80 int ()(struct vnode , ufs2_daddr_t , ufs2_daddr_t , struct fs , 81 ufs_lbn_t, int), int); 82static int indiracct_ufs2(struct vnode , struct vnode , int, 83 ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs , 84 int ()(struct vnode , ufs2_daddr_t , ufs2_daddr_t , struct fs , 85 ufs_lbn_t, int), int); 86static int fullacct_ufs2(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 87 struct fs , ufs_lbn_t, int); 88static int snapacct_ufs2(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 89 struct fs , ufs_lbn_t, int); 90static int mapacct_ufs2(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 91 struct fs , ufs_lbn_t, int); 92static int ffs_copyonwrite(struct vnode , struct buf ); 93static int readblock(struct buf , ufs2_daddr_t); 94 95/* 96 * To ensure the consistency of snapshots across crashes, we must 97 * synchronously write out copied blocks before allowing the 98 * originals to be modified. Because of the rather severe speed 99 * penalty that this imposes, the following flag allows this 100 * crash persistence to be disabled. 101 / 102int dopersistence = 0; 103* 104#ifdef DEBUG 105#include <sys/sysctl.h> 106SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
107int snapdebug = 0;	107static int snapdebug = 0;
108SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 109int collectsnapstats = 0; 110SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 111 0, ""); 112#endif /* DEBUG / 113* 114/* 115 * Create a snapshot file and initialize it for the filesystem. 116 / 117int 118ffs_snapshot(mp, snapfile) 119* struct mount mp; 120* char snapfile; 121{ 122* ufs2_daddr_t numblks, blkno, blkp, snapblklist; 123 int error, cg, snaploc; 124 int i, size, len, loc; 125 int flag = mp->mnt_flag; 126 struct timespec starttime = {0, 0}, endtime; 127 char saved_nice = 0; 128 long redo = 0, snaplistsize = 0; 129 int32_t lp; 130* void space; 131* struct fs copy_fs = NULL, fs = VFSTOUFS(mp)->um_fs; 132 struct snaphead snaphead; 133* struct thread td = curthread; 134* struct inode ip, xp; 135 struct buf bp, nbp, ibp, sbp = NULL; 136 struct nameidata nd; 137 struct mount wrtmp; 138* struct vattr vat; 139 struct vnode vp, xvp, nvp, devvp; 140 struct uio auio; 141 struct iovec aiov; 142 143 /* 144 * Need to serialize access to snapshot code per filesystem. 145 / 146* /* 147 * Assign a snapshot slot in the superblock. 148 / 149* for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 150 if (fs->fs_snapinum[snaploc] == 0) 151 break; 152 if (snaploc == FSMAXSNAP) 153 return (ENOSPC); 154 /* 155 * Create the snapshot file. 
156 / 157restart: 158* NDINIT(&nd, CREATE, LOCKPARENT \| LOCKLEAF, UIO_USERSPACE, snapfile, td); 159 if ((error = namei(&nd)) != 0) 160 return (error); 161 if (nd.ni_vp != NULL) { 162 vput(nd.ni_vp); 163 error = EEXIST; 164 } 165 if (nd.ni_dvp->v_mount != mp) 166 error = EXDEV; 167 if (error) { 168 NDFREE(&nd, NDF_ONLY_PNBUF); 169 if (nd.ni_dvp == nd.ni_vp) 170 vrele(nd.ni_dvp); 171 else 172 vput(nd.ni_dvp); 173 return (error); 174 } 175 VATTR_NULL(&vat); 176 vat.va_type = VREG; 177 vat.va_mode = S_IRUSR; 178 vat.va_vaflags \|= VA_EXCLUSIVE; 179 if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 180 wrtmp = NULL; 181 if (wrtmp != mp) 182 panic("ffs_snapshot: mount mismatch"); 183 if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 184 NDFREE(&nd, NDF_ONLY_PNBUF); 185 vput(nd.ni_dvp); 186 if ((error = vn_start_write(NULL, &wrtmp, 187 V_XSLEEP \| PCATCH)) != 0) 188 return (error); 189 goto restart; 190 } 191 VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE); 192 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 193 vput(nd.ni_dvp); 194 if (error) { 195 NDFREE(&nd, NDF_ONLY_PNBUF); 196 vn_finished_write(wrtmp); 197 return (error); 198 } 199 vp = nd.ni_vp; 200 ip = VTOI(vp); 201 devvp = ip->i_devvp; 202 /* 203 * Allocate and copy the last block contents so as to be able 204 * to set size to that of the filesystem. 205 / 206* numblks = howmany(fs->fs_size, fs->fs_frag); 207 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 208 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 209 if (error) 210 goto out; 211 ip->i_size = lblktosize(fs, (off_t)numblks); 212 DIP(ip, i_size) = ip->i_size; 213 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 214 if ((error = readblock(bp, numblks - 1)) != 0) 215 goto out; 216 bawrite(bp); 217 /* 218 * Preallocate critical data structures so that we can copy 219 * them in without further allocation after we suspend all 220 * operations on the filesystem. 
We would like to just release 221 * the allocated buffers without writing them since they will 222 * be filled in below once we are ready to go, but this upsets 223 * the soft update code, so we go ahead and write the new buffers. 224 * 225 * Allocate all indirect blocks and mark all of them as not 226 * needing to be copied. 227 / 228* for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 229 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 230 fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 231 if (error) 232 goto out; 233 bawrite(ibp); 234 } 235 /* 236 * Allocate copies for the superblock and its summary information. 237 / 238* error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 239 0, &nbp); 240 if (error) 241 goto out; 242 bawrite(nbp); 243 blkno = fragstoblks(fs, fs->fs_csaddr); 244 len = howmany(fs->fs_cssize, fs->fs_bsize); 245 for (loc = 0; loc < len; loc++) { 246 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 247 fs->fs_bsize, KERNCRED, 0, &nbp); 248 if (error) 249 goto out; 250 bawrite(nbp); 251 } 252 /* 253 * Allocate all cylinder group blocks. 254 / 255* for (cg = 0; cg < fs->fs_ncg; cg++) { 256 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 257 fs->fs_bsize, KERNCRED, 0, &nbp); 258 if (error) 259 goto out; 260 bawrite(nbp); 261 } 262 /* 263 * Copy all the cylinder group maps. Although the 264 * filesystem is still active, we hope that only a few 265 * cylinder groups will change between now and when we 266 * suspend operations. Thus, we will be able to quickly 267 * touch up the few cylinder groups that changed during 268 * the suspension period. 
269 / 270* len = howmany(fs->fs_ncg, NBBY); 271 MALLOC(fs->fs_active, int , len, M_DEVBUF, M_WAITOK); 272* bzero(fs->fs_active, len); 273 for (cg = 0; cg < fs->fs_ncg; cg++) { 274 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 275 fs->fs_bsize, KERNCRED, 0, &nbp); 276 if (error) 277 goto out; 278 error = cgaccount(cg, vp, nbp, 1); 279 bawrite(nbp); 280 if (error) 281 goto out; 282 } 283 /* 284 * Change inode to snapshot type file. 285 / 286* ip->i_flags \|= SF_SNAPSHOT; 287 DIP(ip, i_flags) = ip->i_flags; 288 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 289 /* 290 * Ensure that the snapshot is completely on disk. 291 * Since we have marked it as a snapshot it is safe to 292 * unlock it as no process will be allowed to write to it. 293 / 294* if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td)) != 0) 295 goto out; 296 VOP_UNLOCK(vp, 0, td); 297 /* 298 * All allocations are done, so we can now snapshot the system. 299 * 300 * Recind nice scheduling while running with the filesystem suspended. 301 / 302* if (td->td_ksegrp->kg_nice > 0) { 303 PROC_LOCK(td->td_proc); 304 mtx_lock_spin(&sched_lock); 305 saved_nice = td->td_ksegrp->kg_nice; 306 sched_nice(td->td_ksegrp, 0); 307 mtx_unlock_spin(&sched_lock); 308 PROC_UNLOCK(td->td_proc); 309 } 310 /* 311 * Suspend operation on filesystem. 312 / 313* for (;;) { 314 vn_finished_write(wrtmp); 315 if ((error = vfs_write_suspend(vp->v_mount)) != 0) { 316 vn_start_write(NULL, &wrtmp, V_WAIT); 317 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td); 318 goto out; 319 } 320 if (mp->mnt_kern_flag & MNTK_SUSPENDED) 321 break; 322 vn_start_write(NULL, &wrtmp, V_WAIT); 323 } 324 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td); 325 if (collectsnapstats) 326 nanotime(&starttime); 327 /* 328 * First, copy all the cylinder group maps that have changed. 
329 / 330* for (cg = 0; cg < fs->fs_ncg; cg++) { 331 if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 332 continue; 333 redo++; 334 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 335 fs->fs_bsize, KERNCRED, 0, &nbp); 336 if (error) 337 goto out1; 338 error = cgaccount(cg, vp, nbp, 2); 339 bawrite(nbp); 340 if (error) 341 goto out1; 342 } 343 /* 344 * Grab a copy of the superblock and its summary information. 345 * We delay writing it until the suspension is released below. 346 / 347* error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 348 KERNCRED, &sbp); 349 if (error) { 350 brelse(sbp); 351 sbp = NULL; 352 goto out1; 353 } 354 loc = blkoff(fs, fs->fs_sblockloc); 355 copy_fs = (struct fs )(sbp->b_data + loc); 356* bcopy(fs, copy_fs, fs->fs_sbsize); 357 if ((fs->fs_flags & (FS_UNCLEAN \| FS_NEEDSFSCK)) == 0) 358 copy_fs->fs_clean = 1; 359 size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; 360 if (fs->fs_sbsize < size) 361 bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); 362 size = blkroundup(fs, fs->fs_cssize); 363 if (fs->fs_contigsumsize > 0) 364 size += fs->fs_ncg * sizeof(int32_t); 365 space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 366 copy_fs->fs_csp = space; 367 bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 368 (char )space += fs->fs_cssize; 369* loc = howmany(fs->fs_cssize, fs->fs_fsize); 370 i = fs->fs_frag - loc % fs->fs_frag; 371 len = (i == fs->fs_frag) ? 
0 : i * fs->fs_fsize; 372 if (len > 0) { 373 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 374 len, KERNCRED, &bp)) != 0) { 375 brelse(bp); 376 free(copy_fs->fs_csp, M_UFSMNT); 377 bawrite(sbp); 378 sbp = NULL; 379 goto out1; 380 } 381 bcopy(bp->b_data, space, (u_int)len); 382 (char )space += len; 383* bp->b_flags \|= B_INVAL \| B_NOCACHE; 384 brelse(bp); 385 } 386 if (fs->fs_contigsumsize > 0) { 387 copy_fs->fs_maxcluster = lp = space; 388 for (i = 0; i < fs->fs_ncg; i++) 389 lp++ = fs->fs_contigsumsize; 390* } 391 /* 392 * We must check for active files that have been unlinked 393 * (e.g., with a zero link count). We have to expunge all 394 * trace of these files from the snapshot so that they are 395 * not reclaimed prematurely by fsck or unnecessarily dumped. 396 * We turn off the MNTK_SUSPENDED flag to avoid a panic from 397 * spec_strategy about writing on a suspended filesystem. 398 * Note that we skip unlinked snapshot files as they will 399 * be handled separately below. 400 * 401 * We also calculate the needed size for the snapshot list. 402 / 403* snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 404 FSMAXSNAP + 1 /* superblock / + 1 / last block / + 1 / size /; 405* mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 406 mtx_lock(&mntvnode_mtx); 407loop: 408 for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) { 409 /* 410 * Make sure this vnode wasn't reclaimed in getnewvnode(). 411 * Start over if it has (it won't be on the list anymore). 
412 / 413* if (xvp->v_mount != mp) 414 goto loop; 415 nvp = TAILQ_NEXT(xvp, v_nmntvnodes); 416 mtx_unlock(&mntvnode_mtx); 417 mp_fixme("Unlocked GETATTR."); 418 if (vrefcnt(xvp) == 0 \|\| xvp->v_type == VNON \|\| 419 (VTOI(xvp)->i_flags & SF_SNAPSHOT) \|\| 420 (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 && 421 vat.va_nlink > 0)) { 422 mtx_lock(&mntvnode_mtx); 423 continue; 424 } 425 if (snapdebug) 426 vprint("ffs_snapshot: busy vnode", xvp); 427 if (vn_lock(xvp, LK_EXCLUSIVE, td) != 0) 428 goto loop; 429 xp = VTOI(xvp); 430 if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 431 VOP_UNLOCK(xvp, 0, td); 432 continue; 433 } 434 /* 435 * If there is a fragment, clear it here. 436 / 437* blkno = 0; 438 loc = howmany(xp->i_size, fs->fs_bsize) - 1; 439 if (loc < NDADDR) { 440 len = fragroundup(fs, blkoff(fs, xp->i_size)); 441 if (len < fs->fs_bsize) { 442 ffs_blkfree(copy_fs, vp, DIP(xp, i_db[loc]), 443 len, xp->i_number); 444 blkno = DIP(xp, i_db[loc]); 445 DIP(xp, i_db[loc]) = 0; 446 } 447 } 448 snaplistsize += 1; 449 if (xp->i_ump->um_fstype == UFS1) 450 error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 451 BLK_NOCOPY); 452 else 453 error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 454 BLK_NOCOPY); 455 if (blkno) 456 DIP(xp, i_db[loc]) = blkno; 457 if (!error) 458 error = ffs_freefile(copy_fs, vp, xp->i_number, 459 xp->i_mode); 460 VOP_UNLOCK(xvp, 0, td); 461 if (error) { 462 free(copy_fs->fs_csp, M_UFSMNT); 463 bawrite(sbp); 464 sbp = NULL; 465 goto out1; 466 } 467 mtx_lock(&mntvnode_mtx); 468 } 469 mtx_unlock(&mntvnode_mtx); 470 /* 471 * If there already exist snapshots on this filesystem, grab a 472 * reference to their shared lock. If this is the first snapshot 473 * on this filesystem, we need to allocate a lock for the snapshots 474 * to share. In either case, acquire the snapshot lock and give 475 * up our original private lock. 
476 / 477* VI_LOCK(devvp); 478 snaphead = &devvp->v_rdev->si_snapshots; 479 if ((xp = TAILQ_FIRST(snaphead)) != NULL) { 480 VI_LOCK(vp); 481 vp->v_vnlock = ITOV(xp)->v_vnlock; 482 VI_UNLOCK(devvp); 483 } else { 484 struct lock lkp; 485* 486 VI_UNLOCK(devvp); 487 MALLOC(lkp, struct lock , sizeof(struct lock), M_UFSMNT, 488* M_WAITOK); 489 lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT, 490 LK_CANRECURSE \| LK_NOPAUSE); 491 VI_LOCK(vp); 492 vp->v_vnlock = lkp; 493 } 494 vn_lock(vp, LK_INTERLOCK \| LK_EXCLUSIVE \| LK_RETRY, td); 495 transferlockers(&vp->v_lock, vp->v_vnlock); 496 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 497 /* 498 * If this is the first snapshot on this filesystem, then we need 499 * to allocate the space for the list of preallocated snapshot blocks. 500 * This list will be refined below, but this preliminary one will 501 * keep us out of deadlock until the full one is ready. 502 / 503* if (xp == NULL) { 504 MALLOC(snapblklist, daddr_t , snaplistsize sizeof(daddr_t), 505 M_UFSMNT, M_WAITOK); 506 blkp = &snapblklist[1]; 507 blkp++ = lblkno(fs, fs->fs_sblockloc); 508* blkno = fragstoblks(fs, fs->fs_csaddr); 509 for (cg = 0; cg < fs->fs_ncg; cg++) { 510 if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 511 break; 512 blkp++ = fragstoblks(fs, cgtod(fs, cg)); 513* } 514 len = howmany(fs->fs_cssize, fs->fs_bsize); 515 for (loc = 0; loc < len; loc++) 516 blkp++ = blkno + loc; 517* for (; cg < fs->fs_ncg; cg++) 518 blkp++ = fragstoblks(fs, cgtod(fs, cg)); 519* snapblklist[0] = blkp - snapblklist; 520 VI_LOCK(devvp); 521 if (devvp->v_rdev->si_snapblklist != NULL) 522 panic("ffs_snapshot: non-empty list"); 523 devvp->v_rdev->si_snapblklist = snapblklist; 524 devvp->v_rdev->si_snaplistsize = blkp - snapblklist; 525 VI_UNLOCK(devvp); 526 } 527 /* 528 * Record snapshot inode. Since this is the newest snapshot, 529 * it must be placed at the end of the list. 
530 / 531* VI_LOCK(devvp); 532 fs->fs_snapinum[snaploc] = ip->i_number; 533 if (ip->i_nextsnap.tqe_prev != 0) 534 panic("ffs_snapshot: %d already on list", ip->i_number); 535 TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); 536 devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; 537 devvp->v_vflag \|= VV_COPYONWRITE; 538 VI_UNLOCK(devvp); 539 ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 540 vp->v_vflag \|= VV_SYSTEM; 541out1: 542 /* 543 * Resume operation on filesystem. 544 / 545* vfs_write_resume(vp->v_mount); 546 vn_start_write(NULL, &wrtmp, V_WAIT); 547 if (collectsnapstats && starttime.tv_sec > 0) { 548 nanotime(&endtime); 549 timespecsub(&endtime, &starttime); 550 printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 551 vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 552 endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 553 } 554 if (sbp == NULL) 555 goto out; 556 /* 557 * Copy allocation information from all the snapshots in 558 * this snapshot and then expunge them from its view. 559 / 560* snaphead = &devvp->v_rdev->si_snapshots; 561 TAILQ_FOREACH(xp, snaphead, i_nextsnap) { 562 if (xp == ip) 563 break; 564 if (xp->i_ump->um_fstype == UFS1) 565 error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 566 BLK_SNAP); 567 else 568 error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 569 BLK_SNAP); 570 if (error) { 571 fs->fs_snapinum[snaploc] = 0; 572 goto done; 573 } 574 } 575 /* 576 * Allocate space for the full list of preallocated snapshot blocks. 577 / 578* MALLOC(snapblklist, daddr_t , snaplistsize sizeof(daddr_t), 579 M_UFSMNT, M_WAITOK); 580 ip->i_snapblklist = &snapblklist[1]; 581 /* 582 * Expunge the blocks used by the snapshots from the set of 583 * blocks marked as used in the snapshot bitmaps. Also, collect 584 * the list of allocated blocks in i_snapblklist. 
585 / 586* if (ip->i_ump->um_fstype == UFS1) 587 error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); 588 else 589 error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); 590 if (error) { 591 fs->fs_snapinum[snaploc] = 0; 592 FREE(snapblklist, M_UFSMNT); 593 goto done; 594 } 595 if (snaplistsize < ip->i_snapblklist - snapblklist) 596 panic("ffs_snapshot: list too small"); 597 snaplistsize = ip->i_snapblklist - snapblklist; 598 snapblklist[0] = snaplistsize; 599 ip->i_snapblklist = 0; 600 /* 601 * Write out the list of allocated blocks to the end of the snapshot. 602 / 603* auio.uio_iov = &aiov; 604 auio.uio_iovcnt = 1; 605 aiov.iov_base = (void )snapblklist; 606* aiov.iov_len = snaplistsize * sizeof(daddr_t); 607 auio.uio_resid = aiov.iov_len;; 608 auio.uio_offset = ip->i_size; 609 auio.uio_segflg = UIO_SYSSPACE; 610 auio.uio_rw = UIO_WRITE; 611 auio.uio_td = td; 612 if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 613 fs->fs_snapinum[snaploc] = 0; 614 FREE(snapblklist, M_UFSMNT); 615 goto done; 616 } 617 /* 618 * Write the superblock and its summary information 619 * to the snapshot. 620 / 621* blkno = fragstoblks(fs, fs->fs_csaddr); 622 len = howmany(fs->fs_cssize, fs->fs_bsize); 623 space = copy_fs->fs_csp; 624 for (loc = 0; loc < len; loc++) { 625 error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 626 if (error) { 627 brelse(nbp); 628 fs->fs_snapinum[snaploc] = 0; 629 FREE(snapblklist, M_UFSMNT); 630 goto done; 631 } 632 bcopy(space, nbp->b_data, fs->fs_bsize); 633 space = (char )space + fs->fs_bsize; 634* bawrite(nbp); 635 } 636 /* 637 * As this is the newest list, it is the most inclusive, so 638 * should replace the previous list. 
639 / 640* VI_LOCK(devvp); 641 space = devvp->v_rdev->si_snapblklist; 642 devvp->v_rdev->si_snapblklist = snapblklist; 643 devvp->v_rdev->si_snaplistsize = snaplistsize; 644 if (space != NULL) 645 FREE(space, M_UFSMNT); 646 VI_UNLOCK(devvp); 647done: 648 free(copy_fs->fs_csp, M_UFSMNT); 649 bawrite(sbp); 650out: 651 if (saved_nice > 0) { 652 PROC_LOCK(td->td_proc); 653 mtx_lock_spin(&sched_lock); 654 sched_nice(td->td_ksegrp, saved_nice); 655 mtx_unlock_spin(&sched_lock); 656 PROC_UNLOCK(td->td_proc); 657 } 658 if (fs->fs_active != 0) { 659 FREE(fs->fs_active, M_DEVBUF); 660 fs->fs_active = 0; 661 } 662 mp->mnt_flag = flag; 663 if (error) 664 (void) UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td); 665 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 666 if (error) 667 vput(vp); 668 else 669 VOP_UNLOCK(vp, 0, td); 670 vn_finished_write(wrtmp); 671 return (error); 672} 673 674/* 675 * Copy a cylinder group map. All the unallocated blocks are marked 676 * BLK_NOCOPY so that the snapshot knows that it need not copy them 677 * if they are later written. If passno is one, then this is a first 678 * pass, so only setting needs to be done. If passno is 2, then this 679 * is a revision to a previous pass which must be undone as the 680 * replacement pass is done. 
681 / 682static int 683cgaccount(cg, vp, nbp, passno) 684* int cg; 685 struct vnode vp; 686* struct buf nbp; 687* int passno; 688{ 689 struct buf bp, ibp; 690 struct inode ip; 691* struct cg cgp; 692* struct fs fs; 693* ufs2_daddr_t base, numblks; 694 int error, len, loc, indiroff; 695 696 ip = VTOI(vp); 697 fs = ip->i_fs; 698 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 699 (int)fs->fs_cgsize, KERNCRED, &bp); 700 if (error) { 701 brelse(bp); 702 return (error); 703 } 704 cgp = (struct cg )bp->b_data; 705* if (!cg_chkmagic(cgp)) { 706 brelse(bp); 707 return (EIO); 708 } 709 atomic_set_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 710 bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); 711 if (fs->fs_cgsize < fs->fs_bsize) 712 bzero(&nbp->b_data[fs->fs_cgsize], 713 fs->fs_bsize - fs->fs_cgsize); 714 if (passno == 2) 715 nbp->b_flags \|= B_VALIDSUSPWRT; 716 numblks = howmany(fs->fs_size, fs->fs_frag); 717 len = howmany(fs->fs_fpg, fs->fs_frag); 718 base = cg * fs->fs_fpg / fs->fs_frag; 719 if (base + len >= numblks) 720 len = numblks - base - 1; 721 loc = 0; 722 if (base < NDADDR) { 723 for ( ; loc < NDADDR; loc++) { 724 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 725 DIP(ip, i_db[loc]) = BLK_NOCOPY; 726 else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) 727 DIP(ip, i_db[loc]) = 0; 728 else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) 729 panic("ffs_snapshot: lost direct block"); 730 } 731 } 732 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), 733 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 734 if (error) { 735 brelse(bp); 736 return (error); 737 } 738 indiroff = (base + loc - NDADDR) % NINDIR(fs); 739 for ( ; loc < len; loc++, indiroff++) { 740 if (indiroff >= NINDIR(fs)) { 741 if (passno == 2) 742 ibp->b_flags \|= B_VALIDSUSPWRT; 743 bawrite(ibp); 744 error = UFS_BALLOC(vp, 745 lblktosize(fs, (off_t)(base + loc)), 746 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 747 if (error) { 748 brelse(bp); 749 return (error); 750 } 751 indiroff = 0; 
752 } 753 if (ip->i_ump->um_fstype == UFS1) { 754 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 755 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = 756* BLK_NOCOPY; 757 else if (passno == 2 && ((ufs1_daddr_t )(ibp->b_data)) 758* [indiroff] == BLK_NOCOPY) 759 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = 0; 760* else if (passno == 1 && ((ufs1_daddr_t )(ibp->b_data)) 761* [indiroff] == BLK_NOCOPY) 762 panic("ffs_snapshot: lost indirect block"); 763 continue; 764 } 765 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 766 ((ufs2_daddr_t )(ibp->b_data))[indiroff] = BLK_NOCOPY; 767* else if (passno == 2 && 768 ((ufs2_daddr_t )(ibp->b_data)) [indiroff] == BLK_NOCOPY) 769* ((ufs2_daddr_t )(ibp->b_data))[indiroff] = 0; 770* else if (passno == 1 && 771 ((ufs2_daddr_t )(ibp->b_data)) [indiroff] == BLK_NOCOPY) 772* panic("ffs_snapshot: lost indirect block"); 773 } 774 bqrelse(bp); 775 if (passno == 2) 776 ibp->b_flags \|= B_VALIDSUSPWRT; 777 bdwrite(ibp); 778 return (0); 779} 780 781/* 782 * Before expunging a snapshot inode, note all the 783 * blocks that it claims with BLK_SNAP so that fsck will 784 * be able to account for those blocks properly and so 785 * that this snapshot knows that it need not copy them 786 * if the other snapshot holding them is freed. This code 787 * is reproduced once each for UFS1 and UFS2. 788 / 789static int 790expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) 791* struct vnode snapvp; 792* struct inode cancelip; 793* struct fs fs; 794* int (acctfunc)(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 795 struct fs , ufs_lbn_t, int); 796* int expungetype; 797{ 798 int i, error, indiroff; 799 ufs_lbn_t lbn, rlbn; 800 ufs2_daddr_t len, blkno, numblks, blksperindir; 801 struct ufs1_dinode dip; 802* struct thread td = curthread; 803* struct buf bp; 804* 805 /* 806 * Prepare to expunge the inode. If its inode block has not 807 * yet been copied, then allocate and fill the copy. 
808 / 809* lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 810 blkno = 0; 811 if (lbn < NDADDR) { 812 blkno = VTOI(snapvp)->i_din1->di_db[lbn]; 813 } else { 814 td->td_proc->p_flag \|= P_COWINPROGRESS; 815 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), 816 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 817 td->td_proc->p_flag &= ~P_COWINPROGRESS; 818 if (error) 819 return (error); 820 indiroff = (lbn - NDADDR) % NINDIR(fs); 821 blkno = ((ufs1_daddr_t )(bp->b_data))[indiroff]; 822* bqrelse(bp); 823 } 824 if (blkno != 0) { 825 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 826 return (error); 827 } else { 828 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), 829 fs->fs_bsize, KERNCRED, 0, &bp); 830 if (error) 831 return (error); 832 if ((error = readblock(bp, lbn)) != 0) 833 return (error); 834 } 835 /* 836 * Set a snapshot inode to be a zero length file, regular files 837 * to be completely unallocated. 838 / 839* dip = (struct ufs1_dinode )bp->b_data + 840* ino_to_fsbo(fs, cancelip->i_number); 841 if (expungetype == BLK_NOCOPY) 842 dip->di_mode = 0; 843 dip->di_size = 0; 844 dip->di_blocks = 0; 845 dip->di_flags &= ~SF_SNAPSHOT; 846 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); 847 bdwrite(bp); 848 /* 849 * Now go through and expunge all the blocks in the file 850 * using the function requested. 
851 / 852* numblks = howmany(cancelip->i_size, fs->fs_bsize); 853 if ((error = (acctfunc)(snapvp, &cancelip->i_din1->di_db[0], 854* &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) 855 return (error); 856 if ((error = (acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], 857* &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) 858 return (error); 859 blksperindir = 1; 860 lbn = -NDADDR; 861 len = numblks - NDADDR; 862 rlbn = NDADDR; 863 for (i = 0; len > 0 && i < NIADDR; i++) { 864 error = indiracct_ufs1(snapvp, ITOV(cancelip), i, 865 cancelip->i_din1->di_ib[i], lbn, rlbn, len, 866 blksperindir, fs, acctfunc, expungetype); 867 if (error) 868 return (error); 869 blksperindir = NINDIR(fs); 870* lbn -= blksperindir + 1; 871 len -= blksperindir; 872 rlbn += blksperindir; 873 } 874 return (0); 875} 876 877/* 878 * Descend an indirect block chain for vnode cancelvp accounting for all 879 * its indirect blocks in snapvp. 880 / 881static int 882indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 883* blksperindir, fs, acctfunc, expungetype) 884 struct vnode snapvp; 885* struct vnode cancelvp; 886* int level; 887 ufs1_daddr_t blkno; 888 ufs_lbn_t lbn; 889 ufs_lbn_t rlbn; 890 ufs_lbn_t remblks; 891 ufs_lbn_t blksperindir; 892 struct fs fs; 893* int (acctfunc)(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 894 struct fs , ufs_lbn_t, int); 895* int expungetype; 896{ 897 int error, num, i; 898 ufs_lbn_t subblksperindir; 899 struct indir indirs[NIADDR + 2]; 900 ufs1_daddr_t last, bap; 901* struct buf bp; 902* 903 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 904 return (error); 905 if (lbn != indirs[num - 1 - level].in_lbn \|\| blkno == 0 \|\| num < 2) 906 panic("indiracct: botched params"); 907 /* 908 * We have to expand bread here since it will deadlock looking 909 * up the block number for any blocks that are not in the cache. 
910 / 911* bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 912 bp->b_blkno = fsbtodb(fs, blkno); 913 if ((bp->b_flags & (B_DONE \| B_DELWRI)) == 0 && 914 (error = readblock(bp, fragstoblks(fs, blkno)))) { 915 brelse(bp); 916 return (error); 917 } 918 /* 919 * Account for the block pointers in this indirect block. 920 / 921* last = howmany(remblks, blksperindir); 922 if (last > NINDIR(fs)) 923 last = NINDIR(fs); 924 MALLOC(bap, ufs1_daddr_t , fs->fs_bsize, M_DEVBUF, M_WAITOK); 925* bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 926 bqrelse(bp); 927 error = (acctfunc)(snapvp, &bap[0], &bap[last], fs, 928* level == 0 ? rlbn : -1, expungetype); 929 if (error \|\| level == 0) 930 goto out; 931 /* 932 * Account for the block pointers in each of the indirect blocks 933 * in the levels below us. 934 / 935* subblksperindir = blksperindir / NINDIR(fs); 936 for (lbn++, level--, i = 0; i < last; i++) { 937 error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, 938 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 939 if (error) 940 goto out; 941 rlbn += blksperindir; 942 lbn -= blksperindir; 943 remblks -= blksperindir; 944 } 945out: 946 FREE(bap, M_DEVBUF); 947 return (error); 948} 949 950/* 951 * Do both snap accounting and map accounting. 952 / 953static int 954fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) 955* struct vnode vp; 956* ufs1_daddr_t oldblkp, lastblkp; 957 struct fs fs; 958* ufs_lbn_t lblkno; 959 int exptype; /* BLK_SNAP or BLK_NOCOPY / 960{ 961* int error; 962 963 if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 964 return (error); 965 return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 966} 967 968/* 969 * Identify a set of blocks allocated in a snapshot inode. 
970 / 971static int 972snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 973* struct vnode vp; 974* ufs1_daddr_t oldblkp, lastblkp; 975 struct fs fs; 976* ufs_lbn_t lblkno; 977 int expungetype; /* BLK_SNAP or BLK_NOCOPY / 978{ 979* struct inode ip = VTOI(vp); 980* ufs1_daddr_t blkno, blkp; 981* ufs_lbn_t lbn; 982 struct buf ibp; 983* int error; 984 985 for ( ; oldblkp < lastblkp; oldblkp++) { 986 blkno = oldblkp; 987* if (blkno == 0 \|\| blkno == BLK_NOCOPY \|\| blkno == BLK_SNAP) 988 continue; 989 lbn = fragstoblks(fs, blkno); 990 if (lbn < NDADDR) { 991 blkp = &ip->i_din1->di_db[lbn]; 992 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 993 } else { 994 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 995 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 996 if (error) 997 return (error); 998 blkp = &((ufs1_daddr_t )(ibp->b_data)) 999* [(lbn - NDADDR) % NINDIR(fs)]; 1000 } 1001 /* 1002 * If we are expunging a snapshot vnode and we 1003 * find a block marked BLK_NOCOPY, then it is 1004 * one that has been allocated to this snapshot after 1005 * we took our current snapshot and can be ignored. 1006 / 1007* if (expungetype == BLK_SNAP && blkp == BLK_NOCOPY) { 1008* if (lbn >= NDADDR) 1009 brelse(ibp); 1010 } else { 1011 if (blkp != 0) 1012* panic("snapacct: bad block"); 1013 blkp = expungetype; 1014* if (lbn >= NDADDR) 1015 bdwrite(ibp); 1016 } 1017 } 1018 return (0); 1019} 1020 1021/* 1022 * Account for a set of blocks allocated in a snapshot inode. 
1023 / 1024static int 1025mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1026* struct vnode vp; 1027* ufs1_daddr_t oldblkp, lastblkp; 1028 struct fs fs; 1029* ufs_lbn_t lblkno; 1030 int expungetype; 1031{ 1032 ufs1_daddr_t blkno; 1033 struct inode ip; 1034* ino_t inum; 1035 int acctit; 1036 1037 ip = VTOI(vp); 1038 inum = ip->i_number; 1039 if (lblkno == -1) 1040 acctit = 0; 1041 else 1042 acctit = 1; 1043 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1044 blkno = oldblkp; 1045* if (blkno == 0 \|\| blkno == BLK_NOCOPY) 1046 continue; 1047 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1048 ip->i_snapblklist++ = lblkno; 1049* if (blkno == BLK_SNAP) 1050 blkno = blkstofrags(fs, lblkno); 1051 ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); 1052 } 1053 return (0); 1054} 1055 1056/* 1057 * Before expunging a snapshot inode, note all the 1058 * blocks that it claims with BLK_SNAP so that fsck will 1059 * be able to account for those blocks properly and so 1060 * that this snapshot knows that it need not copy them 1061 * if the other snapshot holding them is freed. This code 1062 * is reproduced once each for UFS1 and UFS2. 1063 / 1064static int 1065expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) 1066* struct vnode snapvp; 1067* struct inode cancelip; 1068* struct fs fs; 1069* int (acctfunc)(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 1070 struct fs , ufs_lbn_t, int); 1071* int expungetype; 1072{ 1073 int i, error, indiroff; 1074 ufs_lbn_t lbn, rlbn; 1075 ufs2_daddr_t len, blkno, numblks, blksperindir; 1076 struct ufs2_dinode dip; 1077* struct thread td = curthread; 1078* struct buf bp; 1079* 1080 /* 1081 * Prepare to expunge the inode. If its inode block has not 1082 * yet been copied, then allocate and fill the copy. 
1083 / 1084* lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 1085 blkno = 0; 1086 if (lbn < NDADDR) { 1087 blkno = VTOI(snapvp)->i_din2->di_db[lbn]; 1088 } else { 1089 td->td_proc->p_flag \|= P_COWINPROGRESS; 1090 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), 1091 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 1092 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1093 if (error) 1094 return (error); 1095 indiroff = (lbn - NDADDR) % NINDIR(fs); 1096 blkno = ((ufs2_daddr_t )(bp->b_data))[indiroff]; 1097* bqrelse(bp); 1098 } 1099 if (blkno != 0) { 1100 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 1101 return (error); 1102 } else { 1103 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), 1104 fs->fs_bsize, KERNCRED, 0, &bp); 1105 if (error) 1106 return (error); 1107 if ((error = readblock(bp, lbn)) != 0) 1108 return (error); 1109 } 1110 /* 1111 * Set a snapshot inode to be a zero length file, regular files 1112 * to be completely unallocated. 1113 / 1114* dip = (struct ufs2_dinode )bp->b_data + 1115* ino_to_fsbo(fs, cancelip->i_number); 1116 if (expungetype == BLK_NOCOPY) 1117 dip->di_mode = 0; 1118 dip->di_size = 0; 1119 dip->di_blocks = 0; 1120 dip->di_flags &= ~SF_SNAPSHOT; 1121 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); 1122 bdwrite(bp); 1123 /* 1124 * Now go through and expunge all the blocks in the file 1125 * using the function requested. 
1126 / 1127* numblks = howmany(cancelip->i_size, fs->fs_bsize); 1128 if ((error = (acctfunc)(snapvp, &cancelip->i_din2->di_db[0], 1129* &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) 1130 return (error); 1131 if ((error = (acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], 1132* &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) 1133 return (error); 1134 blksperindir = 1; 1135 lbn = -NDADDR; 1136 len = numblks - NDADDR; 1137 rlbn = NDADDR; 1138 for (i = 0; len > 0 && i < NIADDR; i++) { 1139 error = indiracct_ufs2(snapvp, ITOV(cancelip), i, 1140 cancelip->i_din2->di_ib[i], lbn, rlbn, len, 1141 blksperindir, fs, acctfunc, expungetype); 1142 if (error) 1143 return (error); 1144 blksperindir = NINDIR(fs); 1145* lbn -= blksperindir + 1; 1146 len -= blksperindir; 1147 rlbn += blksperindir; 1148 } 1149 return (0); 1150} 1151 1152/* 1153 * Descend an indirect block chain for vnode cancelvp accounting for all 1154 * its indirect blocks in snapvp. 1155 / 1156static int 1157indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 1158* blksperindir, fs, acctfunc, expungetype) 1159 struct vnode snapvp; 1160* struct vnode cancelvp; 1161* int level; 1162 ufs2_daddr_t blkno; 1163 ufs_lbn_t lbn; 1164 ufs_lbn_t rlbn; 1165 ufs_lbn_t remblks; 1166 ufs_lbn_t blksperindir; 1167 struct fs fs; 1168* int (acctfunc)(struct vnode , ufs2_daddr_t , ufs2_daddr_t , 1169 struct fs , ufs_lbn_t, int); 1170* int expungetype; 1171{ 1172 int error, num, i; 1173 ufs_lbn_t subblksperindir; 1174 struct indir indirs[NIADDR + 2]; 1175 ufs2_daddr_t last, bap; 1176* struct buf bp; 1177* 1178 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 1179 return (error); 1180 if (lbn != indirs[num - 1 - level].in_lbn \|\| blkno == 0 \|\| num < 2) 1181 panic("indiracct: botched params"); 1182 /* 1183 * We have to expand bread here since it will deadlock looking 1184 * up the block number for any blocks that are not in the cache. 
1185 / 1186* bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 1187 bp->b_blkno = fsbtodb(fs, blkno); 1188 if ((bp->b_flags & (B_DONE \| B_DELWRI)) == 0 && 1189 (error = readblock(bp, fragstoblks(fs, blkno)))) { 1190 brelse(bp); 1191 return (error); 1192 } 1193 /* 1194 * Account for the block pointers in this indirect block. 1195 / 1196* last = howmany(remblks, blksperindir); 1197 if (last > NINDIR(fs)) 1198 last = NINDIR(fs); 1199 MALLOC(bap, ufs2_daddr_t , fs->fs_bsize, M_DEVBUF, M_WAITOK); 1200* bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 1201 bqrelse(bp); 1202 error = (acctfunc)(snapvp, &bap[0], &bap[last], fs, 1203* level == 0 ? rlbn : -1, expungetype); 1204 if (error \|\| level == 0) 1205 goto out; 1206 /* 1207 * Account for the block pointers in each of the indirect blocks 1208 * in the levels below us. 1209 / 1210* subblksperindir = blksperindir / NINDIR(fs); 1211 for (lbn++, level--, i = 0; i < last; i++) { 1212 error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, 1213 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 1214 if (error) 1215 goto out; 1216 rlbn += blksperindir; 1217 lbn -= blksperindir; 1218 remblks -= blksperindir; 1219 } 1220out: 1221 FREE(bap, M_DEVBUF); 1222 return (error); 1223} 1224 1225/* 1226 * Do both snap accounting and map accounting. 1227 / 1228static int 1229fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) 1230* struct vnode vp; 1231* ufs2_daddr_t oldblkp, lastblkp; 1232 struct fs fs; 1233* ufs_lbn_t lblkno; 1234 int exptype; /* BLK_SNAP or BLK_NOCOPY / 1235{ 1236* int error; 1237 1238 if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 1239 return (error); 1240 return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 1241} 1242 1243/* 1244 * Identify a set of blocks allocated in a snapshot inode. 
1245 / 1246static int 1247snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1248* struct vnode vp; 1249* ufs2_daddr_t oldblkp, lastblkp; 1250 struct fs fs; 1251* ufs_lbn_t lblkno; 1252 int expungetype; /* BLK_SNAP or BLK_NOCOPY / 1253{ 1254* struct inode ip = VTOI(vp); 1255* ufs2_daddr_t blkno, blkp; 1256* ufs_lbn_t lbn; 1257 struct buf ibp; 1258* int error; 1259 1260 for ( ; oldblkp < lastblkp; oldblkp++) { 1261 blkno = oldblkp; 1262* if (blkno == 0 \|\| blkno == BLK_NOCOPY \|\| blkno == BLK_SNAP) 1263 continue; 1264 lbn = fragstoblks(fs, blkno); 1265 if (lbn < NDADDR) { 1266 blkp = &ip->i_din2->di_db[lbn]; 1267 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1268 } else { 1269 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1270 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1271 if (error) 1272 return (error); 1273 blkp = &((ufs2_daddr_t )(ibp->b_data)) 1274* [(lbn - NDADDR) % NINDIR(fs)]; 1275 } 1276 /* 1277 * If we are expunging a snapshot vnode and we 1278 * find a block marked BLK_NOCOPY, then it is 1279 * one that has been allocated to this snapshot after 1280 * we took our current snapshot and can be ignored. 1281 / 1282* if (expungetype == BLK_SNAP && blkp == BLK_NOCOPY) { 1283* if (lbn >= NDADDR) 1284 brelse(ibp); 1285 } else { 1286 if (blkp != 0) 1287* panic("snapacct: bad block"); 1288 blkp = expungetype; 1289* if (lbn >= NDADDR) 1290 bdwrite(ibp); 1291 } 1292 } 1293 return (0); 1294} 1295 1296/* 1297 * Account for a set of blocks allocated in a snapshot inode. 
1298 / 1299static int 1300mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1301* struct vnode vp; 1302* ufs2_daddr_t oldblkp, lastblkp; 1303 struct fs fs; 1304* ufs_lbn_t lblkno; 1305 int expungetype; 1306{ 1307 ufs2_daddr_t blkno; 1308 struct inode ip; 1309* ino_t inum; 1310 int acctit; 1311 1312 ip = VTOI(vp); 1313 inum = ip->i_number; 1314 if (lblkno == -1) 1315 acctit = 0; 1316 else 1317 acctit = 1; 1318 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1319 blkno = oldblkp; 1320* if (blkno == 0 \|\| blkno == BLK_NOCOPY) 1321 continue; 1322 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1323 ip->i_snapblklist++ = lblkno; 1324* if (blkno == BLK_SNAP) 1325 blkno = blkstofrags(fs, lblkno); 1326 ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); 1327 } 1328 return (0); 1329} 1330 1331/* 1332 * Decrement extra reference on snapshot when last name is removed. 1333 * It will not be freed until the last open reference goes away. 1334 / 1335void 1336ffs_snapgone(ip) 1337* struct inode ip; 1338{ 1339* struct inode xp; 1340* struct fs fs; 1341* int snaploc; 1342 1343 /* 1344 * Find snapshot in incore list. 1345 / 1346* TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap) 1347 if (xp == ip) 1348 break; 1349 if (xp != NULL) 1350 vrele(ITOV(ip)); 1351 else if (snapdebug) 1352 printf("ffs_snapgone: lost snapshot vnode %d\n", 1353 ip->i_number); 1354 /* 1355 * Delete snapshot inode from superblock. Keep list dense. 1356 / 1357* fs = ip->i_fs; 1358 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 1359 if (fs->fs_snapinum[snaploc] == ip->i_number) 1360 break; 1361 if (snaploc < FSMAXSNAP) { 1362 for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 1363 if (fs->fs_snapinum[snaploc] == 0) 1364 break; 1365 fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 1366 } 1367 fs->fs_snapinum[snaploc - 1] = 0; 1368 } 1369} 1370 1371/* 1372 * Prepare a snapshot file for being removed. 
1373 / 1374void 1375ffs_snapremove(vp) 1376* struct vnode vp; 1377{ 1378* struct inode ip; 1379* struct vnode devvp; 1380* struct lock lkp; 1381* struct buf ibp; 1382* struct fs fs; 1383* struct thread td = curthread; 1384* ufs2_daddr_t numblks, blkno, dblk, snapblklist; 1385* int error, loc, last; 1386 1387 ip = VTOI(vp); 1388 fs = ip->i_fs; 1389 devvp = ip->i_devvp; 1390 /* 1391 * If active, delete from incore list (this snapshot may 1392 * already have been in the process of being deleted, so 1393 * would not have been active). 1394 * 1395 * Clear copy-on-write flag if last snapshot. 1396 / 1397* if (ip->i_nextsnap.tqe_prev != 0) { 1398 VI_LOCK(devvp); 1399 lockmgr(&vp->v_lock, LK_INTERLOCK \| LK_EXCLUSIVE, 1400 VI_MTX(devvp), td); 1401 VI_LOCK(devvp); 1402 TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap); 1403 ip->i_nextsnap.tqe_prev = 0; 1404 lkp = vp->v_vnlock; 1405 vp->v_vnlock = &vp->v_lock; 1406 lockmgr(lkp, LK_RELEASE, NULL, td); 1407 if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) != 0) { 1408 VI_UNLOCK(devvp); 1409 } else { 1410 snapblklist = devvp->v_rdev->si_snapblklist; 1411 devvp->v_rdev->si_snapblklist = 0; 1412 devvp->v_rdev->si_snaplistsize = 0; 1413 devvp->v_rdev->si_copyonwrite = 0; 1414 devvp->v_vflag &= ~VV_COPYONWRITE; 1415 lockmgr(lkp, LK_DRAIN\|LK_INTERLOCK, VI_MTX(devvp), td); 1416 lockmgr(lkp, LK_RELEASE, NULL, td); 1417 lockdestroy(lkp); 1418 FREE(lkp, M_UFSMNT); 1419 FREE(snapblklist, M_UFSMNT); 1420 } 1421 } 1422 /* 1423 * Clear all BLK_NOCOPY fields. Pass any block claims to other 1424 * snapshots that want them (see ffs_snapblkfree below). 
1425 / 1426* for (blkno = 1; blkno < NDADDR; blkno++) { 1427 dblk = DIP(ip, i_db[blkno]); 1428 if (dblk == BLK_NOCOPY \|\| dblk == BLK_SNAP) 1429 DIP(ip, i_db[blkno]) = 0; 1430 else if ((dblk == blkstofrags(fs, blkno) && 1431 ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, 1432 ip->i_number))) { 1433 DIP(ip, i_blocks) -= btodb(fs->fs_bsize); 1434 DIP(ip, i_db[blkno]) = 0; 1435 } 1436 } 1437 numblks = howmany(ip->i_size, fs->fs_bsize); 1438 for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 1439 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 1440 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1441 if (error) 1442 continue; 1443 if (fs->fs_size - blkno > NINDIR(fs)) 1444 last = NINDIR(fs); 1445 else 1446 last = fs->fs_size - blkno; 1447 for (loc = 0; loc < last; loc++) { 1448 if (ip->i_ump->um_fstype == UFS1) { 1449 dblk = ((ufs1_daddr_t )(ibp->b_data))[loc]; 1450* if (dblk == BLK_NOCOPY \|\| dblk == BLK_SNAP) 1451 ((ufs1_daddr_t )(ibp->b_data))[loc]= 0; 1452* else if ((dblk == blkstofrags(fs, blkno) && 1453 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1454 fs->fs_bsize, ip->i_number))) { 1455 ip->i_din1->di_blocks -= 1456 btodb(fs->fs_bsize); 1457 ((ufs1_daddr_t )(ibp->b_data))[loc]= 0; 1458* } 1459 continue; 1460 } 1461 dblk = ((ufs2_daddr_t )(ibp->b_data))[loc]; 1462* if (dblk == BLK_NOCOPY \|\| dblk == BLK_SNAP) 1463 ((ufs2_daddr_t )(ibp->b_data))[loc] = 0; 1464* else if ((dblk == blkstofrags(fs, blkno) && 1465 ffs_snapblkfree(fs, ip->i_devvp, dblk, 1466 fs->fs_bsize, ip->i_number))) { 1467 ip->i_din2->di_blocks -= btodb(fs->fs_bsize); 1468 ((ufs2_daddr_t )(ibp->b_data))[loc] = 0; 1469* } 1470 } 1471 bawrite(ibp); 1472 } 1473 /* 1474 * Clear snapshot flag and drop reference. 1475 / 1476* ip->i_flags &= ~SF_SNAPSHOT; 1477 DIP(ip, i_flags) = ip->i_flags; 1478 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1479} 1480 1481/* 1482 * Notification that a block is being freed. Return zero if the free 1483 * should be allowed to proceed. 
Return non-zero if the snapshot file 1484 * wants to claim the block. The block will be claimed if it is an 1485 * uncopied part of one of the snapshots. It will be freed if it is 1486 * either a BLK_NOCOPY or has already been copied in all of the snapshots. 1487 * If a fragment is being freed, then all snapshots that care about 1488 * it must make a copy since a snapshot file can only claim full sized 1489 * blocks. Note that if more than one snapshot file maps the block, 1490 * we can pick one at random to claim it. Since none of the snapshots 1491 * can change, we are assurred that they will all see the same unmodified 1492 * image. When deleting a snapshot file (see ffs_snapremove above), we 1493 * must push any of these claimed blocks to one of the other snapshots 1494 * that maps it. These claimed blocks are easily identified as they will 1495 * have a block number equal to their logical block number within the 1496 * snapshot. A copied block can never have this property because they 1497 * must always have been allocated from a BLK_NOCOPY location. 1498 / 1499int 1500ffs_snapblkfree(fs, devvp, bno, size, inum) 1501* struct fs fs; 1502* struct vnode devvp; 1503* ufs2_daddr_t bno; 1504 long size; 1505 ino_t inum; 1506{ 1507 struct buf ibp, cbp, savedcbp = 0; 1508* struct thread td = curthread; 1509* struct inode ip; 1510* struct vnode vp = NULL; 1511* ufs_lbn_t lbn; 1512 ufs2_daddr_t blkno; 1513 int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0; 1514 struct snaphead snaphead; 1515* 1516 lbn = fragstoblks(fs, bno); 1517retry: 1518 VI_LOCK(devvp); 1519 snaphead = &devvp->v_rdev->si_snapshots; 1520 TAILQ_FOREACH(ip, snaphead, i_nextsnap) { 1521 vp = ITOV(ip); 1522 /* 1523 * Lookup block being written. 
1524 / 1525* if (lbn < NDADDR) { 1526 blkno = DIP(ip, i_db[lbn]); 1527 } else { 1528 if (snapshot_locked == 0 && 1529 lockmgr(vp->v_vnlock, 1530 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_SLEEPFAIL, 1531 VI_MTX(devvp), td) != 0) 1532 goto retry; 1533 snapshot_locked = 1; 1534 td->td_proc->p_flag \|= P_COWINPROGRESS; 1535 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1536 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1537 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1538 if (error) 1539 break; 1540 indiroff = (lbn - NDADDR) % NINDIR(fs); 1541 if (ip->i_ump->um_fstype == UFS1) 1542 blkno=((ufs1_daddr_t )(ibp->b_data))[indiroff]; 1543* else 1544 blkno=((ufs2_daddr_t )(ibp->b_data))[indiroff]; 1545* } 1546 /* 1547 * Check to see if block needs to be copied. 1548 / 1549* if (blkno == 0) { 1550 /* 1551 * A block that we map is being freed. If it has not 1552 * been claimed yet, we will claim or copy it (below). 1553 / 1554* claimedblk = 1; 1555 } else if (blkno == BLK_SNAP) { 1556 /* 1557 * No previous snapshot claimed the block, 1558 * so it will be freed and become a BLK_NOCOPY 1559 * (don't care) for us. 
1560 / 1561* if (claimedblk) 1562 panic("snapblkfree: inconsistent block type"); 1563 if (snapshot_locked == 0 && 1564 lockmgr(vp->v_vnlock, 1565 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_NOWAIT, 1566 VI_MTX(devvp), td) != 0) { 1567 if (lbn >= NDADDR) 1568 bqrelse(ibp); 1569 vn_lock(vp, LK_EXCLUSIVE \| LK_SLEEPFAIL, td); 1570 goto retry; 1571 } 1572 snapshot_locked = 1; 1573 if (lbn < NDADDR) { 1574 DIP(ip, i_db[lbn]) = BLK_NOCOPY; 1575 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1576 } else if (ip->i_ump->um_fstype == UFS1) { 1577 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = 1578* BLK_NOCOPY; 1579 bdwrite(ibp); 1580 } else { 1581 ((ufs2_daddr_t )(ibp->b_data))[indiroff] = 1582* BLK_NOCOPY; 1583 bdwrite(ibp); 1584 } 1585 continue; 1586 } else /* BLK_NOCOPY or default / { 1587* /* 1588 * If the snapshot has already copied the block 1589 * (default), or does not care about the block, 1590 * it is not needed. 1591 / 1592* if (lbn >= NDADDR) 1593 bqrelse(ibp); 1594 continue; 1595 } 1596 /* 1597 * If this is a full size block, we will just grab it 1598 * and assign it to the snapshot inode. Otherwise we 1599 * will proceed to copy it. See explanation for this 1600 * routine as to why only a single snapshot needs to 1601 * claim this block. 
1602 / 1603* if (snapshot_locked == 0 && 1604 lockmgr(vp->v_vnlock, 1605 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_NOWAIT, 1606 VI_MTX(devvp), td) != 0) { 1607 if (lbn >= NDADDR) 1608 bqrelse(ibp); 1609 vn_lock(vp, LK_EXCLUSIVE \| LK_SLEEPFAIL, td); 1610 goto retry; 1611 } 1612 snapshot_locked = 1; 1613 if (size == fs->fs_bsize) { 1614#ifdef DEBUG 1615 if (snapdebug) 1616 printf("%s %d lbn %jd from inum %d\n", 1617 "Grabonremove: snapino", ip->i_number, 1618 (intmax_t)lbn, inum); 1619#endif 1620 if (lbn < NDADDR) { 1621 DIP(ip, i_db[lbn]) = bno; 1622 } else if (ip->i_ump->um_fstype == UFS1) { 1623 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = bno; 1624* bdwrite(ibp); 1625 } else { 1626 ((ufs2_daddr_t )(ibp->b_data))[indiroff] = bno; 1627* bdwrite(ibp); 1628 } 1629 DIP(ip, i_blocks) += btodb(size); 1630 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1631 VOP_UNLOCK(vp, 0, td); 1632 return (1); 1633 } 1634 if (lbn >= NDADDR) 1635 bqrelse(ibp); 1636 /* 1637 * Allocate the block into which to do the copy. Note that this 1638 * allocation will never require any additional allocations for 1639 * the snapshot inode. 1640 / 1641* td->td_proc->p_flag \|= P_COWINPROGRESS; 1642 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1643 fs->fs_bsize, KERNCRED, 0, &cbp); 1644 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1645 if (error) 1646 break; 1647#ifdef DEBUG 1648 if (snapdebug) 1649 printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", 1650 "Copyonremove: snapino ", ip->i_number, 1651 (intmax_t)lbn, "for inum", inum, size, 1652 (intmax_t)cbp->b_blkno); 1653#endif 1654 /* 1655 * If we have already read the old block contents, then 1656 * simply copy them to the new block. Note that we need 1657 * to synchronously write snapshots that have not been 1658 * unlinked, and hence will be visible after a crash, 1659 * to ensure their integrity. 
1660 / 1661* if (savedcbp != 0) { 1662 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 1663 bawrite(cbp); 1664 if (dopersistence && ip->i_effnlink > 0) 1665 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1666 continue; 1667 } 1668 /* 1669 * Otherwise, read the old block contents into the buffer. 1670 / 1671* if ((error = readblock(cbp, lbn)) != 0) { 1672 bzero(cbp->b_data, fs->fs_bsize); 1673 bawrite(cbp); 1674 if (dopersistence && ip->i_effnlink > 0) 1675 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1676 break; 1677 } 1678 savedcbp = cbp; 1679 } 1680 /* 1681 * Note that we need to synchronously write snapshots that 1682 * have not been unlinked, and hence will be visible after 1683 * a crash, to ensure their integrity. 1684 / 1685* if (savedcbp) { 1686 vp = savedcbp->b_vp; 1687 bawrite(savedcbp); 1688 if (dopersistence && VTOI(vp)->i_effnlink > 0) 1689 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1690 } 1691 /* 1692 * If we have been unable to allocate a block in which to do 1693 * the copy, then return non-zero so that the fragment will 1694 * not be freed. Although space will be lost, the snapshot 1695 * will stay consistent. 1696 / 1697* if (snapshot_locked) 1698 VOP_UNLOCK(vp, 0, td); 1699 else 1700 VI_UNLOCK(devvp); 1701 return (error); 1702} 1703 1704/* 1705 * Associate snapshot files when mounting. 1706 / 1707void 1708ffs_snapshot_mount(mp) 1709* struct mount mp; 1710{ 1711* struct ufsmount ump = VFSTOUFS(mp); 1712* struct vnode devvp = ump->um_devvp; 1713* struct fs fs = ump->um_fs; 1714* struct thread td = curthread; 1715* struct snaphead snaphead; 1716* struct vnode vp; 1717* struct inode ip, xp; 1718 struct uio auio; 1719 struct iovec aiov; 1720 void snapblklist; 1721* char reason; 1722* daddr_t snaplistsize; 1723 int error, snaploc, loc; 1724 1725 /* 1726 * XXX The following needs to be set before UFS_TRUNCATE or 1727 * VOP_READ can be called. 
1728 / 1729* mp->mnt_stat.f_iosize = fs->fs_bsize; 1730 /* 1731 * Process each snapshot listed in the superblock. 1732 / 1733* vp = NULL; 1734 snaphead = &devvp->v_rdev->si_snapshots; 1735 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 1736 if (fs->fs_snapinum[snaploc] == 0) 1737 break; 1738 if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], 1739 LK_EXCLUSIVE, &vp)) != 0){ 1740 printf("ffs_snapshot_mount: vget failed %d\n", error); 1741 continue; 1742 } 1743 ip = VTOI(vp); 1744 if ((ip->i_flags & SF_SNAPSHOT) == 0 \|\| ip->i_size == 1745 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1746 if ((ip->i_flags & SF_SNAPSHOT) == 0) { 1747 reason = "non-snapshot"; 1748 } else { 1749 reason = "old format snapshot"; 1750 (void)UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td); 1751 (void)VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1752 } 1753 printf("ffs_snapshot_mount: %s inode %d\n", 1754 reason, fs->fs_snapinum[snaploc]); 1755 vput(vp); 1756 vp = NULL; 1757 for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 1758 if (fs->fs_snapinum[loc] == 0) 1759 break; 1760 fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 1761 } 1762 fs->fs_snapinum[loc - 1] = 0; 1763 snaploc--; 1764 continue; 1765 } 1766 /* 1767 * If there already exist snapshots on this filesystem, grab a 1768 * reference to their shared lock. If this is the first snapshot 1769 * on this filesystem, we need to allocate a lock for the 1770 * snapshots to share. In either case, acquire the snapshot 1771 * lock and give up our original private lock. 
1772 / 1773* VI_LOCK(devvp); 1774 if ((xp = TAILQ_FIRST(snaphead)) != NULL) { 1775 VI_LOCK(vp); 1776 vp->v_vnlock = ITOV(xp)->v_vnlock; 1777 VI_UNLOCK(devvp); 1778 } else { 1779 struct lock lkp; 1780* 1781 VI_UNLOCK(devvp); 1782 MALLOC(lkp, struct lock , sizeof(struct lock), 1783* M_UFSMNT, M_WAITOK); 1784 lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT, 1785 LK_CANRECURSE \| LK_NOPAUSE); 1786 VI_LOCK(vp); 1787 vp->v_vnlock = lkp; 1788 } 1789 vn_lock(vp, LK_INTERLOCK \| LK_EXCLUSIVE \| LK_RETRY, td); 1790 transferlockers(&vp->v_lock, vp->v_vnlock); 1791 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 1792 /* 1793 * Link it onto the active snapshot list. 1794 / 1795* VI_LOCK(devvp); 1796 if (ip->i_nextsnap.tqe_prev != 0) 1797 panic("ffs_snapshot_mount: %d already on list", 1798 ip->i_number); 1799 else 1800 TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); 1801 vp->v_vflag \|= VV_SYSTEM; 1802 VI_UNLOCK(devvp); 1803 VOP_UNLOCK(vp, 0, td); 1804 } 1805 /* 1806 * No usable snapshots found. 1807 / 1808* if (vp == NULL) 1809 return; 1810 /* 1811 * Allocate the space for the block hints list. We always want to 1812 * use the list from the newest snapshot. 
1813 / 1814* auio.uio_iov = &aiov; 1815 auio.uio_iovcnt = 1; 1816 aiov.iov_base = (void )&snaplistsize; 1817* aiov.iov_len = sizeof(snaplistsize); 1818 auio.uio_resid = aiov.iov_len; 1819 auio.uio_offset = 1820 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 1821 auio.uio_segflg = UIO_SYSSPACE; 1822 auio.uio_rw = UIO_READ; 1823 auio.uio_td = td; 1824 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td); 1825 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 1826 printf("ffs_snapshot_mount: read_1 failed %d\n", error); 1827 VOP_UNLOCK(vp, 0, td); 1828 return; 1829 } 1830 MALLOC(snapblklist, void , snaplistsize sizeof(daddr_t), 1831 M_UFSMNT, M_WAITOK); 1832 auio.uio_iovcnt = 1; 1833 aiov.iov_base = snapblklist; 1834 aiov.iov_len = snaplistsize * sizeof (daddr_t); 1835 auio.uio_resid = aiov.iov_len; 1836 auio.uio_offset -= sizeof(snaplistsize); 1837 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 1838 printf("ffs_snapshot_mount: read_2 failed %d\n", error); 1839 VOP_UNLOCK(vp, 0, td); 1840 FREE(snapblklist, M_UFSMNT); 1841 return; 1842 } 1843 VOP_UNLOCK(vp, 0, td); 1844 VI_LOCK(devvp); 1845 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 1846 devvp->v_rdev->si_snaplistsize = snaplistsize; 1847 devvp->v_rdev->si_snapblklist = (daddr_t )snapblklist; 1848* devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; 1849 devvp->v_vflag \|= VV_COPYONWRITE; 1850 VI_UNLOCK(devvp); 1851} 1852 1853/* 1854 * Disassociate snapshot files when unmounting. 
1855 / 1856void 1857ffs_snapshot_unmount(mp) 1858* struct mount mp; 1859{ 1860* struct vnode devvp = VFSTOUFS(mp)->um_devvp; 1861* struct snaphead snaphead = &devvp->v_rdev->si_snapshots; 1862* struct lock lkp = NULL; 1863* struct inode xp; 1864* struct vnode vp; 1865* 1866 VI_LOCK(devvp); 1867 while ((xp = TAILQ_FIRST(snaphead)) != 0) { 1868 vp = ITOV(xp); 1869 lkp = vp->v_vnlock; 1870 vp->v_vnlock = &vp->v_lock; 1871 TAILQ_REMOVE(snaphead, xp, i_nextsnap); 1872 xp->i_nextsnap.tqe_prev = 0; 1873 if (xp->i_effnlink > 0) { 1874 VI_UNLOCK(devvp); 1875 vrele(vp); 1876 VI_LOCK(devvp); 1877 } 1878 } 1879 if (devvp->v_rdev->si_snapblklist != NULL) { 1880 FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT); 1881 devvp->v_rdev->si_snapblklist = NULL; 1882 devvp->v_rdev->si_snaplistsize = 0; 1883 } 1884 if (lkp != NULL) { 1885 lockdestroy(lkp); 1886 FREE(lkp, M_UFSMNT); 1887 } 1888 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); 1889 devvp->v_rdev->si_copyonwrite = 0; 1890 devvp->v_vflag &= ~VV_COPYONWRITE; 1891 VI_UNLOCK(devvp); 1892} 1893 1894/* 1895 * Check for need to copy block that is about to be written, 1896 * copying the block if necessary. 1897 / 1898static int 1899ffs_copyonwrite(devvp, bp) 1900* struct vnode devvp; 1901* struct buf bp; 1902{ 1903* struct snaphead snaphead; 1904* struct buf ibp, cbp, savedcbp = 0; 1905* struct thread td = curthread; 1906* struct fs fs; 1907* struct inode ip; 1908* struct vnode vp = 0; 1909* ufs2_daddr_t lbn, blkno, snapblklist; 1910* int lower, upper, mid, indiroff, snapshot_locked = 0, error = 0; 1911 1912 if (td->td_proc->p_flag & P_COWINPROGRESS) 1913 panic("ffs_copyonwrite: recursive call"); 1914 /* 1915 * First check to see if it is in the preallocated list. 1916 * By doing this check we avoid several potential deadlocks. 
1917 / 1918* VI_LOCK(devvp); 1919 snaphead = &devvp->v_rdev->si_snapshots; 1920 ip = TAILQ_FIRST(snaphead); 1921 fs = ip->i_fs; 1922 lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 1923 snapblklist = devvp->v_rdev->si_snapblklist; 1924 upper = devvp->v_rdev->si_snaplistsize - 1; 1925 lower = 1; 1926 while (lower <= upper) { 1927 mid = (lower + upper) / 2; 1928 if (snapblklist[mid] == lbn) 1929 break; 1930 if (snapblklist[mid] < lbn) 1931 lower = mid + 1; 1932 else 1933 upper = mid - 1; 1934 } 1935 if (lower <= upper) { 1936 VI_UNLOCK(devvp); 1937 return (0); 1938 } 1939 /* 1940 * Not in the precomputed list, so check the snapshots. 1941 / 1942retry: 1943* TAILQ_FOREACH(ip, snaphead, i_nextsnap) { 1944 vp = ITOV(ip); 1945 /* 1946 * We ensure that everything of our own that needs to be 1947 * copied will be done at the time that ffs_snapshot is 1948 * called. Thus we can skip the check here which can 1949 * deadlock in doing the lookup in UFS_BALLOC. 1950 / 1951* if (bp->b_vp == vp) 1952 continue; 1953 /* 1954 * Check to see if block needs to be copied. We do not have 1955 * to hold the snapshot lock while doing this lookup as it 1956 * will never require any additional allocations for the 1957 * snapshot inode. 
1958 / 1959* if (lbn < NDADDR) { 1960 blkno = DIP(ip, i_db[lbn]); 1961 } else { 1962 if (snapshot_locked == 0 && 1963 lockmgr(vp->v_vnlock, 1964 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_SLEEPFAIL, 1965 VI_MTX(devvp), td) != 0) { 1966 VI_LOCK(devvp); 1967 goto retry; 1968 } 1969 snapshot_locked = 1; 1970 td->td_proc->p_flag \|= P_COWINPROGRESS; 1971 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1972 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1973 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1974 if (error) 1975 break; 1976 indiroff = (lbn - NDADDR) % NINDIR(fs); 1977 if (ip->i_ump->um_fstype == UFS1) 1978 blkno=((ufs1_daddr_t )(ibp->b_data))[indiroff]; 1979* else 1980 blkno=((ufs2_daddr_t )(ibp->b_data))[indiroff]; 1981* bqrelse(ibp); 1982 } 1983#ifdef DIAGNOSTIC 1984 if (blkno == BLK_SNAP && bp->b_lblkno >= 0) 1985 panic("ffs_copyonwrite: bad copy block"); 1986#endif 1987 if (blkno != 0) 1988 continue; 1989 /* 1990 * Allocate the block into which to do the copy. Since 1991 * multiple processes may all try to copy the same block, 1992 * we have to recheck our need to do a copy if we sleep 1993 * waiting for the lock. 1994 * 1995 * Because all snapshots on a filesystem share a single 1996 * lock, we ensure that we will never be in competition 1997 * with another process to allocate a block. 
1998 / 1999* if (snapshot_locked == 0 && 2000 lockmgr(vp->v_vnlock, 2001 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_SLEEPFAIL, 2002 VI_MTX(devvp), td) != 0) { 2003 VI_LOCK(devvp); 2004 goto retry; 2005 } 2006 snapshot_locked = 1; 2007 td->td_proc->p_flag \|= P_COWINPROGRESS; 2008 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2009 fs->fs_bsize, KERNCRED, 0, &cbp); 2010 td->td_proc->p_flag &= ~P_COWINPROGRESS; 2011 if (error) 2012 break; 2013#ifdef DEBUG 2014 if (snapdebug) { 2015 printf("Copyonwrite: snapino %d lbn %jd for ", 2016 ip->i_number, (intmax_t)lbn); 2017 if (bp->b_vp == devvp) 2018 printf("fs metadata"); 2019 else 2020 printf("inum %d", VTOI(bp->b_vp)->i_number); 2021 printf(" lblkno %jd to blkno %jd\n", 2022 (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); 2023 } 2024#endif 2025 /* 2026 * If we have already read the old block contents, then 2027 * simply copy them to the new block. Note that we need 2028 * to synchronously write snapshots that have not been 2029 * unlinked, and hence will be visible after a crash, 2030 * to ensure their integrity. 2031 / 2032* if (savedcbp != 0) { 2033 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 2034 bawrite(cbp); 2035 if (dopersistence && ip->i_effnlink > 0) 2036 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 2037 continue; 2038 } 2039 /* 2040 * Otherwise, read the old block contents into the buffer. 2041 / 2042* if ((error = readblock(cbp, lbn)) != 0) { 2043 bzero(cbp->b_data, fs->fs_bsize); 2044 bawrite(cbp); 2045 if (dopersistence && ip->i_effnlink > 0) 2046 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 2047 break; 2048 } 2049 savedcbp = cbp; 2050 } 2051 /* 2052 * Note that we need to synchronously write snapshots that 2053 * have not been unlinked, and hence will be visible after 2054 * a crash, to ensure their integrity. 
2055 / 2056* if (savedcbp) { 2057 vp = savedcbp->b_vp; 2058 bawrite(savedcbp); 2059 if (dopersistence && VTOI(vp)->i_effnlink > 0) 2060 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 2061 } 2062 if (snapshot_locked) 2063 VOP_UNLOCK(vp, 0, td); 2064 else 2065 VI_UNLOCK(devvp); 2066 return (error); 2067} 2068 2069/* 2070 * Read the specified block into the given buffer. 2071 * Much of this boiler-plate comes from bwrite(). 2072 / 2073static int 2074readblock(bp, lbn) 2075* struct buf bp; 2076* ufs2_daddr_t lbn; 2077{ 2078 struct uio auio; 2079 struct iovec aiov; 2080 struct thread td = curthread; 2081* struct inode ip = VTOI(bp->b_vp); 2082* 2083 aiov.iov_base = bp->b_data; 2084 aiov.iov_len = bp->b_bcount; 2085 auio.uio_iov = &aiov; 2086 auio.uio_iovcnt = 1; 2087 auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); 2088 auio.uio_resid = bp->b_bcount; 2089 auio.uio_rw = UIO_READ; 2090 auio.uio_segflg = UIO_SYSSPACE; 2091 auio.uio_td = td; 2092 return (physio(ip->i_devvp->v_rdev, &auio, 0)); 2093}	108SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); 109int collectsnapstats = 0; 110SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, 111 0, ""); 112#endif /* DEBUG / 113* 114/* 115 * Create a snapshot file and initialize it for the filesystem. 
116 / 117int 118ffs_snapshot(mp, snapfile) 119* struct mount mp; 120* char snapfile; 121{ 122* ufs2_daddr_t numblks, blkno, blkp, snapblklist; 123 int error, cg, snaploc; 124 int i, size, len, loc; 125 int flag = mp->mnt_flag; 126 struct timespec starttime = {0, 0}, endtime; 127 char saved_nice = 0; 128 long redo = 0, snaplistsize = 0; 129 int32_t lp; 130* void space; 131* struct fs copy_fs = NULL, fs = VFSTOUFS(mp)->um_fs; 132 struct snaphead snaphead; 133* struct thread td = curthread; 134* struct inode ip, xp; 135 struct buf bp, nbp, ibp, sbp = NULL; 136 struct nameidata nd; 137 struct mount wrtmp; 138* struct vattr vat; 139 struct vnode vp, xvp, nvp, devvp; 140 struct uio auio; 141 struct iovec aiov; 142 143 /* 144 * Need to serialize access to snapshot code per filesystem. 145 / 146* /* 147 * Assign a snapshot slot in the superblock. 148 / 149* for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 150 if (fs->fs_snapinum[snaploc] == 0) 151 break; 152 if (snaploc == FSMAXSNAP) 153 return (ENOSPC); 154 /* 155 * Create the snapshot file. 
156 / 157restart: 158* NDINIT(&nd, CREATE, LOCKPARENT \| LOCKLEAF, UIO_USERSPACE, snapfile, td); 159 if ((error = namei(&nd)) != 0) 160 return (error); 161 if (nd.ni_vp != NULL) { 162 vput(nd.ni_vp); 163 error = EEXIST; 164 } 165 if (nd.ni_dvp->v_mount != mp) 166 error = EXDEV; 167 if (error) { 168 NDFREE(&nd, NDF_ONLY_PNBUF); 169 if (nd.ni_dvp == nd.ni_vp) 170 vrele(nd.ni_dvp); 171 else 172 vput(nd.ni_dvp); 173 return (error); 174 } 175 VATTR_NULL(&vat); 176 vat.va_type = VREG; 177 vat.va_mode = S_IRUSR; 178 vat.va_vaflags \|= VA_EXCLUSIVE; 179 if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) 180 wrtmp = NULL; 181 if (wrtmp != mp) 182 panic("ffs_snapshot: mount mismatch"); 183 if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { 184 NDFREE(&nd, NDF_ONLY_PNBUF); 185 vput(nd.ni_dvp); 186 if ((error = vn_start_write(NULL, &wrtmp, 187 V_XSLEEP \| PCATCH)) != 0) 188 return (error); 189 goto restart; 190 } 191 VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE); 192 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); 193 vput(nd.ni_dvp); 194 if (error) { 195 NDFREE(&nd, NDF_ONLY_PNBUF); 196 vn_finished_write(wrtmp); 197 return (error); 198 } 199 vp = nd.ni_vp; 200 ip = VTOI(vp); 201 devvp = ip->i_devvp; 202 /* 203 * Allocate and copy the last block contents so as to be able 204 * to set size to that of the filesystem. 205 / 206* numblks = howmany(fs->fs_size, fs->fs_frag); 207 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), 208 fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); 209 if (error) 210 goto out; 211 ip->i_size = lblktosize(fs, (off_t)numblks); 212 DIP(ip, i_size) = ip->i_size; 213 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 214 if ((error = readblock(bp, numblks - 1)) != 0) 215 goto out; 216 bawrite(bp); 217 /* 218 * Preallocate critical data structures so that we can copy 219 * them in without further allocation after we suspend all 220 * operations on the filesystem. 
We would like to just release 221 * the allocated buffers without writing them since they will 222 * be filled in below once we are ready to go, but this upsets 223 * the soft update code, so we go ahead and write the new buffers. 224 * 225 * Allocate all indirect blocks and mark all of them as not 226 * needing to be copied. 227 / 228* for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { 229 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), 230 fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); 231 if (error) 232 goto out; 233 bawrite(ibp); 234 } 235 /* 236 * Allocate copies for the superblock and its summary information. 237 / 238* error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, 239 0, &nbp); 240 if (error) 241 goto out; 242 bawrite(nbp); 243 blkno = fragstoblks(fs, fs->fs_csaddr); 244 len = howmany(fs->fs_cssize, fs->fs_bsize); 245 for (loc = 0; loc < len; loc++) { 246 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), 247 fs->fs_bsize, KERNCRED, 0, &nbp); 248 if (error) 249 goto out; 250 bawrite(nbp); 251 } 252 /* 253 * Allocate all cylinder group blocks. 254 / 255* for (cg = 0; cg < fs->fs_ncg; cg++) { 256 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 257 fs->fs_bsize, KERNCRED, 0, &nbp); 258 if (error) 259 goto out; 260 bawrite(nbp); 261 } 262 /* 263 * Copy all the cylinder group maps. Although the 264 * filesystem is still active, we hope that only a few 265 * cylinder groups will change between now and when we 266 * suspend operations. Thus, we will be able to quickly 267 * touch up the few cylinder groups that changed during 268 * the suspension period. 
269 / 270* len = howmany(fs->fs_ncg, NBBY); 271 MALLOC(fs->fs_active, int , len, M_DEVBUF, M_WAITOK); 272* bzero(fs->fs_active, len); 273 for (cg = 0; cg < fs->fs_ncg; cg++) { 274 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 275 fs->fs_bsize, KERNCRED, 0, &nbp); 276 if (error) 277 goto out; 278 error = cgaccount(cg, vp, nbp, 1); 279 bawrite(nbp); 280 if (error) 281 goto out; 282 } 283 /* 284 * Change inode to snapshot type file. 285 / 286* ip->i_flags \|= SF_SNAPSHOT; 287 DIP(ip, i_flags) = ip->i_flags; 288 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 289 /* 290 * Ensure that the snapshot is completely on disk. 291 * Since we have marked it as a snapshot it is safe to 292 * unlock it as no process will be allowed to write to it. 293 / 294* if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td)) != 0) 295 goto out; 296 VOP_UNLOCK(vp, 0, td); 297 /* 298 * All allocations are done, so we can now snapshot the system. 299 * 300 * Recind nice scheduling while running with the filesystem suspended. 301 / 302* if (td->td_ksegrp->kg_nice > 0) { 303 PROC_LOCK(td->td_proc); 304 mtx_lock_spin(&sched_lock); 305 saved_nice = td->td_ksegrp->kg_nice; 306 sched_nice(td->td_ksegrp, 0); 307 mtx_unlock_spin(&sched_lock); 308 PROC_UNLOCK(td->td_proc); 309 } 310 /* 311 * Suspend operation on filesystem. 312 / 313* for (;;) { 314 vn_finished_write(wrtmp); 315 if ((error = vfs_write_suspend(vp->v_mount)) != 0) { 316 vn_start_write(NULL, &wrtmp, V_WAIT); 317 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td); 318 goto out; 319 } 320 if (mp->mnt_kern_flag & MNTK_SUSPENDED) 321 break; 322 vn_start_write(NULL, &wrtmp, V_WAIT); 323 } 324 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td); 325 if (collectsnapstats) 326 nanotime(&starttime); 327 /* 328 * First, copy all the cylinder group maps that have changed. 
329 / 330* for (cg = 0; cg < fs->fs_ncg; cg++) { 331 if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) 332 continue; 333 redo++; 334 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), 335 fs->fs_bsize, KERNCRED, 0, &nbp); 336 if (error) 337 goto out1; 338 error = cgaccount(cg, vp, nbp, 2); 339 bawrite(nbp); 340 if (error) 341 goto out1; 342 } 343 /* 344 * Grab a copy of the superblock and its summary information. 345 * We delay writing it until the suspension is released below. 346 / 347* error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, 348 KERNCRED, &sbp); 349 if (error) { 350 brelse(sbp); 351 sbp = NULL; 352 goto out1; 353 } 354 loc = blkoff(fs, fs->fs_sblockloc); 355 copy_fs = (struct fs )(sbp->b_data + loc); 356* bcopy(fs, copy_fs, fs->fs_sbsize); 357 if ((fs->fs_flags & (FS_UNCLEAN \| FS_NEEDSFSCK)) == 0) 358 copy_fs->fs_clean = 1; 359 size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; 360 if (fs->fs_sbsize < size) 361 bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); 362 size = blkroundup(fs, fs->fs_cssize); 363 if (fs->fs_contigsumsize > 0) 364 size += fs->fs_ncg * sizeof(int32_t); 365 space = malloc((u_long)size, M_UFSMNT, M_WAITOK); 366 copy_fs->fs_csp = space; 367 bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); 368 (char )space += fs->fs_cssize; 369* loc = howmany(fs->fs_cssize, fs->fs_fsize); 370 i = fs->fs_frag - loc % fs->fs_frag; 371 len = (i == fs->fs_frag) ? 
0 : i * fs->fs_fsize; 372 if (len > 0) { 373 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), 374 len, KERNCRED, &bp)) != 0) { 375 brelse(bp); 376 free(copy_fs->fs_csp, M_UFSMNT); 377 bawrite(sbp); 378 sbp = NULL; 379 goto out1; 380 } 381 bcopy(bp->b_data, space, (u_int)len); 382 (char )space += len; 383* bp->b_flags \|= B_INVAL \| B_NOCACHE; 384 brelse(bp); 385 } 386 if (fs->fs_contigsumsize > 0) { 387 copy_fs->fs_maxcluster = lp = space; 388 for (i = 0; i < fs->fs_ncg; i++) 389 lp++ = fs->fs_contigsumsize; 390* } 391 /* 392 * We must check for active files that have been unlinked 393 * (e.g., with a zero link count). We have to expunge all 394 * trace of these files from the snapshot so that they are 395 * not reclaimed prematurely by fsck or unnecessarily dumped. 396 * We turn off the MNTK_SUSPENDED flag to avoid a panic from 397 * spec_strategy about writing on a suspended filesystem. 398 * Note that we skip unlinked snapshot files as they will 399 * be handled separately below. 400 * 401 * We also calculate the needed size for the snapshot list. 402 / 403* snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + 404 FSMAXSNAP + 1 /* superblock / + 1 / last block / + 1 / size /; 405* mp->mnt_kern_flag &= ~MNTK_SUSPENDED; 406 mtx_lock(&mntvnode_mtx); 407loop: 408 for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) { 409 /* 410 * Make sure this vnode wasn't reclaimed in getnewvnode(). 411 * Start over if it has (it won't be on the list anymore). 
412 / 413* if (xvp->v_mount != mp) 414 goto loop; 415 nvp = TAILQ_NEXT(xvp, v_nmntvnodes); 416 mtx_unlock(&mntvnode_mtx); 417 mp_fixme("Unlocked GETATTR."); 418 if (vrefcnt(xvp) == 0 \|\| xvp->v_type == VNON \|\| 419 (VTOI(xvp)->i_flags & SF_SNAPSHOT) \|\| 420 (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 && 421 vat.va_nlink > 0)) { 422 mtx_lock(&mntvnode_mtx); 423 continue; 424 } 425 if (snapdebug) 426 vprint("ffs_snapshot: busy vnode", xvp); 427 if (vn_lock(xvp, LK_EXCLUSIVE, td) != 0) 428 goto loop; 429 xp = VTOI(xvp); 430 if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { 431 VOP_UNLOCK(xvp, 0, td); 432 continue; 433 } 434 /* 435 * If there is a fragment, clear it here. 436 / 437* blkno = 0; 438 loc = howmany(xp->i_size, fs->fs_bsize) - 1; 439 if (loc < NDADDR) { 440 len = fragroundup(fs, blkoff(fs, xp->i_size)); 441 if (len < fs->fs_bsize) { 442 ffs_blkfree(copy_fs, vp, DIP(xp, i_db[loc]), 443 len, xp->i_number); 444 blkno = DIP(xp, i_db[loc]); 445 DIP(xp, i_db[loc]) = 0; 446 } 447 } 448 snaplistsize += 1; 449 if (xp->i_ump->um_fstype == UFS1) 450 error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, 451 BLK_NOCOPY); 452 else 453 error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, 454 BLK_NOCOPY); 455 if (blkno) 456 DIP(xp, i_db[loc]) = blkno; 457 if (!error) 458 error = ffs_freefile(copy_fs, vp, xp->i_number, 459 xp->i_mode); 460 VOP_UNLOCK(xvp, 0, td); 461 if (error) { 462 free(copy_fs->fs_csp, M_UFSMNT); 463 bawrite(sbp); 464 sbp = NULL; 465 goto out1; 466 } 467 mtx_lock(&mntvnode_mtx); 468 } 469 mtx_unlock(&mntvnode_mtx); 470 /* 471 * If there already exist snapshots on this filesystem, grab a 472 * reference to their shared lock. If this is the first snapshot 473 * on this filesystem, we need to allocate a lock for the snapshots 474 * to share. In either case, acquire the snapshot lock and give 475 * up our original private lock. 
476 / 477* VI_LOCK(devvp); 478 snaphead = &devvp->v_rdev->si_snapshots; 479 if ((xp = TAILQ_FIRST(snaphead)) != NULL) { 480 VI_LOCK(vp); 481 vp->v_vnlock = ITOV(xp)->v_vnlock; 482 VI_UNLOCK(devvp); 483 } else { 484 struct lock lkp; 485* 486 VI_UNLOCK(devvp); 487 MALLOC(lkp, struct lock , sizeof(struct lock), M_UFSMNT, 488* M_WAITOK); 489 lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT, 490 LK_CANRECURSE \| LK_NOPAUSE); 491 VI_LOCK(vp); 492 vp->v_vnlock = lkp; 493 } 494 vn_lock(vp, LK_INTERLOCK \| LK_EXCLUSIVE \| LK_RETRY, td); 495 transferlockers(&vp->v_lock, vp->v_vnlock); 496 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 497 /* 498 * If this is the first snapshot on this filesystem, then we need 499 * to allocate the space for the list of preallocated snapshot blocks. 500 * This list will be refined below, but this preliminary one will 501 * keep us out of deadlock until the full one is ready. 502 / 503* if (xp == NULL) { 504 MALLOC(snapblklist, daddr_t , snaplistsize sizeof(daddr_t), 505 M_UFSMNT, M_WAITOK); 506 blkp = &snapblklist[1]; 507 blkp++ = lblkno(fs, fs->fs_sblockloc); 508* blkno = fragstoblks(fs, fs->fs_csaddr); 509 for (cg = 0; cg < fs->fs_ncg; cg++) { 510 if (fragstoblks(fs, cgtod(fs, cg) > blkno)) 511 break; 512 blkp++ = fragstoblks(fs, cgtod(fs, cg)); 513* } 514 len = howmany(fs->fs_cssize, fs->fs_bsize); 515 for (loc = 0; loc < len; loc++) 516 blkp++ = blkno + loc; 517* for (; cg < fs->fs_ncg; cg++) 518 blkp++ = fragstoblks(fs, cgtod(fs, cg)); 519* snapblklist[0] = blkp - snapblklist; 520 VI_LOCK(devvp); 521 if (devvp->v_rdev->si_snapblklist != NULL) 522 panic("ffs_snapshot: non-empty list"); 523 devvp->v_rdev->si_snapblklist = snapblklist; 524 devvp->v_rdev->si_snaplistsize = blkp - snapblklist; 525 VI_UNLOCK(devvp); 526 } 527 /* 528 * Record snapshot inode. Since this is the newest snapshot, 529 * it must be placed at the end of the list. 
530 / 531* VI_LOCK(devvp); 532 fs->fs_snapinum[snaploc] = ip->i_number; 533 if (ip->i_nextsnap.tqe_prev != 0) 534 panic("ffs_snapshot: %d already on list", ip->i_number); 535 TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); 536 devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; 537 devvp->v_vflag \|= VV_COPYONWRITE; 538 VI_UNLOCK(devvp); 539 ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); 540 vp->v_vflag \|= VV_SYSTEM; 541out1: 542 /* 543 * Resume operation on filesystem. 544 / 545* vfs_write_resume(vp->v_mount); 546 vn_start_write(NULL, &wrtmp, V_WAIT); 547 if (collectsnapstats && starttime.tv_sec > 0) { 548 nanotime(&endtime); 549 timespecsub(&endtime, &starttime); 550 printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", 551 vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, 552 endtime.tv_nsec / 1000000, redo, fs->fs_ncg); 553 } 554 if (sbp == NULL) 555 goto out; 556 /* 557 * Copy allocation information from all the snapshots in 558 * this snapshot and then expunge them from its view. 559 / 560* snaphead = &devvp->v_rdev->si_snapshots; 561 TAILQ_FOREACH(xp, snaphead, i_nextsnap) { 562 if (xp == ip) 563 break; 564 if (xp->i_ump->um_fstype == UFS1) 565 error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, 566 BLK_SNAP); 567 else 568 error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, 569 BLK_SNAP); 570 if (error) { 571 fs->fs_snapinum[snaploc] = 0; 572 goto done; 573 } 574 } 575 /* 576 * Allocate space for the full list of preallocated snapshot blocks. 577 / 578* MALLOC(snapblklist, daddr_t , snaplistsize sizeof(daddr_t), 579 M_UFSMNT, M_WAITOK); 580 ip->i_snapblklist = &snapblklist[1]; 581 /* 582 * Expunge the blocks used by the snapshots from the set of 583 * blocks marked as used in the snapshot bitmaps. Also, collect 584 * the list of allocated blocks in i_snapblklist. 
585 / 586* if (ip->i_ump->um_fstype == UFS1) 587 error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); 588 else 589 error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); 590 if (error) { 591 fs->fs_snapinum[snaploc] = 0; 592 FREE(snapblklist, M_UFSMNT); 593 goto done; 594 } 595 if (snaplistsize < ip->i_snapblklist - snapblklist) 596 panic("ffs_snapshot: list too small"); 597 snaplistsize = ip->i_snapblklist - snapblklist; 598 snapblklist[0] = snaplistsize; 599 ip->i_snapblklist = 0; 600 /* 601 * Write out the list of allocated blocks to the end of the snapshot. 602 / 603* auio.uio_iov = &aiov; 604 auio.uio_iovcnt = 1; 605 aiov.iov_base = (void )snapblklist; 606* aiov.iov_len = snaplistsize * sizeof(daddr_t); 607 auio.uio_resid = aiov.iov_len;; 608 auio.uio_offset = ip->i_size; 609 auio.uio_segflg = UIO_SYSSPACE; 610 auio.uio_rw = UIO_WRITE; 611 auio.uio_td = td; 612 if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 613 fs->fs_snapinum[snaploc] = 0; 614 FREE(snapblklist, M_UFSMNT); 615 goto done; 616 } 617 /* 618 * Write the superblock and its summary information 619 * to the snapshot. 620 / 621* blkno = fragstoblks(fs, fs->fs_csaddr); 622 len = howmany(fs->fs_cssize, fs->fs_bsize); 623 space = copy_fs->fs_csp; 624 for (loc = 0; loc < len; loc++) { 625 error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); 626 if (error) { 627 brelse(nbp); 628 fs->fs_snapinum[snaploc] = 0; 629 FREE(snapblklist, M_UFSMNT); 630 goto done; 631 } 632 bcopy(space, nbp->b_data, fs->fs_bsize); 633 space = (char )space + fs->fs_bsize; 634* bawrite(nbp); 635 } 636 /* 637 * As this is the newest list, it is the most inclusive, so 638 * should replace the previous list. 
639 / 640* VI_LOCK(devvp); 641 space = devvp->v_rdev->si_snapblklist; 642 devvp->v_rdev->si_snapblklist = snapblklist; 643 devvp->v_rdev->si_snaplistsize = snaplistsize; 644 if (space != NULL) 645 FREE(space, M_UFSMNT); 646 VI_UNLOCK(devvp); 647done: 648 free(copy_fs->fs_csp, M_UFSMNT); 649 bawrite(sbp); 650out: 651 if (saved_nice > 0) { 652 PROC_LOCK(td->td_proc); 653 mtx_lock_spin(&sched_lock); 654 sched_nice(td->td_ksegrp, saved_nice); 655 mtx_unlock_spin(&sched_lock); 656 PROC_UNLOCK(td->td_proc); 657 } 658 if (fs->fs_active != 0) { 659 FREE(fs->fs_active, M_DEVBUF); 660 fs->fs_active = 0; 661 } 662 mp->mnt_flag = flag; 663 if (error) 664 (void) UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td); 665 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 666 if (error) 667 vput(vp); 668 else 669 VOP_UNLOCK(vp, 0, td); 670 vn_finished_write(wrtmp); 671 return (error); 672} 673 674/* 675 * Copy a cylinder group map. All the unallocated blocks are marked 676 * BLK_NOCOPY so that the snapshot knows that it need not copy them 677 * if they are later written. If passno is one, then this is a first 678 * pass, so only setting needs to be done. If passno is 2, then this 679 * is a revision to a previous pass which must be undone as the 680 * replacement pass is done. 
681 / 682static int 683cgaccount(cg, vp, nbp, passno) 684* int cg; 685 struct vnode vp; 686* struct buf nbp; 687* int passno; 688{ 689 struct buf bp, ibp; 690 struct inode ip; 691* struct cg cgp; 692* struct fs fs; 693* ufs2_daddr_t base, numblks; 694 int error, len, loc, indiroff; 695 696 ip = VTOI(vp); 697 fs = ip->i_fs; 698 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 699 (int)fs->fs_cgsize, KERNCRED, &bp); 700 if (error) { 701 brelse(bp); 702 return (error); 703 } 704 cgp = (struct cg )bp->b_data; 705* if (!cg_chkmagic(cgp)) { 706 brelse(bp); 707 return (EIO); 708 } 709 atomic_set_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 710 bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); 711 if (fs->fs_cgsize < fs->fs_bsize) 712 bzero(&nbp->b_data[fs->fs_cgsize], 713 fs->fs_bsize - fs->fs_cgsize); 714 if (passno == 2) 715 nbp->b_flags \|= B_VALIDSUSPWRT; 716 numblks = howmany(fs->fs_size, fs->fs_frag); 717 len = howmany(fs->fs_fpg, fs->fs_frag); 718 base = cg * fs->fs_fpg / fs->fs_frag; 719 if (base + len >= numblks) 720 len = numblks - base - 1; 721 loc = 0; 722 if (base < NDADDR) { 723 for ( ; loc < NDADDR; loc++) { 724 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 725 DIP(ip, i_db[loc]) = BLK_NOCOPY; 726 else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) 727 DIP(ip, i_db[loc]) = 0; 728 else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) 729 panic("ffs_snapshot: lost direct block"); 730 } 731 } 732 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), 733 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 734 if (error) { 735 brelse(bp); 736 return (error); 737 } 738 indiroff = (base + loc - NDADDR) % NINDIR(fs); 739 for ( ; loc < len; loc++, indiroff++) { 740 if (indiroff >= NINDIR(fs)) { 741 if (passno == 2) 742 ibp->b_flags \|= B_VALIDSUSPWRT; 743 bawrite(ibp); 744 error = UFS_BALLOC(vp, 745 lblktosize(fs, (off_t)(base + loc)), 746 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 747 if (error) { 748 brelse(bp); 749 return (error); 750 } 751 indiroff = 0; 
752 } 753 if (ip->i_ump->um_fstype == UFS1) { 754 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 755 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = 756* BLK_NOCOPY; 757 else if (passno == 2 && ((ufs1_daddr_t )(ibp->b_data)) 758* [indiroff] == BLK_NOCOPY) 759 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = 0; 760* else if (passno == 1 && ((ufs1_daddr_t )(ibp->b_data)) 761* [indiroff] == BLK_NOCOPY) 762 panic("ffs_snapshot: lost indirect block"); 763 continue; 764 } 765 if (ffs_isblock(fs, cg_blksfree(cgp), loc)) 766 ((ufs2_daddr_t )(ibp->b_data))[indiroff] = BLK_NOCOPY; 767* else if (passno == 2 && 768 ((ufs2_daddr_t )(ibp->b_data)) [indiroff] == BLK_NOCOPY) 769* ((ufs2_daddr_t )(ibp->b_data))[indiroff] = 0; 770* else if (passno == 1 && 771 ((ufs2_daddr_t )(ibp->b_data)) [indiroff] == BLK_NOCOPY) 772* panic("ffs_snapshot: lost indirect block"); 773 } 774 bqrelse(bp); 775 if (passno == 2) 776 ibp->b_flags \|= B_VALIDSUSPWRT; 777 bdwrite(ibp); 778 return (0); 779} 780 781/* 782 * Before expunging a snapshot inode, note all the 783 * blocks that it claims with BLK_SNAP so that fsck will 784 * be able to account for those blocks properly and so 785 * that this snapshot knows that it need not copy them 786 * if the other snapshot holding them is freed. This code 787 * is reproduced once each for UFS1 and UFS2. 788 / 789static int 790expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) 791* struct vnode snapvp; 792* struct inode cancelip; 793* struct fs fs; 794* int (acctfunc)(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 795 struct fs , ufs_lbn_t, int); 796* int expungetype; 797{ 798 int i, error, indiroff; 799 ufs_lbn_t lbn, rlbn; 800 ufs2_daddr_t len, blkno, numblks, blksperindir; 801 struct ufs1_dinode dip; 802* struct thread td = curthread; 803* struct buf bp; 804* 805 /* 806 * Prepare to expunge the inode. If its inode block has not 807 * yet been copied, then allocate and fill the copy. 
808 / 809* lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); 810 blkno = 0; 811 if (lbn < NDADDR) { 812 blkno = VTOI(snapvp)->i_din1->di_db[lbn]; 813 } else { 814 td->td_proc->p_flag \|= P_COWINPROGRESS; 815 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), 816 fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); 817 td->td_proc->p_flag &= ~P_COWINPROGRESS; 818 if (error) 819 return (error); 820 indiroff = (lbn - NDADDR) % NINDIR(fs); 821 blkno = ((ufs1_daddr_t )(bp->b_data))[indiroff]; 822* bqrelse(bp); 823 } 824 if (blkno != 0) { 825 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) 826 return (error); 827 } else { 828 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), 829 fs->fs_bsize, KERNCRED, 0, &bp); 830 if (error) 831 return (error); 832 if ((error = readblock(bp, lbn)) != 0) 833 return (error); 834 } 835 /* 836 * Set a snapshot inode to be a zero length file, regular files 837 * to be completely unallocated. 838 / 839* dip = (struct ufs1_dinode )bp->b_data + 840* ino_to_fsbo(fs, cancelip->i_number); 841 if (expungetype == BLK_NOCOPY) 842 dip->di_mode = 0; 843 dip->di_size = 0; 844 dip->di_blocks = 0; 845 dip->di_flags &= ~SF_SNAPSHOT; 846 bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); 847 bdwrite(bp); 848 /* 849 * Now go through and expunge all the blocks in the file 850 * using the function requested. 
851 / 852* numblks = howmany(cancelip->i_size, fs->fs_bsize); 853 if ((error = (acctfunc)(snapvp, &cancelip->i_din1->di_db[0], 854* &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) 855 return (error); 856 if ((error = (acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], 857* &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) 858 return (error); 859 blksperindir = 1; 860 lbn = -NDADDR; 861 len = numblks - NDADDR; 862 rlbn = NDADDR; 863 for (i = 0; len > 0 && i < NIADDR; i++) { 864 error = indiracct_ufs1(snapvp, ITOV(cancelip), i, 865 cancelip->i_din1->di_ib[i], lbn, rlbn, len, 866 blksperindir, fs, acctfunc, expungetype); 867 if (error) 868 return (error); 869 blksperindir = NINDIR(fs); 870* lbn -= blksperindir + 1; 871 len -= blksperindir; 872 rlbn += blksperindir; 873 } 874 return (0); 875} 876 877/* 878 * Descend an indirect block chain for vnode cancelvp accounting for all 879 * its indirect blocks in snapvp. 880 / 881static int 882indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, 883* blksperindir, fs, acctfunc, expungetype) 884 struct vnode snapvp; 885* struct vnode cancelvp; 886* int level; 887 ufs1_daddr_t blkno; 888 ufs_lbn_t lbn; 889 ufs_lbn_t rlbn; 890 ufs_lbn_t remblks; 891 ufs_lbn_t blksperindir; 892 struct fs fs; 893* int (acctfunc)(struct vnode , ufs1_daddr_t , ufs1_daddr_t , 894 struct fs , ufs_lbn_t, int); 895* int expungetype; 896{ 897 int error, num, i; 898 ufs_lbn_t subblksperindir; 899 struct indir indirs[NIADDR + 2]; 900 ufs1_daddr_t last, bap; 901* struct buf bp; 902* 903 if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) 904 return (error); 905 if (lbn != indirs[num - 1 - level].in_lbn \|\| blkno == 0 \|\| num < 2) 906 panic("indiracct: botched params"); 907 /* 908 * We have to expand bread here since it will deadlock looking 909 * up the block number for any blocks that are not in the cache. 
910 / 911* bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); 912 bp->b_blkno = fsbtodb(fs, blkno); 913 if ((bp->b_flags & (B_DONE \| B_DELWRI)) == 0 && 914 (error = readblock(bp, fragstoblks(fs, blkno)))) { 915 brelse(bp); 916 return (error); 917 } 918 /* 919 * Account for the block pointers in this indirect block. 920 / 921* last = howmany(remblks, blksperindir); 922 if (last > NINDIR(fs)) 923 last = NINDIR(fs); 924 MALLOC(bap, ufs1_daddr_t , fs->fs_bsize, M_DEVBUF, M_WAITOK); 925* bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); 926 bqrelse(bp); 927 error = (acctfunc)(snapvp, &bap[0], &bap[last], fs, 928* level == 0 ? rlbn : -1, expungetype); 929 if (error \|\| level == 0) 930 goto out; 931 /* 932 * Account for the block pointers in each of the indirect blocks 933 * in the levels below us. 934 / 935* subblksperindir = blksperindir / NINDIR(fs); 936 for (lbn++, level--, i = 0; i < last; i++) { 937 error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, 938 rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); 939 if (error) 940 goto out; 941 rlbn += blksperindir; 942 lbn -= blksperindir; 943 remblks -= blksperindir; 944 } 945out: 946 FREE(bap, M_DEVBUF); 947 return (error); 948} 949 950/* 951 * Do both snap accounting and map accounting. 952 / 953static int 954fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) 955* struct vnode vp; 956* ufs1_daddr_t oldblkp, lastblkp; 957 struct fs fs; 958* ufs_lbn_t lblkno; 959 int exptype; /* BLK_SNAP or BLK_NOCOPY / 960{ 961* int error; 962 963 if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) 964 return (error); 965 return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); 966} 967 968/* 969 * Identify a set of blocks allocated in a snapshot inode. 
970 / 971static int 972snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 973* struct vnode vp; 974* ufs1_daddr_t oldblkp, lastblkp; 975 struct fs fs; 976* ufs_lbn_t lblkno; 977 int expungetype; /* BLK_SNAP or BLK_NOCOPY / 978{ 979* struct inode ip = VTOI(vp); 980* ufs1_daddr_t blkno, blkp; 981* ufs_lbn_t lbn; 982 struct buf ibp; 983* int error; 984 985 for ( ; oldblkp < lastblkp; oldblkp++) { 986 blkno = oldblkp; 987* if (blkno == 0 \|\| blkno == BLK_NOCOPY \|\| blkno == BLK_SNAP) 988 continue; 989 lbn = fragstoblks(fs, blkno); 990 if (lbn < NDADDR) { 991 blkp = &ip->i_din1->di_db[lbn]; 992 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 993 } else { 994 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 995 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 996 if (error) 997 return (error); 998 blkp = &((ufs1_daddr_t )(ibp->b_data)) 999* [(lbn - NDADDR) % NINDIR(fs)]; 1000 } 1001 /* 1002 * If we are expunging a snapshot vnode and we 1003 * find a block marked BLK_NOCOPY, then it is 1004 * one that has been allocated to this snapshot after 1005 * we took our current snapshot and can be ignored. 1006 / 1007* if (expungetype == BLK_SNAP && blkp == BLK_NOCOPY) { 1008* if (lbn >= NDADDR) 1009 brelse(ibp); 1010 } else { 1011 if (blkp != 0) 1012* panic("snapacct: bad block"); 1013 blkp = expungetype; 1014* if (lbn >= NDADDR) 1015 bdwrite(ibp); 1016 } 1017 } 1018 return (0); 1019} 1020 1021/* 1022 * Account for a set of blocks allocated in a snapshot inode. 
 *
 * Walks the pointer range [oldblkp, lastblkp), whose first entry
 * corresponds to logical block lblkno (or -1 when the pointers are
 * not data block pointers), and releases each referenced block through
 * ffs_blkfree().  When expunging with BLK_SNAP, the logical block
 * numbers of real blocks are also appended to ip->i_snapblklist.
 * Always returns 0.
 */
static int
mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;
{
	ufs1_daddr_t blkno;
	struct inode *ip;
	ino_t inum;
	int acctit;

	ip = VTOI(vp);
	inum = ip->i_number;
	/* An lblkno of -1 means the entries have no logical block numbers. */
	if (lblkno == -1)
		acctit = 0;
	else
		acctit = 1;
	for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
		blkno = *oldblkp;
		if (blkno == 0 || blkno == BLK_NOCOPY)
			continue;
		if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
			*ip->i_snapblklist++ = lblkno;
		/* A BLK_SNAP entry stands for the block at its own lbn. */
		if (blkno == BLK_SNAP)
			blkno = blkstofrags(fs, lblkno);
		ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
	}
	return (0);
}

/*
 * Before expunging a snapshot inode, note all the
 * blocks that it claims with BLK_SNAP so that fsck will
 * be able to account for those blocks properly and so
 * that this snapshot knows that it need not copy them
 * if the other snapshot holding them is freed. This code
 * is reproduced once each for UFS1 and UFS2.
 *
 * Parameters:
 *	snapvp		snapshot vnode being updated.
 *	cancelip	inode being expunged from the snapshot's view.
 *	acctfunc	accounting callback applied to each pointer array.
 *	expungetype	BLK_NOCOPY additionally clears the copied inode's
 *			mode; the value is also passed to acctfunc.
 *
 * Returns 0 on success or the first error encountered.
 */
static int
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs2_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < NDADDR) {
		blkno = VTOI(snapvp)->i_din2->di_db[lbn];
	} else {
		/* Flag the allocation so ffs_copyonwrite() is not re-entered. */
		td->td_proc->p_flag |= P_COWINPROGRESS;
		error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
		   fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_proc->p_flag &= ~P_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - NDADDR) % NINDIR(fs);
		blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	if (blkno != 0) {
		/* Already copied: read the snapshot's copy. */
		if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
			return (error);
	} else {
		/* Not yet copied: allocate the copy and fill it from disk. */
		error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &bp);
		if (error)
			return (error);
		/*
		 * NOTE(review): bp is not released on this error return;
		 * confirm whether the buffer should be freed here.
		 */
		if ((error = readblock(bp, lbn)) != 0)
			return (error);
	}
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs2_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (expungetype == BLK_NOCOPY)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
	bdwrite(bp);
	/*
	 * Now go through and expunge all the blocks in the file
	 * using the function requested.
	 */
	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	/* Direct pointers first (logical blocks start at 0). */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
	    &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype)))
		return (error);
	/* Then the top-level indirect pointers; -1 means "no lbn". */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0],
	    &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype)))
		return (error);
	/* Finally walk each indirect chain (single, double, triple). */
	blksperindir = 1;
	lbn = -NDADDR;
	len = numblks - NDADDR;
	rlbn = NDADDR;
	for (i = 0; len > 0 && i < NIADDR; i++) {
		error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
		    cancelip->i_din2->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	return (0);
}

/*
 * Descend an indirect block chain for vnode cancelvp accounting for all
 * its indirect blocks in snapvp.
 *
 * Parameters:
 *	snapvp		snapshot vnode handed through to acctfunc.
 *	cancelvp	vnode whose indirect blocks are being walked.
 *	level		remaining indirection depth; 0 means the block
 *			holds data block pointers.
 *	blkno		filesystem block number of this indirect block.
 *	lbn		logical block number of this indirect block; it is
 *			checked against the chain computed by ufs_getlbns().
 *	rlbn		logical block number passed to ufs_getlbns() and,
 *			at level 0, to acctfunc for the first pointer.
 *	remblks		number of blocks still to be accounted for.
 *	blksperindir	blocks mapped by each pointer in this block.
 *	acctfunc	accounting callback applied to the pointer array.
 *	expungetype	passed through unchanged to acctfunc.
 *
 * Returns 0 on success or the first error from ufs_getlbns(),
 * readblock(), acctfunc or a recursive call.
 */
static int
indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	    blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs2_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t last, *bap;
	struct buf *bp;

	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	/* Sanity check: the caller's lbn must match the computed chain. */
	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
		panic("indiracct: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	/* Only read from disk when the buffer holds no valid contents. */
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	/*
	 * Work on a private copy of the pointers so that the buffer can
	 * be released before acctfunc and the recursion run.
	 */
	MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
	    level == 0 ? rlbn : -1, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	FREE(bap, M_DEVBUF);
	return (error);
}

/*
 * Do both snap accounting and map accounting.
 *
 * Runs snapacct_ufs2() over the pointer range [oldblkp, lastblkp) and,
 * if that succeeds, mapacct_ufs2() over the same range.  Returns the
 * first non-zero error, otherwise the result of mapacct_ufs2().
 */
static int
fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int exptype;	/* BLK_SNAP or BLK_NOCOPY */
{
	int error;

	if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
		return (error);
	return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
}

/*
 * Identify a set of blocks allocated in a snapshot inode.
1245 / 1246static int 1247snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1248* struct vnode vp; 1249* ufs2_daddr_t oldblkp, lastblkp; 1250 struct fs fs; 1251* ufs_lbn_t lblkno; 1252 int expungetype; /* BLK_SNAP or BLK_NOCOPY / 1253{ 1254* struct inode ip = VTOI(vp); 1255* ufs2_daddr_t blkno, blkp; 1256* ufs_lbn_t lbn; 1257 struct buf ibp; 1258* int error; 1259 1260 for ( ; oldblkp < lastblkp; oldblkp++) { 1261 blkno = oldblkp; 1262* if (blkno == 0 \|\| blkno == BLK_NOCOPY \|\| blkno == BLK_SNAP) 1263 continue; 1264 lbn = fragstoblks(fs, blkno); 1265 if (lbn < NDADDR) { 1266 blkp = &ip->i_din2->di_db[lbn]; 1267 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1268 } else { 1269 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1270 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1271 if (error) 1272 return (error); 1273 blkp = &((ufs2_daddr_t )(ibp->b_data)) 1274* [(lbn - NDADDR) % NINDIR(fs)]; 1275 } 1276 /* 1277 * If we are expunging a snapshot vnode and we 1278 * find a block marked BLK_NOCOPY, then it is 1279 * one that has been allocated to this snapshot after 1280 * we took our current snapshot and can be ignored. 1281 / 1282* if (expungetype == BLK_SNAP && blkp == BLK_NOCOPY) { 1283* if (lbn >= NDADDR) 1284 brelse(ibp); 1285 } else { 1286 if (blkp != 0) 1287* panic("snapacct: bad block"); 1288 blkp = expungetype; 1289* if (lbn >= NDADDR) 1290 bdwrite(ibp); 1291 } 1292 } 1293 return (0); 1294} 1295 1296/* 1297 * Account for a set of blocks allocated in a snapshot inode. 
1298 / 1299static int 1300mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) 1301* struct vnode vp; 1302* ufs2_daddr_t oldblkp, lastblkp; 1303 struct fs fs; 1304* ufs_lbn_t lblkno; 1305 int expungetype; 1306{ 1307 ufs2_daddr_t blkno; 1308 struct inode ip; 1309* ino_t inum; 1310 int acctit; 1311 1312 ip = VTOI(vp); 1313 inum = ip->i_number; 1314 if (lblkno == -1) 1315 acctit = 0; 1316 else 1317 acctit = 1; 1318 for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { 1319 blkno = oldblkp; 1320* if (blkno == 0 \|\| blkno == BLK_NOCOPY) 1321 continue; 1322 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) 1323 ip->i_snapblklist++ = lblkno; 1324* if (blkno == BLK_SNAP) 1325 blkno = blkstofrags(fs, lblkno); 1326 ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); 1327 } 1328 return (0); 1329} 1330 1331/* 1332 * Decrement extra reference on snapshot when last name is removed. 1333 * It will not be freed until the last open reference goes away. 1334 / 1335void 1336ffs_snapgone(ip) 1337* struct inode ip; 1338{ 1339* struct inode xp; 1340* struct fs fs; 1341* int snaploc; 1342 1343 /* 1344 * Find snapshot in incore list. 1345 / 1346* TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap) 1347 if (xp == ip) 1348 break; 1349 if (xp != NULL) 1350 vrele(ITOV(ip)); 1351 else if (snapdebug) 1352 printf("ffs_snapgone: lost snapshot vnode %d\n", 1353 ip->i_number); 1354 /* 1355 * Delete snapshot inode from superblock. Keep list dense. 1356 / 1357* fs = ip->i_fs; 1358 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) 1359 if (fs->fs_snapinum[snaploc] == ip->i_number) 1360 break; 1361 if (snaploc < FSMAXSNAP) { 1362 for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { 1363 if (fs->fs_snapinum[snaploc] == 0) 1364 break; 1365 fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; 1366 } 1367 fs->fs_snapinum[snaploc - 1] = 0; 1368 } 1369} 1370 1371/* 1372 * Prepare a snapshot file for being removed. 
 *
 * Takes the snapshot off the device's active list (tearing down the
 * shared snapshot lock, hint list and copy-on-write hook when it was
 * the last one), clears the BLK_NOCOPY/BLK_SNAP markers in its block
 * pointers, offers claimed blocks to the remaining snapshots and
 * finally clears SF_SNAPSHOT on the inode.
 */
void
ffs_snapremove(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct vnode *devvp;
	struct lock *lkp;
	struct buf *ibp;
	struct fs *fs;
	struct thread *td = curthread;
	ufs2_daddr_t numblks, blkno, dblk, *snapblklist;
	int error, loc, last;

	ip = VTOI(vp);
	fs = ip->i_fs;
	devvp = ip->i_devvp;
	/*
	 * If active, delete from incore list (this snapshot may
	 * already have been in the process of being deleted, so
	 * would not have been active).
	 *
	 * Clear copy-on-write flag if last snapshot.
	 */
	if (ip->i_nextsnap.tqe_prev != 0) {
		VI_LOCK(devvp);
		/* Take the vnode's private lock; this drops the interlock. */
		lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE,
		    VI_MTX(devvp), td);
		VI_LOCK(devvp);
		TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap);
		ip->i_nextsnap.tqe_prev = 0;
		/* Switch the vnode back from the shared lock to its own. */
		lkp = vp->v_vnlock;
		vp->v_vnlock = &vp->v_lock;
		lockmgr(lkp, LK_RELEASE, NULL, td);
		if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) != 0) {
			VI_UNLOCK(devvp);
		} else {
			/* Last snapshot: dismantle the copy-on-write state. */
			snapblklist = devvp->v_rdev->si_snapblklist;
			devvp->v_rdev->si_snapblklist = 0;
			devvp->v_rdev->si_snaplistsize = 0;
			devvp->v_rdev->si_copyonwrite = 0;
			devvp->v_vflag &= ~VV_COPYONWRITE;
			/* Drain the shared lock before destroying it. */
			lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td);
			lockmgr(lkp, LK_RELEASE, NULL, td);
			lockdestroy(lkp);
			FREE(lkp, M_UFSMNT);
			FREE(snapblklist, M_UFSMNT);
		}
	}
	/*
	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
	 * snapshots that want them (see ffs_snapblkfree below).
	 */
	for (blkno = 1; blkno < NDADDR; blkno++) {
		dblk = DIP(ip, i_db[blkno]);
		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
			DIP(ip, i_db[blkno]) = 0;
		else if ((dblk == blkstofrags(fs, blkno) &&
		     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
		     ip->i_number))) {
			/* Another snapshot took over the claimed block. */
			DIP(ip, i_blocks) -= btodb(fs->fs_bsize);
			DIP(ip, i_db[blkno]) = 0;
		}
	}
	numblks = howmany(ip->i_size, fs->fs_bsize);
	/* Repeat for every indirect block, NINDIR(fs) pointers at a time. */
	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
		if (error)
			continue;
		if (fs->fs_size - blkno > NINDIR(fs))
			last = NINDIR(fs);
		else
			last = fs->fs_size - blkno;
		/*
		 * NOTE(review): the claimed-block test below compares each
		 * entry with blkstofrags(fs, blkno) rather than
		 * blkstofrags(fs, blkno + loc); confirm this is intended.
		 */
		for (loc = 0; loc < last; loc++) {
			if (ip->i_ump->um_fstype == UFS1) {
				dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
				if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
					((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
				else if ((dblk == blkstofrags(fs, blkno) &&
				     ffs_snapblkfree(fs, ip->i_devvp, dblk,
				     fs->fs_bsize, ip->i_number))) {
					ip->i_din1->di_blocks -=
					    btodb(fs->fs_bsize);
					((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
				}
				continue;
			}
			dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			else if ((dblk == blkstofrags(fs, blkno) &&
			     ffs_snapblkfree(fs, ip->i_devvp, dblk,
			     fs->fs_bsize, ip->i_number))) {
				ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			}
		}
		bawrite(ibp);
	}
	/*
	 * Clear snapshot flag and drop reference.
	 */
	ip->i_flags &= ~SF_SNAPSHOT;
	DIP(ip, i_flags) = ip->i_flags;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
}

/*
 * Notification that a block is being freed. Return zero if the free
 * should be allowed to proceed.
Return non-zero if the snapshot file 1484 * wants to claim the block. The block will be claimed if it is an 1485 * uncopied part of one of the snapshots. It will be freed if it is 1486 * either a BLK_NOCOPY or has already been copied in all of the snapshots. 1487 * If a fragment is being freed, then all snapshots that care about 1488 * it must make a copy since a snapshot file can only claim full sized 1489 * blocks. Note that if more than one snapshot file maps the block, 1490 * we can pick one at random to claim it. Since none of the snapshots 1491 * can change, we are assurred that they will all see the same unmodified 1492 * image. When deleting a snapshot file (see ffs_snapremove above), we 1493 * must push any of these claimed blocks to one of the other snapshots 1494 * that maps it. These claimed blocks are easily identified as they will 1495 * have a block number equal to their logical block number within the 1496 * snapshot. A copied block can never have this property because they 1497 * must always have been allocated from a BLK_NOCOPY location. 1498 / 1499int 1500ffs_snapblkfree(fs, devvp, bno, size, inum) 1501* struct fs fs; 1502* struct vnode devvp; 1503* ufs2_daddr_t bno; 1504 long size; 1505 ino_t inum; 1506{ 1507 struct buf ibp, cbp, savedcbp = 0; 1508* struct thread td = curthread; 1509* struct inode ip; 1510* struct vnode vp = NULL; 1511* ufs_lbn_t lbn; 1512 ufs2_daddr_t blkno; 1513 int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0; 1514 struct snaphead snaphead; 1515* 1516 lbn = fragstoblks(fs, bno); 1517retry: 1518 VI_LOCK(devvp); 1519 snaphead = &devvp->v_rdev->si_snapshots; 1520 TAILQ_FOREACH(ip, snaphead, i_nextsnap) { 1521 vp = ITOV(ip); 1522 /* 1523 * Lookup block being written. 
1524 / 1525* if (lbn < NDADDR) { 1526 blkno = DIP(ip, i_db[lbn]); 1527 } else { 1528 if (snapshot_locked == 0 && 1529 lockmgr(vp->v_vnlock, 1530 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_SLEEPFAIL, 1531 VI_MTX(devvp), td) != 0) 1532 goto retry; 1533 snapshot_locked = 1; 1534 td->td_proc->p_flag \|= P_COWINPROGRESS; 1535 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1536 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1537 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1538 if (error) 1539 break; 1540 indiroff = (lbn - NDADDR) % NINDIR(fs); 1541 if (ip->i_ump->um_fstype == UFS1) 1542 blkno=((ufs1_daddr_t )(ibp->b_data))[indiroff]; 1543* else 1544 blkno=((ufs2_daddr_t )(ibp->b_data))[indiroff]; 1545* } 1546 /* 1547 * Check to see if block needs to be copied. 1548 / 1549* if (blkno == 0) { 1550 /* 1551 * A block that we map is being freed. If it has not 1552 * been claimed yet, we will claim or copy it (below). 1553 / 1554* claimedblk = 1; 1555 } else if (blkno == BLK_SNAP) { 1556 /* 1557 * No previous snapshot claimed the block, 1558 * so it will be freed and become a BLK_NOCOPY 1559 * (don't care) for us. 
1560 / 1561* if (claimedblk) 1562 panic("snapblkfree: inconsistent block type"); 1563 if (snapshot_locked == 0 && 1564 lockmgr(vp->v_vnlock, 1565 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_NOWAIT, 1566 VI_MTX(devvp), td) != 0) { 1567 if (lbn >= NDADDR) 1568 bqrelse(ibp); 1569 vn_lock(vp, LK_EXCLUSIVE \| LK_SLEEPFAIL, td); 1570 goto retry; 1571 } 1572 snapshot_locked = 1; 1573 if (lbn < NDADDR) { 1574 DIP(ip, i_db[lbn]) = BLK_NOCOPY; 1575 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1576 } else if (ip->i_ump->um_fstype == UFS1) { 1577 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = 1578* BLK_NOCOPY; 1579 bdwrite(ibp); 1580 } else { 1581 ((ufs2_daddr_t )(ibp->b_data))[indiroff] = 1582* BLK_NOCOPY; 1583 bdwrite(ibp); 1584 } 1585 continue; 1586 } else /* BLK_NOCOPY or default / { 1587* /* 1588 * If the snapshot has already copied the block 1589 * (default), or does not care about the block, 1590 * it is not needed. 1591 / 1592* if (lbn >= NDADDR) 1593 bqrelse(ibp); 1594 continue; 1595 } 1596 /* 1597 * If this is a full size block, we will just grab it 1598 * and assign it to the snapshot inode. Otherwise we 1599 * will proceed to copy it. See explanation for this 1600 * routine as to why only a single snapshot needs to 1601 * claim this block. 
1602 / 1603* if (snapshot_locked == 0 && 1604 lockmgr(vp->v_vnlock, 1605 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_NOWAIT, 1606 VI_MTX(devvp), td) != 0) { 1607 if (lbn >= NDADDR) 1608 bqrelse(ibp); 1609 vn_lock(vp, LK_EXCLUSIVE \| LK_SLEEPFAIL, td); 1610 goto retry; 1611 } 1612 snapshot_locked = 1; 1613 if (size == fs->fs_bsize) { 1614#ifdef DEBUG 1615 if (snapdebug) 1616 printf("%s %d lbn %jd from inum %d\n", 1617 "Grabonremove: snapino", ip->i_number, 1618 (intmax_t)lbn, inum); 1619#endif 1620 if (lbn < NDADDR) { 1621 DIP(ip, i_db[lbn]) = bno; 1622 } else if (ip->i_ump->um_fstype == UFS1) { 1623 ((ufs1_daddr_t )(ibp->b_data))[indiroff] = bno; 1624* bdwrite(ibp); 1625 } else { 1626 ((ufs2_daddr_t )(ibp->b_data))[indiroff] = bno; 1627* bdwrite(ibp); 1628 } 1629 DIP(ip, i_blocks) += btodb(size); 1630 ip->i_flag \|= IN_CHANGE \| IN_UPDATE; 1631 VOP_UNLOCK(vp, 0, td); 1632 return (1); 1633 } 1634 if (lbn >= NDADDR) 1635 bqrelse(ibp); 1636 /* 1637 * Allocate the block into which to do the copy. Note that this 1638 * allocation will never require any additional allocations for 1639 * the snapshot inode. 1640 / 1641* td->td_proc->p_flag \|= P_COWINPROGRESS; 1642 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1643 fs->fs_bsize, KERNCRED, 0, &cbp); 1644 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1645 if (error) 1646 break; 1647#ifdef DEBUG 1648 if (snapdebug) 1649 printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", 1650 "Copyonremove: snapino ", ip->i_number, 1651 (intmax_t)lbn, "for inum", inum, size, 1652 (intmax_t)cbp->b_blkno); 1653#endif 1654 /* 1655 * If we have already read the old block contents, then 1656 * simply copy them to the new block. Note that we need 1657 * to synchronously write snapshots that have not been 1658 * unlinked, and hence will be visible after a crash, 1659 * to ensure their integrity. 
1660 / 1661* if (savedcbp != 0) { 1662 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 1663 bawrite(cbp); 1664 if (dopersistence && ip->i_effnlink > 0) 1665 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1666 continue; 1667 } 1668 /* 1669 * Otherwise, read the old block contents into the buffer. 1670 / 1671* if ((error = readblock(cbp, lbn)) != 0) { 1672 bzero(cbp->b_data, fs->fs_bsize); 1673 bawrite(cbp); 1674 if (dopersistence && ip->i_effnlink > 0) 1675 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1676 break; 1677 } 1678 savedcbp = cbp; 1679 } 1680 /* 1681 * Note that we need to synchronously write snapshots that 1682 * have not been unlinked, and hence will be visible after 1683 * a crash, to ensure their integrity. 1684 / 1685* if (savedcbp) { 1686 vp = savedcbp->b_vp; 1687 bawrite(savedcbp); 1688 if (dopersistence && VTOI(vp)->i_effnlink > 0) 1689 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1690 } 1691 /* 1692 * If we have been unable to allocate a block in which to do 1693 * the copy, then return non-zero so that the fragment will 1694 * not be freed. Although space will be lost, the snapshot 1695 * will stay consistent. 1696 / 1697* if (snapshot_locked) 1698 VOP_UNLOCK(vp, 0, td); 1699 else 1700 VI_UNLOCK(devvp); 1701 return (error); 1702} 1703 1704/* 1705 * Associate snapshot files when mounting. 1706 / 1707void 1708ffs_snapshot_mount(mp) 1709* struct mount mp; 1710{ 1711* struct ufsmount ump = VFSTOUFS(mp); 1712* struct vnode devvp = ump->um_devvp; 1713* struct fs fs = ump->um_fs; 1714* struct thread td = curthread; 1715* struct snaphead snaphead; 1716* struct vnode vp; 1717* struct inode ip, xp; 1718 struct uio auio; 1719 struct iovec aiov; 1720 void snapblklist; 1721* char reason; 1722* daddr_t snaplistsize; 1723 int error, snaploc, loc; 1724 1725 /* 1726 * XXX The following needs to be set before UFS_TRUNCATE or 1727 * VOP_READ can be called. 
1728 / 1729* mp->mnt_stat.f_iosize = fs->fs_bsize; 1730 /* 1731 * Process each snapshot listed in the superblock. 1732 / 1733* vp = NULL; 1734 snaphead = &devvp->v_rdev->si_snapshots; 1735 for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { 1736 if (fs->fs_snapinum[snaploc] == 0) 1737 break; 1738 if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], 1739 LK_EXCLUSIVE, &vp)) != 0){ 1740 printf("ffs_snapshot_mount: vget failed %d\n", error); 1741 continue; 1742 } 1743 ip = VTOI(vp); 1744 if ((ip->i_flags & SF_SNAPSHOT) == 0 \|\| ip->i_size == 1745 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { 1746 if ((ip->i_flags & SF_SNAPSHOT) == 0) { 1747 reason = "non-snapshot"; 1748 } else { 1749 reason = "old format snapshot"; 1750 (void)UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td); 1751 (void)VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 1752 } 1753 printf("ffs_snapshot_mount: %s inode %d\n", 1754 reason, fs->fs_snapinum[snaploc]); 1755 vput(vp); 1756 vp = NULL; 1757 for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { 1758 if (fs->fs_snapinum[loc] == 0) 1759 break; 1760 fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; 1761 } 1762 fs->fs_snapinum[loc - 1] = 0; 1763 snaploc--; 1764 continue; 1765 } 1766 /* 1767 * If there already exist snapshots on this filesystem, grab a 1768 * reference to their shared lock. If this is the first snapshot 1769 * on this filesystem, we need to allocate a lock for the 1770 * snapshots to share. In either case, acquire the snapshot 1771 * lock and give up our original private lock. 
1772 / 1773* VI_LOCK(devvp); 1774 if ((xp = TAILQ_FIRST(snaphead)) != NULL) { 1775 VI_LOCK(vp); 1776 vp->v_vnlock = ITOV(xp)->v_vnlock; 1777 VI_UNLOCK(devvp); 1778 } else { 1779 struct lock lkp; 1780* 1781 VI_UNLOCK(devvp); 1782 MALLOC(lkp, struct lock , sizeof(struct lock), 1783* M_UFSMNT, M_WAITOK); 1784 lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT, 1785 LK_CANRECURSE \| LK_NOPAUSE); 1786 VI_LOCK(vp); 1787 vp->v_vnlock = lkp; 1788 } 1789 vn_lock(vp, LK_INTERLOCK \| LK_EXCLUSIVE \| LK_RETRY, td); 1790 transferlockers(&vp->v_lock, vp->v_vnlock); 1791 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); 1792 /* 1793 * Link it onto the active snapshot list. 1794 / 1795* VI_LOCK(devvp); 1796 if (ip->i_nextsnap.tqe_prev != 0) 1797 panic("ffs_snapshot_mount: %d already on list", 1798 ip->i_number); 1799 else 1800 TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); 1801 vp->v_vflag \|= VV_SYSTEM; 1802 VI_UNLOCK(devvp); 1803 VOP_UNLOCK(vp, 0, td); 1804 } 1805 /* 1806 * No usable snapshots found. 1807 / 1808* if (vp == NULL) 1809 return; 1810 /* 1811 * Allocate the space for the block hints list. We always want to 1812 * use the list from the newest snapshot. 
1813 / 1814* auio.uio_iov = &aiov; 1815 auio.uio_iovcnt = 1; 1816 aiov.iov_base = (void )&snaplistsize; 1817* aiov.iov_len = sizeof(snaplistsize); 1818 auio.uio_resid = aiov.iov_len; 1819 auio.uio_offset = 1820 lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); 1821 auio.uio_segflg = UIO_SYSSPACE; 1822 auio.uio_rw = UIO_READ; 1823 auio.uio_td = td; 1824 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td); 1825 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 1826 printf("ffs_snapshot_mount: read_1 failed %d\n", error); 1827 VOP_UNLOCK(vp, 0, td); 1828 return; 1829 } 1830 MALLOC(snapblklist, void , snaplistsize sizeof(daddr_t), 1831 M_UFSMNT, M_WAITOK); 1832 auio.uio_iovcnt = 1; 1833 aiov.iov_base = snapblklist; 1834 aiov.iov_len = snaplistsize * sizeof (daddr_t); 1835 auio.uio_resid = aiov.iov_len; 1836 auio.uio_offset -= sizeof(snaplistsize); 1837 if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { 1838 printf("ffs_snapshot_mount: read_2 failed %d\n", error); 1839 VOP_UNLOCK(vp, 0, td); 1840 FREE(snapblklist, M_UFSMNT); 1841 return; 1842 } 1843 VOP_UNLOCK(vp, 0, td); 1844 VI_LOCK(devvp); 1845 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); 1846 devvp->v_rdev->si_snaplistsize = snaplistsize; 1847 devvp->v_rdev->si_snapblklist = (daddr_t )snapblklist; 1848* devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; 1849 devvp->v_vflag \|= VV_COPYONWRITE; 1850 VI_UNLOCK(devvp); 1851} 1852 1853/* 1854 * Disassociate snapshot files when unmounting. 
1855 / 1856void 1857ffs_snapshot_unmount(mp) 1858* struct mount mp; 1859{ 1860* struct vnode devvp = VFSTOUFS(mp)->um_devvp; 1861* struct snaphead snaphead = &devvp->v_rdev->si_snapshots; 1862* struct lock lkp = NULL; 1863* struct inode xp; 1864* struct vnode vp; 1865* 1866 VI_LOCK(devvp); 1867 while ((xp = TAILQ_FIRST(snaphead)) != 0) { 1868 vp = ITOV(xp); 1869 lkp = vp->v_vnlock; 1870 vp->v_vnlock = &vp->v_lock; 1871 TAILQ_REMOVE(snaphead, xp, i_nextsnap); 1872 xp->i_nextsnap.tqe_prev = 0; 1873 if (xp->i_effnlink > 0) { 1874 VI_UNLOCK(devvp); 1875 vrele(vp); 1876 VI_LOCK(devvp); 1877 } 1878 } 1879 if (devvp->v_rdev->si_snapblklist != NULL) { 1880 FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT); 1881 devvp->v_rdev->si_snapblklist = NULL; 1882 devvp->v_rdev->si_snaplistsize = 0; 1883 } 1884 if (lkp != NULL) { 1885 lockdestroy(lkp); 1886 FREE(lkp, M_UFSMNT); 1887 } 1888 ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); 1889 devvp->v_rdev->si_copyonwrite = 0; 1890 devvp->v_vflag &= ~VV_COPYONWRITE; 1891 VI_UNLOCK(devvp); 1892} 1893 1894/* 1895 * Check for need to copy block that is about to be written, 1896 * copying the block if necessary. 1897 / 1898static int 1899ffs_copyonwrite(devvp, bp) 1900* struct vnode devvp; 1901* struct buf bp; 1902{ 1903* struct snaphead snaphead; 1904* struct buf ibp, cbp, savedcbp = 0; 1905* struct thread td = curthread; 1906* struct fs fs; 1907* struct inode ip; 1908* struct vnode vp = 0; 1909* ufs2_daddr_t lbn, blkno, snapblklist; 1910* int lower, upper, mid, indiroff, snapshot_locked = 0, error = 0; 1911 1912 if (td->td_proc->p_flag & P_COWINPROGRESS) 1913 panic("ffs_copyonwrite: recursive call"); 1914 /* 1915 * First check to see if it is in the preallocated list. 1916 * By doing this check we avoid several potential deadlocks. 
1917 / 1918* VI_LOCK(devvp); 1919 snaphead = &devvp->v_rdev->si_snapshots; 1920 ip = TAILQ_FIRST(snaphead); 1921 fs = ip->i_fs; 1922 lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); 1923 snapblklist = devvp->v_rdev->si_snapblklist; 1924 upper = devvp->v_rdev->si_snaplistsize - 1; 1925 lower = 1; 1926 while (lower <= upper) { 1927 mid = (lower + upper) / 2; 1928 if (snapblklist[mid] == lbn) 1929 break; 1930 if (snapblklist[mid] < lbn) 1931 lower = mid + 1; 1932 else 1933 upper = mid - 1; 1934 } 1935 if (lower <= upper) { 1936 VI_UNLOCK(devvp); 1937 return (0); 1938 } 1939 /* 1940 * Not in the precomputed list, so check the snapshots. 1941 / 1942retry: 1943* TAILQ_FOREACH(ip, snaphead, i_nextsnap) { 1944 vp = ITOV(ip); 1945 /* 1946 * We ensure that everything of our own that needs to be 1947 * copied will be done at the time that ffs_snapshot is 1948 * called. Thus we can skip the check here which can 1949 * deadlock in doing the lookup in UFS_BALLOC. 1950 / 1951* if (bp->b_vp == vp) 1952 continue; 1953 /* 1954 * Check to see if block needs to be copied. We do not have 1955 * to hold the snapshot lock while doing this lookup as it 1956 * will never require any additional allocations for the 1957 * snapshot inode. 
1958 / 1959* if (lbn < NDADDR) { 1960 blkno = DIP(ip, i_db[lbn]); 1961 } else { 1962 if (snapshot_locked == 0 && 1963 lockmgr(vp->v_vnlock, 1964 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_SLEEPFAIL, 1965 VI_MTX(devvp), td) != 0) { 1966 VI_LOCK(devvp); 1967 goto retry; 1968 } 1969 snapshot_locked = 1; 1970 td->td_proc->p_flag \|= P_COWINPROGRESS; 1971 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 1972 fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); 1973 td->td_proc->p_flag &= ~P_COWINPROGRESS; 1974 if (error) 1975 break; 1976 indiroff = (lbn - NDADDR) % NINDIR(fs); 1977 if (ip->i_ump->um_fstype == UFS1) 1978 blkno=((ufs1_daddr_t )(ibp->b_data))[indiroff]; 1979* else 1980 blkno=((ufs2_daddr_t )(ibp->b_data))[indiroff]; 1981* bqrelse(ibp); 1982 } 1983#ifdef DIAGNOSTIC 1984 if (blkno == BLK_SNAP && bp->b_lblkno >= 0) 1985 panic("ffs_copyonwrite: bad copy block"); 1986#endif 1987 if (blkno != 0) 1988 continue; 1989 /* 1990 * Allocate the block into which to do the copy. Since 1991 * multiple processes may all try to copy the same block, 1992 * we have to recheck our need to do a copy if we sleep 1993 * waiting for the lock. 1994 * 1995 * Because all snapshots on a filesystem share a single 1996 * lock, we ensure that we will never be in competition 1997 * with another process to allocate a block. 
1998 / 1999* if (snapshot_locked == 0 && 2000 lockmgr(vp->v_vnlock, 2001 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_SLEEPFAIL, 2002 VI_MTX(devvp), td) != 0) { 2003 VI_LOCK(devvp); 2004 goto retry; 2005 } 2006 snapshot_locked = 1; 2007 td->td_proc->p_flag \|= P_COWINPROGRESS; 2008 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), 2009 fs->fs_bsize, KERNCRED, 0, &cbp); 2010 td->td_proc->p_flag &= ~P_COWINPROGRESS; 2011 if (error) 2012 break; 2013#ifdef DEBUG 2014 if (snapdebug) { 2015 printf("Copyonwrite: snapino %d lbn %jd for ", 2016 ip->i_number, (intmax_t)lbn); 2017 if (bp->b_vp == devvp) 2018 printf("fs metadata"); 2019 else 2020 printf("inum %d", VTOI(bp->b_vp)->i_number); 2021 printf(" lblkno %jd to blkno %jd\n", 2022 (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); 2023 } 2024#endif 2025 /* 2026 * If we have already read the old block contents, then 2027 * simply copy them to the new block. Note that we need 2028 * to synchronously write snapshots that have not been 2029 * unlinked, and hence will be visible after a crash, 2030 * to ensure their integrity. 2031 / 2032* if (savedcbp != 0) { 2033 bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); 2034 bawrite(cbp); 2035 if (dopersistence && ip->i_effnlink > 0) 2036 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 2037 continue; 2038 } 2039 /* 2040 * Otherwise, read the old block contents into the buffer. 2041 / 2042* if ((error = readblock(cbp, lbn)) != 0) { 2043 bzero(cbp->b_data, fs->fs_bsize); 2044 bawrite(cbp); 2045 if (dopersistence && ip->i_effnlink > 0) 2046 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 2047 break; 2048 } 2049 savedcbp = cbp; 2050 } 2051 /* 2052 * Note that we need to synchronously write snapshots that 2053 * have not been unlinked, and hence will be visible after 2054 * a crash, to ensure their integrity. 
2055 / 2056* if (savedcbp) { 2057 vp = savedcbp->b_vp; 2058 bawrite(savedcbp); 2059 if (dopersistence && VTOI(vp)->i_effnlink > 0) 2060 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); 2061 } 2062 if (snapshot_locked) 2063 VOP_UNLOCK(vp, 0, td); 2064 else 2065 VI_UNLOCK(devvp); 2066 return (error); 2067} 2068 2069/* 2070 * Read the specified block into the given buffer. 2071 * Much of this boiler-plate comes from bwrite(). 2072 / 2073static int 2074readblock(bp, lbn) 2075* struct buf bp; 2076* ufs2_daddr_t lbn; 2077{ 2078 struct uio auio; 2079 struct iovec aiov; 2080 struct thread td = curthread; 2081* struct inode ip = VTOI(bp->b_vp); 2082* 2083 aiov.iov_base = bp->b_data; 2084 aiov.iov_len = bp->b_bcount; 2085 auio.uio_iov = &aiov; 2086 auio.uio_iovcnt = 1; 2087 auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); 2088 auio.uio_resid = bp->b_bcount; 2089 auio.uio_rw = UIO_READ; 2090 auio.uio_segflg = UIO_SYSSPACE; 2091 auio.uio_td = td; 2092 return (physio(ip->i_devvp->v_rdev, &auio, 0)); 2093}