1/* $NetBSD: ffs_inode.c,v 1.131 2020/07/31 04:07:30 chs Exp $ */ 2 3/*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * Copyright (c) 1982, 1986, 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 61 */ 62 63#include <sys/cdefs.h> 64__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.131 2020/07/31 04:07:30 chs Exp $"); 65 66#if defined(_KERNEL_OPT) 67#include "opt_ffs.h" 68#include "opt_quota.h" 69#endif 70 71#include <sys/param.h> 72#include <sys/systm.h> 73#include <sys/buf.h> 74#include <sys/file.h> 75#include <sys/fstrans.h> 76#include <sys/kauth.h> 77#include <sys/kernel.h> 78#include <sys/kmem.h> 79#include <sys/mount.h> 80#include <sys/proc.h> 81#include <sys/resourcevar.h> 82#include <sys/trace.h> 83#include <sys/vnode.h> 84#include <sys/wapbl.h> 85 86#include <ufs/ufs/quota.h> 87#include <ufs/ufs/inode.h> 88#include <ufs/ufs/ufsmount.h> 89#include <ufs/ufs/ufs_extern.h> 90#include <ufs/ufs/ufs_bswap.h> 91#include <ufs/ufs/ufs_wapbl.h> 92 93#include <ufs/ffs/fs.h> 94#include <ufs/ffs/ffs_extern.h> 95 96static int ffs_indirtrunc(struct inode *, daddr_t, daddr_t, daddr_t, int, 97 int64_t *); 98 99/* 100 * Update the access, modified, and inode change times as specified 101 * by the IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. 102 * The IN_MODIFIED flag is used to specify that the inode needs to be 103 * updated but that the times have already been set. The access 104 * and modified times are taken from the second and third parameters; 105 * the inode change time is always taken from the current time. If 106 * UPDATE_WAIT flag is set, or UPDATE_DIROP is set then wait for the 107 * disk write of the inode to complete. 108 */ 109 110int 111ffs_update(struct vnode *vp, const struct timespec *acc, 112 const struct timespec *mod, int updflags) 113{ 114 struct fs *fs; 115 struct buf *bp; 116 struct inode *ip; 117 int error; 118 void *cp; 119 int waitfor, flags; 120 121 if (vp->v_mount->mnt_flag & MNT_RDONLY) 122 return (0); 123 ip = VTOI(vp); 124 FFS_ITIMES(ip, acc, mod, NULL); 125 if (updflags & UPDATE_CLOSE) 126 flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED); 127 else 128 flags = ip->i_flag & IN_MODIFIED; 129 if (flags == 0) 130 return (0); 131 fs = ip->i_fs; 132 133 if ((flags & IN_MODIFIED) != 0 && 134 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { 135 waitfor = updflags & UPDATE_WAIT; 136 if ((updflags & UPDATE_DIROP) != 0) 137 waitfor |= UPDATE_WAIT; 138 } else 139 waitfor = 0; 140 141 /* 142 * Ensure that uid and gid are correct. This is a temporary 143 * fix until fsck has been changed to do the update. 144 */ 145 if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ 146 fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ 147 ip->i_ffs1_ouid = ip->i_uid; /* XXX */ 148 ip->i_ffs1_ogid = ip->i_gid; /* XXX */ 149 } /* XXX */ 150 error = bread(ip->i_devvp, 151 FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)), 152 (int)fs->fs_bsize, B_MODIFY, &bp); 153 if (error) { 154 return (error); 155 } 156 ip->i_flag &= ~(IN_MODIFIED | IN_ACCESSED); 157 /* Keep unlinked inode list up to date */ 158 KDASSERTMSG(DIP(ip, nlink) == ip->i_nlink, 159 "DIP(ip, nlink) [%d] == ip->i_nlink [%d]", 160 DIP(ip, nlink), ip->i_nlink); 161 if (ip->i_mode) { 162 if (ip->i_nlink > 0) { 163 UFS_WAPBL_UNREGISTER_INODE(ip->i_ump->um_mountp, 164 ip->i_number, ip->i_mode); 165 } else { 166 UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, 167 ip->i_number, ip->i_mode); 168 } 169 } 170 if (fs->fs_magic == FS_UFS1_MAGIC) { 171 cp = (char *)bp->b_data + 172 (ino_to_fsbo(fs, ip->i_number) * DINODE1_SIZE); 173#ifdef FFS_EI 174 if (UFS_FSNEEDSWAP(fs)) 175 ffs_dinode1_swap(ip->i_din.ffs1_din, 176 (struct ufs1_dinode *)cp); 177 else 178#endif 179 memcpy(cp, ip->i_din.ffs1_din, DINODE1_SIZE); 180 } else { 181 cp = (char *)bp->b_data + 182 (ino_to_fsbo(fs, ip->i_number) * DINODE2_SIZE); 183#ifdef FFS_EI 184 if (UFS_FSNEEDSWAP(fs)) 185 ffs_dinode2_swap(ip->i_din.ffs2_din, 186 (struct ufs2_dinode *)cp); 187 else 188#endif 189 memcpy(cp, ip->i_din.ffs2_din, DINODE2_SIZE); 190 } 191 if (waitfor) { 192 return (bwrite(bp)); 193 } else { 194 bdwrite(bp); 195 return (0); 196 } 197} 198 199#define SINGLE 0 /* index of single indirect block */ 200#define DOUBLE 1 /* index of double indirect block */ 201#define TRIPLE 2 /* index of triple indirect block */ 202/* 203 * Truncate the inode oip to at most length size, freeing the 204 * disk blocks. 205 */ 206int 207ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) 208{ 209 daddr_t lastblock; 210 struct inode *oip = VTOI(ovp); 211 struct mount *omp = ovp->v_mount; 212 daddr_t bn, lastiblock[UFS_NIADDR], indir_lbn[UFS_NIADDR]; 213 daddr_t blks[UFS_NDADDR + UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR]; 214 struct fs *fs; 215 int extblocks; 216 int offset, pgoffset, level; 217 int64_t blocksreleased = 0, datablocks; 218 int i, aflag, nblocks; 219 int error, allerror = 0; 220 off_t osize; 221 int sync; 222 struct ufsmount *ump = oip->i_ump; 223 void *dcookie; 224 long bsize; 225 bool wapbl = omp->mnt_wapbl != NULL; 226 227 UFS_WAPBL_JLOCK_ASSERT(ump->um_mountp); 228 229 if (ovp->v_type == VCHR || ovp->v_type == VBLK || 230 ovp->v_type == VFIFO || ovp->v_type == VSOCK) { 231 KASSERT(oip->i_size == 0); 232 return 0; 233 } 234 235 if (length < 0) 236 return (EINVAL); 237 238 /* 239 * Historically clients did not have to specify which data 240 * they were truncating. So, if not specified, we assume 241 * traditional behavior, e.g., just the normal data. 242 */ 243 if ((ioflag & (IO_EXT | IO_NORMAL)) == 0) 244 ioflag |= IO_NORMAL; 245 246 fs = oip->i_fs; 247#define i_din2 i_din.ffs2_din 248 extblocks = 0; 249 datablocks = DIP(oip, blocks); 250 if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_din2->di_extsize > 0) { 251 extblocks = btodb(ffs_fragroundup(fs, oip->i_din2->di_extsize)); 252 datablocks -= extblocks; 253 } 254 if ((ioflag & IO_EXT) && extblocks > 0) { 255 if (length != 0) 256 panic("ffs_truncate: partial trunc of extdata"); 257 { 258#ifdef QUOTA 259 (void) chkdq(oip, -extblocks, NOCRED, FORCE); 260#endif 261 osize = oip->i_din2->di_extsize; 262 oip->i_din2->di_blocks -= extblocks; 263 oip->i_din2->di_extsize = 0; 264 for (i = 0; i < UFS_NXADDR; i++) { 265 binvalbuf(ovp, -1 - i); 266 oldblks[i] = oip->i_din2->di_extb[i]; 267 oip->i_din2->di_extb[i] = 0; 268 } 269 oip->i_flag |= IN_CHANGE; 270 if ((error = ffs_update(ovp, NULL, NULL, 0))) 271 return (error); 272 for (i = 0; i < UFS_NXADDR; i++) { 273 if (oldblks[i] == 0) 274 continue; 275 bsize = ffs_sblksize(fs, osize, i); 276 if (wapbl) { 277 error = UFS_WAPBL_REGISTER_DEALLOCATION(omp, 278 FFS_FSBTODB(fs, oldblks[i]), bsize, NULL); 279 if (error) 280 return error; 281 } else 282 ffs_blkfree(fs, oip->i_devvp, oldblks[i], 283 bsize, oip->i_number); 284 } 285 extblocks = 0; 286 } 287 } 288 if ((ioflag & IO_NORMAL) == 0) 289 return (0); 290 if (ovp->v_type == VLNK && 291 (oip->i_size < ump->um_maxsymlinklen || 292 (ump->um_maxsymlinklen == 0 && datablocks == 0))) { 293 KDASSERT(length == 0); 294 memset(SHORTLINK(oip), 0, (size_t)oip->i_size); 295 oip->i_size = 0; 296 DIP_ASSIGN(oip, size, 0); 297 oip->i_flag |= IN_CHANGE | IN_UPDATE; 298 return (ffs_update(ovp, NULL, NULL, 0)); 299 } 300 if (oip->i_size == length) { 301 /* still do a uvm_vnp_setsize() as writesize may be larger */ 302 uvm_vnp_setsize(ovp, length); 303 oip->i_flag |= IN_CHANGE | IN_UPDATE; 304 return (ffs_update(ovp, NULL, NULL, 0)); 305 } 306 if (length > ump->um_maxfilesize) 307 return (EFBIG); 308 309 if ((oip->i_flags & SF_SNAPSHOT) != 0) 310 ffs_snapremove(ovp); 311 312 osize = oip->i_size; 313 aflag = ioflag & IO_SYNC ? B_SYNC : 0; 314 315 /* 316 * Lengthen the size of the file. We must ensure that the 317 * last byte of the file is allocated. Since the smallest 318 * value of osize is 0, length will be at least 1. 319 */ 320 321 if (osize < length) { 322 if (ffs_lblkno(fs, osize) < UFS_NDADDR && 323 ffs_lblkno(fs, osize) != ffs_lblkno(fs, length) && 324 ffs_blkroundup(fs, osize) != osize) { 325 off_t eob; 326 327 eob = ffs_blkroundup(fs, osize); 328 uvm_vnp_setwritesize(ovp, eob); 329 error = ufs_balloc_range(ovp, osize, eob - osize, 330 cred, aflag); 331 if (error) { 332 (void) ffs_truncate(ovp, osize, 333 ioflag & IO_SYNC, cred); 334 return error; 335 } 336 if (ioflag & IO_SYNC) { 337 rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER); 338 VOP_PUTPAGES(ovp, 339 trunc_page(osize & fs->fs_bmask), 340 round_page(eob), PGO_CLEANIT | PGO_SYNCIO | 341 PGO_JOURNALLOCKED); 342 } 343 } 344 uvm_vnp_setwritesize(ovp, length); 345 error = ufs_balloc_range(ovp, length - 1, 1, cred, aflag); 346 if (error) { 347 (void) ffs_truncate(ovp, osize, ioflag & IO_SYNC, cred); 348 return (error); 349 } 350 uvm_vnp_setsize(ovp, length); 351 oip->i_flag |= IN_CHANGE | IN_UPDATE; 352 KASSERT(ovp->v_size == oip->i_size); 353 return (ffs_update(ovp, NULL, NULL, 0)); 354 } 355 356 /* 357 * When truncating a regular file down to a non-block-aligned size, 358 * we must zero the part of last block which is past the new EOF. 359 * We must synchronously flush the zeroed pages to disk 360 * since the new pages will be invalidated as soon as we 361 * inform the VM system of the new, smaller size. 362 * We must do this before acquiring the GLOCK, since fetching 363 * the pages will acquire the GLOCK internally. 364 * So there is a window where another thread could see a whole 365 * zeroed page past EOF, but that's life. 366 */ 367 368 offset = ffs_blkoff(fs, length); 369 pgoffset = length & PAGE_MASK; 370 if (ovp->v_type == VREG && (pgoffset != 0 || offset != 0) && 371 osize > length) { 372 daddr_t lbn; 373 voff_t eoz; 374 int size; 375 376 if (offset != 0) { 377 error = ufs_balloc_range(ovp, length - 1, 1, cred, 378 aflag); 379 if (error) 380 return error; 381 } 382 lbn = ffs_lblkno(fs, length); 383 size = ffs_blksize(fs, oip, lbn); 384 eoz = MIN(MAX(ffs_lblktosize(fs, lbn) + size, round_page(pgoffset)), 385 osize); 386 ubc_zerorange(&ovp->v_uobj, length, eoz - length, 387 UBC_VNODE_FLAGS(ovp)); 388 if (round_page(eoz) > round_page(length)) { 389 rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER); 390 error = VOP_PUTPAGES(ovp, round_page(length), 391 round_page(eoz), 392 PGO_CLEANIT | PGO_DEACTIVATE | PGO_JOURNALLOCKED | 393 ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0)); 394 if (error) 395 return error; 396 } 397 } 398 399 genfs_node_wrlock(ovp); 400 oip->i_size = length; 401 DIP_ASSIGN(oip, size, length); 402 uvm_vnp_setsize(ovp, length); 403 /* 404 * Calculate index into inode's block list of 405 * last direct and indirect blocks (if any) 406 * which we want to keep. Lastblock is -1 when 407 * the file is truncated to 0. 408 */ 409 lastblock = ffs_lblkno(fs, length + fs->fs_bsize - 1) - 1; 410 lastiblock[SINGLE] = lastblock - UFS_NDADDR; 411 lastiblock[DOUBLE] = lastiblock[SINGLE] - FFS_NINDIR(fs); 412 lastiblock[TRIPLE] = lastiblock[DOUBLE] - FFS_NINDIR(fs) * FFS_NINDIR(fs); 413 nblocks = btodb(fs->fs_bsize); 414 /* 415 * Update file and block pointers on disk before we start freeing 416 * blocks. If we crash before free'ing blocks below, the blocks 417 * will be returned to the free list. lastiblock values are also 418 * normalized to -1 for calls to ffs_indirtrunc below. 419 */ 420 sync = 0; 421 for (level = TRIPLE; level >= SINGLE; level--) { 422 blks[UFS_NDADDR + level] = DIP(oip, ib[level]); 423 if (lastiblock[level] < 0 && blks[UFS_NDADDR + level] != 0) { 424 sync = 1; 425 DIP_ASSIGN(oip, ib[level], 0); 426 lastiblock[level] = -1; 427 } 428 } 429 for (i = 0; i < UFS_NDADDR; i++) { 430 blks[i] = DIP(oip, db[i]); 431 if (i > lastblock && blks[i] != 0) { 432 sync = 1; 433 DIP_ASSIGN(oip, db[i], 0); 434 } 435 } 436 oip->i_flag |= IN_CHANGE | IN_UPDATE; 437 if (sync) { 438 error = ffs_update(ovp, NULL, NULL, UPDATE_WAIT); 439 if (error && !allerror) 440 allerror = error; 441 } 442 443 /* 444 * Having written the new inode to disk, save its new configuration 445 * and put back the old block pointers long enough to process them. 446 * Note that we save the new block configuration so we can check it 447 * when we are done. 448 */ 449 for (i = 0; i < UFS_NDADDR; i++) { 450 bn = DIP(oip, db[i]); 451 DIP_ASSIGN(oip, db[i], blks[i]); 452 blks[i] = bn; 453 } 454 for (i = 0; i < UFS_NIADDR; i++) { 455 bn = DIP(oip, ib[i]); 456 DIP_ASSIGN(oip, ib[i], blks[UFS_NDADDR + i]); 457 blks[UFS_NDADDR + i] = bn; 458 } 459 460 oip->i_size = osize; 461 DIP_ASSIGN(oip, size, osize); 462 error = vtruncbuf(ovp, lastblock + 1, 0, 0); 463 if (error && !allerror) 464 allerror = error; 465 466 /* 467 * Indirect blocks first. 468 */ 469 indir_lbn[SINGLE] = -UFS_NDADDR; 470 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - FFS_NINDIR(fs) - 1; 471 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - FFS_NINDIR(fs) * FFS_NINDIR(fs) - 1; 472 for (level = TRIPLE; level >= SINGLE; level--) { 473 bn = ffs_getib(fs, oip, level); 474 if (bn != 0) { 475 if (lastiblock[level] < 0 && 476 oip->i_ump->um_mountp->mnt_wapbl) { 477 error = UFS_WAPBL_REGISTER_DEALLOCATION( 478 oip->i_ump->um_mountp, 479 FFS_FSBTODB(fs, bn), fs->fs_bsize, 480 &dcookie); 481 if (error) 482 goto out; 483 } else { 484 dcookie = NULL; 485 } 486 487 error = ffs_indirtrunc(oip, indir_lbn[level], 488 FFS_FSBTODB(fs, bn), lastiblock[level], level, 489 &blocksreleased); 490 if (error) { 491 if (dcookie) { 492 UFS_WAPBL_UNREGISTER_DEALLOCATION( 493 oip->i_ump->um_mountp, dcookie); 494 } 495 goto out; 496 } 497 498 if (lastiblock[level] < 0) { 499 if (!dcookie) 500 ffs_blkfree(fs, oip->i_devvp, bn, 501 fs->fs_bsize, oip->i_number); 502 DIP_ASSIGN(oip, ib[level], 0); 503 blocksreleased += nblocks; 504 } 505 } 506 if (lastiblock[level] >= 0) 507 goto done; 508 } 509 510 /* 511 * All whole direct blocks or frags. 512 */ 513 for (i = UFS_NDADDR - 1; i > lastblock; i--) { 514 bn = ffs_getdb(fs, oip, i); 515 if (bn == 0) 516 continue; 517 518 bsize = ffs_blksize(fs, oip, i); 519 if ((oip->i_ump->um_mountp->mnt_wapbl) && 520 (ovp->v_type != VREG)) { 521 error = UFS_WAPBL_REGISTER_DEALLOCATION( 522 oip->i_ump->um_mountp, 523 FFS_FSBTODB(fs, bn), bsize, NULL); 524 if (error) 525 goto out; 526 } else 527 ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number); 528 DIP_ASSIGN(oip, db[i], 0); 529 blocksreleased += btodb(bsize); 530 } 531 if (lastblock < 0) 532 goto done; 533 534 /* 535 * Finally, look for a change in size of the 536 * last direct block; release any frags. 537 */ 538 bn = ffs_getdb(fs, oip, lastblock); 539 if (bn != 0) { 540 long oldspace, newspace; 541 542 /* 543 * Calculate amount of space we're giving 544 * back as old block size minus new block size. 545 */ 546 oldspace = ffs_blksize(fs, oip, lastblock); 547 oip->i_size = length; 548 DIP_ASSIGN(oip, size, length); 549 newspace = ffs_blksize(fs, oip, lastblock); 550 if (newspace == 0) 551 panic("itrunc: newspace"); 552 if (oldspace - newspace > 0) { 553 /* 554 * Block number of space to be free'd is 555 * the old block # plus the number of frags 556 * required for the storage we're keeping. 557 */ 558 bn += ffs_numfrags(fs, newspace); 559 if ((oip->i_ump->um_mountp->mnt_wapbl) && 560 (ovp->v_type != VREG)) { 561 error = UFS_WAPBL_REGISTER_DEALLOCATION( 562 oip->i_ump->um_mountp, FFS_FSBTODB(fs, bn), 563 oldspace - newspace, NULL); 564 if (error) 565 goto out; 566 } else 567 ffs_blkfree(fs, oip->i_devvp, bn, 568 oldspace - newspace, oip->i_number); 569 blocksreleased += btodb(oldspace - newspace); 570 } 571 } 572 573done: 574 for (level = SINGLE; level <= TRIPLE; level++) 575 KASSERTMSG((blks[UFS_NDADDR + level] == DIP(oip, ib[level])), 576 "itrunc1 blk mismatch: %jx != %jx", 577 (uintmax_t)blks[UFS_NDADDR + level], 578 (uintmax_t)DIP(oip, ib[level])); 579 for (i = 0; i < UFS_NDADDR; i++) 580 KASSERTMSG((blks[i] == DIP(oip, db[i])), 581 "itrunc2 blk mismatch: %jx != %jx", 582 (uintmax_t)blks[i], (uintmax_t)DIP(oip, db[i])); 583 KASSERTMSG((length != 0 || extblocks || LIST_EMPTY(&ovp->v_cleanblkhd)), 584 "itrunc3: zero length and nonempty cleanblkhd"); 585 KASSERTMSG((length != 0 || extblocks || LIST_EMPTY(&ovp->v_dirtyblkhd)), 586 "itrunc3: zero length and nonempty dirtyblkhd"); 587 588out: 589 /* 590 * Set length back to old size if deallocation failed. Some indirect 591 * blocks were deallocated creating a hole, but that is okay. 592 */ 593 if (error == EAGAIN) { 594 if (!allerror) 595 allerror = error; 596 length = osize; 597 uvm_vnp_setsize(ovp, length); 598 } 599 600 /* 601 * Put back the real size. 602 */ 603 oip->i_size = length; 604 DIP_ASSIGN(oip, size, length); 605 DIP_ADD(oip, blocks, -blocksreleased); 606 genfs_node_unlock(ovp); 607 oip->i_flag |= IN_CHANGE; 608 UFS_WAPBL_UPDATE(ovp, NULL, NULL, 0); 609#if defined(QUOTA) || defined(QUOTA2) 610 (void) chkdq(oip, -blocksreleased, NOCRED, 0); 611#endif 612 KASSERT(ovp->v_type != VREG || ovp->v_size == oip->i_size); 613 return (allerror); 614} 615 616/* 617 * Release blocks associated with the inode ip and stored in the indirect 618 * block bn. Blocks are free'd in LIFO order up to (but not including) 619 * lastbn. If level is greater than SINGLE, the block is an indirect block 620 * and recursive calls to indirtrunc must be used to cleanse other indirect 621 * blocks. 622 * 623 * NB: triple indirect blocks are untested. 624 */ 625static int 626ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, 627 int level, int64_t *countp) 628{ 629 int i; 630 struct buf *bp; 631 struct fs *fs = ip->i_fs; 632 int32_t *bap1 = NULL; 633 int64_t *bap2 = NULL; 634 struct vnode *vp; 635 daddr_t nb, nlbn, last; 636 char *copy = NULL; 637 int64_t factor; 638 int64_t nblocks; 639 int error = 0, allerror = 0; 640 const int needswap = UFS_FSNEEDSWAP(fs); 641 const int wapbl = (ip->i_ump->um_mountp->mnt_wapbl != NULL); 642 void *dcookie; 643 644#define RBAP(ip, i) (((ip)->i_ump->um_fstype == UFS1) ? \ 645 ufs_rw32(bap1[i], needswap) : ufs_rw64(bap2[i], needswap)) 646#define BAP_ASSIGN(ip, i, value) \ 647 do { \ 648 if ((ip)->i_ump->um_fstype == UFS1) \ 649 bap1[i] = (value); \ 650 else \ 651 bap2[i] = (value); \ 652 } while(0) 653 654 /* 655 * Calculate index in current block of last 656 * block to be kept. -1 indicates the entire 657 * block so we need not calculate the index. 658 */ 659 factor = 1; 660 for (i = SINGLE; i < level; i++) 661 factor *= FFS_NINDIR(fs); 662 last = lastbn; 663 if (lastbn > 0) 664 last /= factor; 665 nblocks = btodb(fs->fs_bsize); 666 /* 667 * Get buffer of block pointers, zero those entries corresponding 668 * to blocks to be free'd, and update on disk copy first. Since 669 * double(triple) indirect before single(double) indirect, calls 670 * to bmap on these blocks will fail. However, we already have 671 * the on disk address, so we have to set the b_blkno field 672 * explicitly instead of letting bread do everything for us. 673 */ 674 vp = ITOV(ip); 675 error = ffs_getblk(vp, lbn, FFS_NOBLK, fs->fs_bsize, false, &bp); 676 if (error) 677 return error; 678 679 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { 680 /* Braces must be here in case trace evaluates to nothing. */ 681 trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); 682 } else { 683 trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); 684 curlwp->l_ru.ru_inblock++; /* pay for read */ 685 bp->b_flags |= B_READ; 686 bp->b_flags &= ~B_COWDONE; /* we change blkno below */ 687 if (bp->b_bcount > bp->b_bufsize) 688 panic("ffs_indirtrunc: bad buffer size"); 689 bp->b_blkno = dbn; 690 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 691 VOP_STRATEGY(vp, bp); 692 error = biowait(bp); 693 if (error == 0) 694 error = fscow_run(bp, true); 695 } 696 if (error) { 697 brelse(bp, 0); 698 return error; 699 } 700 701 /* 702 * Clear reference to blocks to be removed on disk, before actually 703 * reclaiming them, so that fsck is more likely to be able to recover 704 * the filesystem if system goes down during the truncate process. 705 * This assumes the truncate process would not fail, contrary 706 * to the wapbl case. 707 */ 708 if (ip->i_ump->um_fstype == UFS1) 709 bap1 = (int32_t *)bp->b_data; 710 else 711 bap2 = (int64_t *)bp->b_data; 712 if (lastbn >= 0 && !wapbl) { 713 copy = kmem_alloc(fs->fs_bsize, KM_SLEEP); 714 memcpy((void *)copy, bp->b_data, (u_int)fs->fs_bsize); 715 for (i = last + 1; i < FFS_NINDIR(fs); i++) 716 BAP_ASSIGN(ip, i, 0); 717 error = bwrite(bp); 718 if (error) 719 allerror = error; 720 721 if (ip->i_ump->um_fstype == UFS1) 722 bap1 = (int32_t *)copy; 723 else 724 bap2 = (int64_t *)copy; 725 } 726 727 /* 728 * Recursively free totally unused blocks. 729 */ 730 for (i = FFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 731 i--, nlbn += factor) { 732 nb = RBAP(ip, i); 733 if (nb == 0) 734 continue; 735 736 if ((ip->i_ump->um_mountp->mnt_wapbl) && 737 ((level > SINGLE) || (ITOV(ip)->v_type != VREG))) { 738 error = UFS_WAPBL_REGISTER_DEALLOCATION( 739 ip->i_ump->um_mountp, 740 FFS_FSBTODB(fs, nb), fs->fs_bsize, 741 &dcookie); 742 if (error) 743 goto out; 744 } else { 745 dcookie = NULL; 746 } 747 748 if (level > SINGLE) { 749 error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb), 750 (daddr_t)-1, level - 1, countp); 751 if (error) { 752 if (dcookie) { 753 UFS_WAPBL_UNREGISTER_DEALLOCATION( 754 ip->i_ump->um_mountp, dcookie); 755 } 756 757 goto out; 758 } 759 } 760 761 if (!dcookie) 762 ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize, 763 ip->i_number); 764 765 BAP_ASSIGN(ip, i, 0); 766 *countp += nblocks; 767 } 768 769 /* 770 * Recursively free blocks on the now last partial indirect block. 771 */ 772 if (level > SINGLE && lastbn >= 0) { 773 last = lastbn % factor; 774 nb = RBAP(ip, i); 775 if (nb != 0) { 776 error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb), 777 last, level - 1, countp); 778 if (error) 779 goto out; 780 } 781 } 782 783out: 784 if (error && !allerror) 785 allerror = error; 786 787 if (copy != NULL) { 788 kmem_free(copy, fs->fs_bsize); 789 } else if (lastbn < 0 && error == 0) { 790 /* all freed, release without writing back */ 791 brelse(bp, BC_INVAL); 792 } else if (wapbl) { 793 /* only partially freed, write the updated block */ 794 error = bwrite(bp); 795 if (!allerror) 796 allerror = error; 797 } 798 799 return (allerror); 800} 801 802void 803ffs_itimes(struct inode *ip, const struct timespec *acc, 804 const struct timespec *mod, const struct timespec *cre) 805{ 806 struct timespec now; 807 808 if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY))) { 809 return; 810 } 811 812 vfs_timestamp(&now); 813 if (ip->i_flag & IN_ACCESS) { 814 if (acc == NULL) 815 acc = &now; 816 DIP_ASSIGN(ip, atime, acc->tv_sec); 817 DIP_ASSIGN(ip, atimensec, acc->tv_nsec); 818 } 819 if (ip->i_flag & (IN_UPDATE | IN_MODIFY)) { 820 if ((ip->i_flags & SF_SNAPSHOT) == 0) { 821 if (mod == NULL) 822 mod = &now; 823 DIP_ASSIGN(ip, mtime, mod->tv_sec); 824 DIP_ASSIGN(ip, mtimensec, mod->tv_nsec); 825 } 826 ip->i_modrev++; 827 } 828 if (ip->i_flag & (IN_CHANGE | IN_MODIFY)) { 829 if (cre == NULL) 830 cre = &now; 831 DIP_ASSIGN(ip, ctime, cre->tv_sec); 832 DIP_ASSIGN(ip, ctimensec, cre->tv_nsec); 833 } 834 if (ip->i_flag & (IN_ACCESS | IN_MODIFY)) 835 ip->i_flag |= IN_ACCESSED; 836 if (ip->i_flag & (IN_UPDATE | IN_CHANGE)) 837 ip->i_flag |= IN_MODIFIED; 838 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY); 839} 840