1/* 2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. All advertising materials mentioning features or use of this software 42 * must display the following acknowledgement: 43 * This product includes software developed by the University of 44 * California, Berkeley and its contributors. 45 * 4. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 62 */ 63 64#include <rev_endian_fs.h> 65#include <vm/vm_pager.h> 66 67#include <sys/param.h> 68#include <sys/systm.h> 69#include <sys/mount_internal.h> 70#include <sys/proc_internal.h> /* for accessing p_stats */ 71#include <sys/file.h> 72#include <sys/buf_internal.h> 73#include <sys/vnode_internal.h> 74#include <sys/kernel.h> 75#include <sys/malloc.h> 76#include <sys/trace.h> 77#include <sys/resourcevar.h> 78#include <sys/ubc.h> 79#include <sys/quota.h> 80 81#include <sys/vm.h> 82 83#include <ufs/ufs/quota.h> 84#include <ufs/ufs/inode.h> 85#include <ufs/ufs/ufsmount.h> 86#include <ufs/ufs/ufs_extern.h> 87 88#include <ufs/ffs/fs.h> 89#include <ufs/ffs/ffs_extern.h> 90 91#if REV_ENDIAN_FS 92#include <ufs/ufs/ufs_byte_order.h> 93#include <libkern/OSByteOrder.h> 94#endif /* REV_ENDIAN_FS */ 95#include <libkern/OSAtomic.h> 96 97static int ffs_indirtrunc(struct inode *, ufs_daddr_t, ufs_daddr_t, 98 ufs_daddr_t, int, long *); 99 100/* 101 * Update the access, modified, and inode change times as specified by the 102 * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is 103 * used to specify that the inode needs to be updated but that the times have 104 * already been set. The access and modified times are taken from the second 105 * and third parameters; the inode change time is always taken from the current 106 * time. If waitfor is set, then wait for the disk write of the inode to 107 * complete. 108 */ 109int 110ffs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor) 111{ 112 register struct fs *fs; 113 struct buf *bp; 114 struct inode *ip; 115 struct timeval tv; 116 errno_t error; 117#if REV_ENDIAN_FS 118 struct mount *mp=(vp)->v_mount; 119 int rev_endian=(mp->mnt_flag & MNT_REVEND); 120#endif /* REV_ENDIAN_FS */ 121 122 ip = VTOI(vp); 123 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 124 ip->i_flag &= 125 ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); 126 return (0); 127 } 128 if ((ip->i_flag & 129 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) 130 return (0); 131 if (ip->i_flag & IN_ACCESS) 132 ip->i_atime = access->tv_sec; 133 if (ip->i_flag & IN_UPDATE) { 134 ip->i_mtime = modify->tv_sec; 135 ip->i_modrev++; 136 } 137 if (ip->i_flag & IN_CHANGE) { 138 microtime(&tv); 139 ip->i_ctime = tv.tv_sec; 140 } 141 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); 142 fs = ip->i_fs; 143 /* 144 * Ensure that uid and gid are correct. This is a temporary 145 * fix until fsck has been changed to do the update. 146 */ 147 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ 148 ip->i_din.di_ouid = ip->i_uid; /* XXX */ 149 ip->i_din.di_ogid = ip->i_gid; /* XXX */ 150 } /* XXX */ 151 if (error = buf_bread(ip->i_devvp, 152 (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))), 153 (int)fs->fs_bsize, NOCRED, &bp)) { 154 buf_brelse(bp); 155 return ((int)error); 156 } 157#if REV_ENDIAN_FS 158 if (rev_endian) 159 byte_swap_inode_out(ip, ((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number))); 160 else { 161#endif /* REV_ENDIAN_FS */ 162 *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; 163#if REV_ENDIAN_FS 164 } 165#endif /* REV_ENDIAN_FS */ 166 167 if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) 168 return ((int)buf_bwrite(bp)); 169 else { 170 buf_bdwrite(bp); 171 return (0); 172 } 173} 174 175 176#define SINGLE 0 /* index of single indirect block */ 177#define DOUBLE 1 /* index of double indirect block */ 178#define TRIPLE 2 /* index of triple indirect block */ 179 180int 181ffs_truncate_internal(vnode_t ovp, off_t length, int flags, ucred_t cred) 182{ 183 struct inode *oip; 184 struct fs *fs; 185 ufs_daddr_t lastblock; 186 ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; 187 ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; 188 buf_t bp; 189 int offset, size, level, i; 190 long count, nblocks, vflags, blocksreleased = 0; 191 struct timeval tv; 192 int aflags, error, allerror; 193 off_t osize; 194 int devBlockSize=0; 195#if QUOTA 196 int64_t change; /* in bytes */ 197#endif /* QUOTA */ 198 199 if (length < 0) 200 return (EINVAL); 201 202 oip = VTOI(ovp); 203 fs = oip->i_fs; 204 205 if (length > fs->fs_maxfilesize) 206 return (EFBIG); 207 208 microtime(&tv); 209 if (ovp->v_type == VLNK && 210 oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { 211#if DIAGNOSTIC 212 if (length != 0) 213 panic("ffs_truncate: partial truncate of symlink"); 214#endif 215 bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); 216 oip->i_size = 0; 217 oip->i_flag |= IN_CHANGE | IN_UPDATE; 218 return (ffs_update(ovp, &tv, &tv, 1)); 219 } 220 221 if (oip->i_size == length) { 222 oip->i_flag |= IN_CHANGE | IN_UPDATE; 223 return (ffs_update(ovp, &tv, &tv, 0)); 224 } 225#if QUOTA 226 if (error = getinoquota(oip)) 227 return (error); 228#endif 229 osize = oip->i_size; 230 231 /* 232 * Lengthen the size of the file. We must ensure that the 233 * last byte of the file is allocated. Since the smallest 234 * value of osize is 0, length will be at least 1. 235 */ 236 if (osize < length) { 237 offset = blkoff(fs, length - 1); 238 lbn = lblkno(fs, length - 1); 239 aflags = B_CLRBUF; 240 if (flags & IO_SYNC) 241 aflags |= B_SYNC; 242 if (error = ffs_balloc(oip, lbn, offset + 1, cred, &bp, aflags, 0)) 243 return (error); 244 oip->i_size = length; 245 246 if (UBCINFOEXISTS(ovp)) { 247 buf_markinvalid(bp); 248 buf_bwrite(bp); 249 ubc_setsize(ovp, (off_t)length); 250 } else { 251 if (aflags & B_SYNC) 252 buf_bwrite(bp); 253 else 254 buf_bawrite(bp); 255 } 256 oip->i_flag |= IN_CHANGE | IN_UPDATE; 257 return (ffs_update(ovp, &tv, &tv, 1)); 258 } 259 /* 260 * Shorten the size of the file. If the file is not being 261 * truncated to a block boundry, the contents of the 262 * partial block following the end of the file must be 263 * zero'ed in case it ever become accessable again because 264 * of subsequent file growth. 265 */ 266 if (UBCINFOEXISTS(ovp)) 267 ubc_setsize(ovp, (off_t)length); 268 269 vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META; 270 271 if (vflags & BUF_WRITE_DATA) 272 ffs_fsync_internal(ovp, MNT_WAIT); 273 allerror = buf_invalidateblks(ovp, vflags, 0, 0); 274 275 offset = blkoff(fs, length); 276 if (offset == 0) { 277 oip->i_size = length; 278 } else { 279 lbn = lblkno(fs, length); 280 aflags = B_CLRBUF; 281 if (flags & IO_SYNC) 282 aflags |= B_SYNC; 283 if (error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags, 0)) 284 return (error); 285 oip->i_size = length; 286 size = blksize(fs, oip, lbn); 287 bzero((char *)buf_dataptr(bp) + offset, (u_int)(size - offset)); 288 allocbuf(bp, size); 289 if (UBCINFOEXISTS(ovp)) { 290 buf_markinvalid(bp); 291 buf_bwrite(bp); 292 } else { 293 if (aflags & B_SYNC) 294 buf_bwrite(bp); 295 else 296 buf_bawrite(bp); 297 } 298 } 299 /* 300 * Calculate index into inode's block list of 301 * last direct and indirect blocks (if any) 302 * which we want to keep. Lastblock is -1 when 303 * the file is truncated to 0. 304 */ 305 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 306 lastiblock[SINGLE] = lastblock - NDADDR; 307 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 308 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 309 310 devBlockSize = vfs_devblocksize(vnode_mount(ovp)); 311 nblocks = btodb(fs->fs_bsize, devBlockSize); 312 313 /* 314 * Update file and block pointers on disk before we start freeing 315 * blocks. If we crash before free'ing blocks below, the blocks 316 * will be returned to the free list. lastiblock values are also 317 * normalized to -1 for calls to ffs_indirtrunc below. 318 */ 319 bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); 320 for (level = TRIPLE; level >= SINGLE; level--) 321 if (lastiblock[level] < 0) { 322 oip->i_ib[level] = 0; 323 lastiblock[level] = -1; 324 } 325 for (i = NDADDR - 1; i > lastblock; i--) 326 oip->i_db[i] = 0; 327 oip->i_flag |= IN_CHANGE | IN_UPDATE; 328 if (error = ffs_update(ovp, &tv, &tv, MNT_WAIT)) 329 allerror = error; 330 /* 331 * Having written the new inode to disk, save its new configuration 332 * and put back the old block pointers long enough to process them. 333 * Note that we save the new block configuration so we can check it 334 * when we are done. 335 */ 336 bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); 337 bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); 338 oip->i_size = osize; 339 340 vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META; 341 342 if (vflags & BUF_WRITE_DATA) 343 ffs_fsync_internal(ovp, MNT_WAIT); 344 allerror = buf_invalidateblks(ovp, vflags, 0, 0); 345 346 /* 347 * Indirect blocks first. 348 */ 349 indir_lbn[SINGLE] = -NDADDR; 350 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 351 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 352 for (level = TRIPLE; level >= SINGLE; level--) { 353 bn = oip->i_ib[level]; 354 if (bn != 0) { 355 error = ffs_indirtrunc(oip, indir_lbn[level], 356 fsbtodb(fs, bn), lastiblock[level], level, &count); 357 if (error) 358 allerror = error; 359 blocksreleased += count; 360 if (lastiblock[level] < 0) { 361 oip->i_ib[level] = 0; 362 ffs_blkfree(oip, bn, fs->fs_bsize); 363 blocksreleased += nblocks; 364 } 365 } 366 if (lastiblock[level] >= 0) 367 goto done; 368 } 369 370 /* 371 * All whole direct blocks or frags. 372 */ 373 for (i = NDADDR - 1; i > lastblock; i--) { 374 register long bsize; 375 376 bn = oip->i_db[i]; 377 if (bn == 0) 378 continue; 379 oip->i_db[i] = 0; 380 bsize = blksize(fs, oip, i); 381 ffs_blkfree(oip, bn, bsize); 382 blocksreleased += btodb(bsize, devBlockSize); 383 } 384 if (lastblock < 0) 385 goto done; 386 387 /* 388 * Finally, look for a change in size of the 389 * last direct block; release any frags. 390 */ 391 bn = oip->i_db[lastblock]; 392 if (bn != 0) { 393 long oldspace, newspace; 394 395 /* 396 * Calculate amount of space we're giving 397 * back as old block size minus new block size. 398 */ 399 oldspace = blksize(fs, oip, lastblock); 400 oip->i_size = length; 401 newspace = blksize(fs, oip, lastblock); 402 if (newspace == 0) 403 panic("itrunc: newspace"); 404 if (oldspace - newspace > 0) { 405 /* 406 * Block number of space to be free'd is 407 * the old block # plus the number of frags 408 * required for the storage we're keeping. 409 */ 410 bn += numfrags(fs, newspace); 411 ffs_blkfree(oip, bn, oldspace - newspace); 412 blocksreleased += btodb(oldspace - newspace, devBlockSize); 413 } 414 } 415done: 416#if DIAGNOSTIC 417 for (level = SINGLE; level <= TRIPLE; level++) 418 if (newblks[NDADDR + level] != oip->i_ib[level]) 419 panic("itrunc1"); 420 for (i = 0; i < NDADDR; i++) 421 if (newblks[i] != oip->i_db[i]) 422 panic("itrunc2"); 423 if (length == 0 && 424 (vnode_hasdirtyblks(ovp) || vnode_hascleanblks(ovp))) 425 panic("itrunc3"); 426#endif /* DIAGNOSTIC */ 427 /* 428 * Put back the real size. 429 */ 430 oip->i_size = length; 431 oip->i_blocks -= blocksreleased; 432 if (oip->i_blocks < 0) /* sanity */ 433 oip->i_blocks = 0; 434 oip->i_flag |= IN_CHANGE; 435#if QUOTA 436 change = dbtob((int64_t)blocksreleased,devBlockSize); 437 (void) chkdq(oip, -change, NOCRED, 0); 438#endif 439 return (allerror); 440} 441 442/* 443 * Release blocks associated with the inode ip and stored in the indirect 444 * block bn. Blocks are free'd in LIFO order up to (but not including) 445 * lastbn. If level is greater than SINGLE, the block is an indirect block 446 * and recursive calls to indirtrunc must be used to cleanse other indirect 447 * blocks. 448 * 449 * NB: triple indirect blocks are untested. 450 */ 451static int 452ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) 453 register struct inode *ip; 454 ufs_daddr_t lbn, lastbn; 455 ufs_daddr_t dbn; 456 int level; 457 long *countp; 458{ 459 register int i; 460 struct buf *bp; 461 struct buf *tbp; 462 register struct fs *fs = ip->i_fs; 463 register ufs_daddr_t *bap; 464 struct vnode *vp=ITOV(ip); 465 ufs_daddr_t *copy, nb, nlbn, last; 466 long blkcount, factor; 467 int nblocks, blocksreleased = 0; 468 errno_t error = 0, allerror = 0; 469 int devBlockSize=0; 470 struct mount *mp=vp->v_mount; 471#if REV_ENDIAN_FS 472 int rev_endian=(mp->mnt_flag & MNT_REVEND); 473#endif /* REV_ENDIAN_FS */ 474 475 /* 476 * Calculate index in current block of last 477 * block to be kept. -1 indicates the entire 478 * block so we need not calculate the index. 479 */ 480 factor = 1; 481 for (i = SINGLE; i < level; i++) 482 factor *= NINDIR(fs); 483 last = lastbn; 484 if (lastbn > 0) 485 last /= factor; 486 487 devBlockSize = vfs_devblocksize(mp); 488 nblocks = btodb(fs->fs_bsize, devBlockSize); 489 490 /* Doing a MALLOC here is asking for trouble. We can still 491 * deadlock on pagerfile lock, in case we are running 492 * low on memory and block in MALLOC 493 */ 494 495 tbp = buf_geteblk(fs->fs_bsize); 496 copy = (ufs_daddr_t *)buf_dataptr(tbp); 497 498 /* 499 * Get buffer of block pointers, zero those entries corresponding 500 * to blocks to be free'd, and update on disk copy first. Since 501 * double(triple) indirect before single(double) indirect, calls 502 * to bmap on these blocks will fail. However, we already have 503 * the on disk address, so we have to set the blkno field 504 * explicitly instead of letting buf_bread do everything for us. 505 */ 506 507 vp = ITOV(ip); 508 bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, 0, 0, BLK_META); 509 510 if (buf_valid(bp)) { 511 /* Braces must be here in case trace evaluates to nothing. */ 512 trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); 513 } else { 514 trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); 515 OSIncrementAtomic(¤t_proc()->p_stats->p_ru.ru_inblock); /* pay for read */ 516 buf_setflags(bp, B_READ); 517 if (buf_count(bp) > buf_size(bp)) 518 panic("ffs_indirtrunc: bad buffer size"); 519 buf_setblkno(bp, (daddr64_t)((unsigned)dbn)); 520 VNOP_STRATEGY(bp); 521 error = buf_biowait(bp); 522 } 523 if (error) { 524 buf_brelse(bp); 525 *countp = 0; 526 buf_brelse(tbp); 527 return ((int)error); 528 } 529 530 bap = (ufs_daddr_t *)buf_dataptr(bp); 531 bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); 532 bzero((caddr_t)&bap[last + 1], 533 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t)); 534 if (last == -1) 535 buf_markinvalid(bp); 536 if (last != -1 && (vp)->v_mount->mnt_flag & MNT_ASYNC) { 537 error = 0; 538 buf_bdwrite(bp); 539 } else { 540 error = buf_bwrite(bp); 541 if (error) 542 allerror = error; 543 } 544 bap = copy; 545 546 /* 547 * Recursively free totally unused blocks. 548 */ 549 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 550 i--, nlbn += factor) { 551#if REV_ENDIAN_FS 552 if (rev_endian) 553 nb = OSSwapInt32(bap[i]); 554 else { 555#endif /* REV_ENDIAN_FS */ 556 nb = bap[i]; 557#if REV_ENDIAN_FS 558 } 559#endif /* REV_ENDIAN_FS */ 560 if (nb == 0) 561 continue; 562 if (level > SINGLE) { 563 if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 564 (ufs_daddr_t)-1, level - 1, &blkcount)) 565 allerror = error; 566 blocksreleased += blkcount; 567 } 568 ffs_blkfree(ip, nb, fs->fs_bsize); 569 blocksreleased += nblocks; 570 } 571 572 /* 573 * Recursively free last partial block. 574 */ 575 if (level > SINGLE && lastbn >= 0) { 576 last = lastbn % factor; 577#if REV_ENDIAN_FS 578 if (rev_endian) 579 nb = OSSwapInt32(bap[i]); 580 else { 581#endif /* REV_ENDIAN_FS */ 582 nb = bap[i]; 583#if REV_ENDIAN_FS 584 } 585#endif /* REV_ENDIAN_FS */ 586 if (nb != 0) { 587 if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 588 last, level - 1, &blkcount)) 589 allerror = error; 590 blocksreleased += blkcount; 591 } 592 } 593 buf_brelse(tbp); 594 *countp = blocksreleased; 595 return ((int)allerror); 596} 597 598