/* ffs_balloc.c revision 105422 */
1/* 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Copyright (c) 1982, 1989, 1993 12 * The Regents of the University of California. All rights reserved. 13 * (c) UNIX System Laboratories, Inc. 14 * Copyright (c) 1982, 1986, 1989, 1993 15 * The Regents of the University of California. All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. All advertising materials mentioning features or use of this software 26 * must display the following acknowledgement: 27 * This product includes software developed by the University of 28 * California, Berkeley and its contributors. 29 * 4. Neither the name of the University nor the names of its contributors 30 * may be used to endorse or promote products derived from this software 31 * without specific prior written permission. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 34 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 37 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 41 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 42 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 43 * SUCH DAMAGE. 44 * 45 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 46 * $FreeBSD: head/sys/ufs/ffs/ffs_balloc.c 105422 2002-10-18 22:52:41Z dillon $ 47 */ 48 49#include <sys/param.h> 50#include <sys/systm.h> 51#include <sys/bio.h> 52#include <sys/buf.h> 53#include <sys/lock.h> 54#include <sys/mount.h> 55#include <sys/vnode.h> 56 57#include <ufs/ufs/quota.h> 58#include <ufs/ufs/inode.h> 59#include <ufs/ufs/ufs_extern.h> 60 61#include <ufs/ffs/fs.h> 62#include <ufs/ffs/ffs_extern.h> 63 64/* 65 * Balloc defines the structure of filesystem storage 66 * by allocating the physical blocks on a device given 67 * the inode and the logical block number in a file. 68 * This is the allocation strategy for UFS1. Below is 69 * the allocation strategy for UFS2. 
70 */ 71int 72ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 73 struct ucred *cred, int flags, struct buf **bpp) 74{ 75 struct inode *ip; 76 struct ufs1_dinode *dp; 77 ufs_lbn_t lbn, lastlbn; 78 struct fs *fs; 79 ufs1_daddr_t nb; 80 struct buf *bp, *nbp; 81 struct indir indirs[NIADDR + 2]; 82 int deallocated, osize, nsize, num, i, error; 83 ufs2_daddr_t newb; 84 ufs1_daddr_t *bap, pref; 85 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 86 int unwindidx = -1; 87 struct thread *td = curthread; /* XXX */ 88 89 ip = VTOI(vp); 90 dp = ip->i_din1; 91 fs = ip->i_fs; 92 lbn = lblkno(fs, startoffset); 93 size = blkoff(fs, startoffset) + size; 94 if (size > fs->fs_bsize) 95 panic("ffs_balloc_ufs1: blk too big"); 96 *bpp = NULL; 97 if (flags & IO_EXT) 98 return (EOPNOTSUPP); 99 if (lbn < 0) 100 return (EFBIG); 101 102 /* 103 * If the next write will extend the file into a new block, 104 * and the file is currently composed of a fragment 105 * this fragment has to be extended to be a full block. 
106 */ 107 lastlbn = lblkno(fs, ip->i_size); 108 if (lastlbn < NDADDR && lastlbn < lbn) { 109 nb = lastlbn; 110 osize = blksize(fs, ip, nb); 111 if (osize < fs->fs_bsize && osize > 0) { 112 error = ffs_realloccg(ip, nb, dp->di_db[nb], 113 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 114 &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp); 115 if (error) 116 return (error); 117 if (DOINGSOFTDEP(vp)) 118 softdep_setup_allocdirect(ip, nb, 119 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 120 fs->fs_bsize, osize, bp); 121 ip->i_size = smalllblktosize(fs, nb + 1); 122 dp->di_size = ip->i_size; 123 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 124 ip->i_flag |= IN_CHANGE | IN_UPDATE; 125 if (flags & IO_SYNC) 126 bwrite(bp); 127 else 128 bawrite(bp); 129 } 130 } 131 /* 132 * The first NDADDR blocks are direct blocks 133 */ 134 if (lbn < NDADDR) { 135 if (flags & BA_METAONLY) 136 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 137 nb = dp->di_db[lbn]; 138 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 139 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 140 if (error) { 141 brelse(bp); 142 return (error); 143 } 144 bp->b_blkno = fsbtodb(fs, nb); 145 *bpp = bp; 146 return (0); 147 } 148 if (nb != 0) { 149 /* 150 * Consider need to reallocate a fragment. 
151 */ 152 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 153 nsize = fragroundup(fs, size); 154 if (nsize <= osize) { 155 error = bread(vp, lbn, osize, NOCRED, &bp); 156 if (error) { 157 brelse(bp); 158 return (error); 159 } 160 bp->b_blkno = fsbtodb(fs, nb); 161 } else { 162 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 163 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 164 &dp->di_db[0]), osize, nsize, cred, &bp); 165 if (error) 166 return (error); 167 if (DOINGSOFTDEP(vp)) 168 softdep_setup_allocdirect(ip, lbn, 169 dbtofsb(fs, bp->b_blkno), nb, 170 nsize, osize, bp); 171 } 172 } else { 173 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 174 nsize = fragroundup(fs, size); 175 else 176 nsize = fs->fs_bsize; 177 error = ffs_alloc(ip, lbn, 178 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 179 nsize, cred, &newb); 180 if (error) 181 return (error); 182 bp = getblk(vp, lbn, nsize, 0, 0); 183 bp->b_blkno = fsbtodb(fs, newb); 184 if (flags & BA_CLRBUF) 185 vfs_bio_clrbuf(bp); 186 if (DOINGSOFTDEP(vp)) 187 softdep_setup_allocdirect(ip, lbn, newb, 0, 188 nsize, 0, bp); 189 } 190 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 191 ip->i_flag |= IN_CHANGE | IN_UPDATE; 192 *bpp = bp; 193 return (0); 194 } 195 /* 196 * Determine the number of levels of indirection. 197 */ 198 pref = 0; 199 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 200 return(error); 201#ifdef DIAGNOSTIC 202 if (num < 1) 203 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 204#endif 205 /* 206 * Fetch the first indirect block allocating if necessary. 
207 */ 208 --num; 209 nb = dp->di_ib[indirs[0].in_off]; 210 allocib = NULL; 211 allocblk = allociblk; 212 if (nb == 0) { 213 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 214 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 215 cred, &newb)) != 0) 216 return (error); 217 nb = newb; 218 *allocblk++ = nb; 219 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); 220 bp->b_blkno = fsbtodb(fs, nb); 221 vfs_bio_clrbuf(bp); 222 if (DOINGSOFTDEP(vp)) { 223 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 224 newb, 0, fs->fs_bsize, 0, bp); 225 bdwrite(bp); 226 } else { 227 /* 228 * Write synchronously so that indirect blocks 229 * never point at garbage. 230 */ 231 if (DOINGASYNC(vp)) 232 bdwrite(bp); 233 else if ((error = bwrite(bp)) != 0) 234 goto fail; 235 } 236 allocib = &dp->di_ib[indirs[0].in_off]; 237 *allocib = nb; 238 ip->i_flag |= IN_CHANGE | IN_UPDATE; 239 } 240 /* 241 * Fetch through the indirect blocks, allocating as necessary. 242 */ 243 for (i = 1;;) { 244 error = bread(vp, 245 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 246 if (error) { 247 brelse(bp); 248 goto fail; 249 } 250 bap = (ufs1_daddr_t *)bp->b_data; 251 nb = bap[indirs[i].in_off]; 252 if (i == num) 253 break; 254 i += 1; 255 if (nb != 0) { 256 bqrelse(bp); 257 continue; 258 } 259 if (pref == 0) 260 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 261 if ((error = 262 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { 263 brelse(bp); 264 goto fail; 265 } 266 nb = newb; 267 *allocblk++ = nb; 268 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); 269 nbp->b_blkno = fsbtodb(fs, nb); 270 vfs_bio_clrbuf(nbp); 271 if (DOINGSOFTDEP(vp)) { 272 softdep_setup_allocindir_meta(nbp, ip, bp, 273 indirs[i - 1].in_off, nb); 274 bdwrite(nbp); 275 } else { 276 /* 277 * Write synchronously so that indirect blocks 278 * never point at garbage. 
279 */ 280 if ((error = bwrite(nbp)) != 0) { 281 brelse(bp); 282 goto fail; 283 } 284 } 285 bap[indirs[i - 1].in_off] = nb; 286 if (allocib == NULL && unwindidx < 0) 287 unwindidx = i - 1; 288 /* 289 * If required, write synchronously, otherwise use 290 * delayed write. 291 */ 292 if (flags & IO_SYNC) { 293 bwrite(bp); 294 } else { 295 if (bp->b_bufsize == fs->fs_bsize) 296 bp->b_flags |= B_CLUSTEROK; 297 bdwrite(bp); 298 } 299 } 300 /* 301 * If asked only for the indirect block, then return it. 302 */ 303 if (flags & BA_METAONLY) { 304 *bpp = bp; 305 return (0); 306 } 307 /* 308 * Get the data block, allocating if necessary. 309 */ 310 if (nb == 0) { 311 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); 312 error = ffs_alloc(ip, 313 lbn, pref, (int)fs->fs_bsize, cred, &newb); 314 if (error) { 315 brelse(bp); 316 goto fail; 317 } 318 nb = newb; 319 *allocblk++ = nb; 320 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 321 nbp->b_blkno = fsbtodb(fs, nb); 322 if (flags & BA_CLRBUF) 323 vfs_bio_clrbuf(nbp); 324 if (DOINGSOFTDEP(vp)) 325 softdep_setup_allocindir_page(ip, lbn, bp, 326 indirs[i].in_off, nb, 0, nbp); 327 bap[indirs[i].in_off] = nb; 328 /* 329 * If required, write synchronously, otherwise use 330 * delayed write. 
331 */ 332 if (flags & IO_SYNC) { 333 bwrite(bp); 334 } else { 335 if (bp->b_bufsize == fs->fs_bsize) 336 bp->b_flags |= B_CLUSTEROK; 337 bdwrite(bp); 338 } 339 *bpp = nbp; 340 return (0); 341 } 342 brelse(bp); 343 if (flags & BA_CLRBUF) { 344 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 345 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 346 error = cluster_read(vp, ip->i_size, lbn, 347 (int)fs->fs_bsize, NOCRED, 348 MAXBSIZE, seqcount, &nbp); 349 } else { 350 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 351 } 352 if (error) { 353 brelse(nbp); 354 goto fail; 355 } 356 } else { 357 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 358 nbp->b_blkno = fsbtodb(fs, nb); 359 } 360 *bpp = nbp; 361 return (0); 362fail: 363 /* 364 * If we have failed part way through block allocation, we 365 * have to deallocate any indirect blocks that we have allocated. 366 * We have to fsync the file before we start to get rid of all 367 * of its dependencies so that we do not leave them dangling. 368 * We have to sync it at the end so that the soft updates code 369 * does not find any untracked changes. Although this is really 370 * slow, running out of disk space is not expected to be a common 371 * occurence. The error return from fsync is ignored as we already 372 * have an error to return to the user. 
373 */ 374 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 375 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { 376 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); 377 deallocated += fs->fs_bsize; 378 } 379 if (allocib != NULL) { 380 *allocib = 0; 381 } else if (unwindidx >= 0) { 382 int r; 383 384 r = bread(vp, indirs[unwindidx].in_lbn, 385 (int)fs->fs_bsize, NOCRED, &bp); 386 if (r) { 387 panic("Could not unwind indirect block, error %d", r); 388 brelse(bp); 389 } else { 390 bap = (ufs1_daddr_t *)bp->b_data; 391 bap[indirs[unwindidx].in_off] = 0; 392 if (flags & IO_SYNC) { 393 bwrite(bp); 394 } else { 395 if (bp->b_bufsize == fs->fs_bsize) 396 bp->b_flags |= B_CLUSTEROK; 397 bdwrite(bp); 398 } 399 } 400 } 401 if (deallocated) { 402#ifdef QUOTA 403 /* 404 * Restore user's disk quota because allocation failed. 405 */ 406 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 407#endif 408 dp->di_blocks -= btodb(deallocated); 409 ip->i_flag |= IN_CHANGE | IN_UPDATE; 410 } 411 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 412 return (error); 413} 414 415/* 416 * Balloc defines the structure of file system storage 417 * by allocating the physical blocks on a device given 418 * the inode and the logical block number in a file. 419 * This is the allocation strategy for UFS2. Above is 420 * the allocation strategy for UFS1. 
421 */ 422int 423ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 424 struct ucred *cred, int flags, struct buf **bpp) 425{ 426 struct inode *ip; 427 struct ufs2_dinode *dp; 428 ufs_lbn_t lbn, lastlbn; 429 struct fs *fs; 430 struct buf *bp, *nbp; 431 struct indir indirs[NIADDR + 2]; 432 ufs2_daddr_t nb, newb, *bap, pref; 433 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 434 int deallocated, osize, nsize, num, i, error; 435 int unwindidx = -1; 436 struct thread *td = curthread; /* XXX */ 437 438 ip = VTOI(vp); 439 dp = ip->i_din2; 440 fs = ip->i_fs; 441 lbn = lblkno(fs, startoffset); 442 size = blkoff(fs, startoffset) + size; 443 if (size > fs->fs_bsize) 444 panic("ffs_balloc_ufs2: blk too big"); 445 *bpp = NULL; 446 if (lbn < 0) 447 return (EFBIG); 448 449 /* 450 * Check for allocating external data. 451 */ 452 if (flags & IO_EXT) { 453 if (lbn >= NXADDR) 454 return (EFBIG); 455 /* 456 * If the next write will extend the data into a new block, 457 * and the data is currently composed of a fragment 458 * this fragment has to be extended to be a full block. 
459 */ 460 lastlbn = lblkno(fs, dp->di_extsize); 461 if (lastlbn < lbn) { 462 nb = lastlbn; 463 osize = sblksize(fs, dp->di_extsize, nb); 464 if (osize < fs->fs_bsize && osize > 0) { 465 error = ffs_realloccg(ip, -1 - nb, 466 dp->di_extb[nb], 467 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 468 &dp->di_extb[0]), osize, 469 (int)fs->fs_bsize, cred, &bp); 470 if (error) 471 return (error); 472 if (DOINGSOFTDEP(vp)) 473 softdep_setup_allocext(ip, nb, 474 dbtofsb(fs, bp->b_blkno), 475 dp->di_extb[nb], 476 fs->fs_bsize, osize, bp); 477 dp->di_extsize = smalllblktosize(fs, nb + 1); 478 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 479 bp->b_xflags |= BX_ALTDATA; 480 ip->i_flag |= IN_CHANGE | IN_UPDATE; 481 if (flags & IO_SYNC) 482 bwrite(bp); 483 else 484 bawrite(bp); 485 } 486 } 487 /* 488 * All blocks are direct blocks 489 */ 490 if (flags & BA_METAONLY) 491 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 492 nb = dp->di_extb[lbn]; 493 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 494 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); 495 if (error) { 496 brelse(bp); 497 return (error); 498 } 499 bp->b_blkno = fsbtodb(fs, nb); 500 bp->b_xflags |= BX_ALTDATA; 501 *bpp = bp; 502 return (0); 503 } 504 if (nb != 0) { 505 /* 506 * Consider need to reallocate a fragment. 
507 */ 508 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 509 nsize = fragroundup(fs, size); 510 if (nsize <= osize) { 511 error = bread(vp, -1 - lbn, osize, NOCRED, &bp); 512 if (error) { 513 brelse(bp); 514 return (error); 515 } 516 bp->b_blkno = fsbtodb(fs, nb); 517 bp->b_xflags |= BX_ALTDATA; 518 } else { 519 error = ffs_realloccg(ip, -1 - lbn, 520 dp->di_extb[lbn], 521 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 522 &dp->di_extb[0]), osize, nsize, cred, &bp); 523 if (error) 524 return (error); 525 bp->b_xflags |= BX_ALTDATA; 526 if (DOINGSOFTDEP(vp)) 527 softdep_setup_allocext(ip, lbn, 528 dbtofsb(fs, bp->b_blkno), nb, 529 nsize, osize, bp); 530 } 531 } else { 532 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 533 nsize = fragroundup(fs, size); 534 else 535 nsize = fs->fs_bsize; 536 error = ffs_alloc(ip, lbn, 537 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 538 nsize, cred, &newb); 539 if (error) 540 return (error); 541 bp = getblk(vp, -1 - lbn, nsize, 0, 0); 542 bp->b_blkno = fsbtodb(fs, newb); 543 bp->b_xflags |= BX_ALTDATA; 544 if (flags & BA_CLRBUF) 545 vfs_bio_clrbuf(bp); 546 if (DOINGSOFTDEP(vp)) 547 softdep_setup_allocext(ip, lbn, newb, 0, 548 nsize, 0, bp); 549 } 550 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 551 ip->i_flag |= IN_CHANGE | IN_UPDATE; 552 *bpp = bp; 553 return (0); 554 } 555 /* 556 * If the next write will extend the file into a new block, 557 * and the file is currently composed of a fragment 558 * this fragment has to be extended to be a full block. 
559 */ 560 lastlbn = lblkno(fs, ip->i_size); 561 if (lastlbn < NDADDR && lastlbn < lbn) { 562 nb = lastlbn; 563 osize = blksize(fs, ip, nb); 564 if (osize < fs->fs_bsize && osize > 0) { 565 error = ffs_realloccg(ip, nb, dp->di_db[nb], 566 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 567 &dp->di_db[0]), osize, (int)fs->fs_bsize, 568 cred, &bp); 569 if (error) 570 return (error); 571 if (DOINGSOFTDEP(vp)) 572 softdep_setup_allocdirect(ip, nb, 573 dbtofsb(fs, bp->b_blkno), 574 dp->di_db[nb], 575 fs->fs_bsize, osize, bp); 576 ip->i_size = smalllblktosize(fs, nb + 1); 577 dp->di_size = ip->i_size; 578 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 579 ip->i_flag |= IN_CHANGE | IN_UPDATE; 580 if (flags & IO_SYNC) 581 bwrite(bp); 582 else 583 bawrite(bp); 584 } 585 } 586 /* 587 * The first NDADDR blocks are direct blocks 588 */ 589 if (lbn < NDADDR) { 590 if (flags & BA_METAONLY) 591 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 592 nb = dp->di_db[lbn]; 593 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 594 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 595 if (error) { 596 brelse(bp); 597 return (error); 598 } 599 bp->b_blkno = fsbtodb(fs, nb); 600 *bpp = bp; 601 return (0); 602 } 603 if (nb != 0) { 604 /* 605 * Consider need to reallocate a fragment. 
606 */ 607 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 608 nsize = fragroundup(fs, size); 609 if (nsize <= osize) { 610 error = bread(vp, lbn, osize, NOCRED, &bp); 611 if (error) { 612 brelse(bp); 613 return (error); 614 } 615 bp->b_blkno = fsbtodb(fs, nb); 616 } else { 617 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 618 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 619 &dp->di_db[0]), osize, nsize, cred, &bp); 620 if (error) 621 return (error); 622 if (DOINGSOFTDEP(vp)) 623 softdep_setup_allocdirect(ip, lbn, 624 dbtofsb(fs, bp->b_blkno), nb, 625 nsize, osize, bp); 626 } 627 } else { 628 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 629 nsize = fragroundup(fs, size); 630 else 631 nsize = fs->fs_bsize; 632 error = ffs_alloc(ip, lbn, 633 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 634 &dp->di_db[0]), nsize, cred, &newb); 635 if (error) 636 return (error); 637 bp = getblk(vp, lbn, nsize, 0, 0); 638 bp->b_blkno = fsbtodb(fs, newb); 639 if (flags & BA_CLRBUF) 640 vfs_bio_clrbuf(bp); 641 if (DOINGSOFTDEP(vp)) 642 softdep_setup_allocdirect(ip, lbn, newb, 0, 643 nsize, 0, bp); 644 } 645 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 646 ip->i_flag |= IN_CHANGE | IN_UPDATE; 647 *bpp = bp; 648 return (0); 649 } 650 /* 651 * Determine the number of levels of indirection. 652 */ 653 pref = 0; 654 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 655 return(error); 656#ifdef DIAGNOSTIC 657 if (num < 1) 658 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 659#endif 660 /* 661 * Fetch the first indirect block allocating if necessary. 
662 */ 663 --num; 664 nb = dp->di_ib[indirs[0].in_off]; 665 allocib = NULL; 666 allocblk = allociblk; 667 if (nb == 0) { 668 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 669 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 670 cred, &newb)) != 0) 671 return (error); 672 nb = newb; 673 *allocblk++ = nb; 674 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); 675 bp->b_blkno = fsbtodb(fs, nb); 676 vfs_bio_clrbuf(bp); 677 if (DOINGSOFTDEP(vp)) { 678 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 679 newb, 0, fs->fs_bsize, 0, bp); 680 bdwrite(bp); 681 } else { 682 /* 683 * Write synchronously so that indirect blocks 684 * never point at garbage. 685 */ 686 if (DOINGASYNC(vp)) 687 bdwrite(bp); 688 else if ((error = bwrite(bp)) != 0) 689 goto fail; 690 } 691 allocib = &dp->di_ib[indirs[0].in_off]; 692 *allocib = nb; 693 ip->i_flag |= IN_CHANGE | IN_UPDATE; 694 } 695 /* 696 * Fetch through the indirect blocks, allocating as necessary. 697 */ 698 for (i = 1;;) { 699 error = bread(vp, 700 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 701 if (error) { 702 brelse(bp); 703 goto fail; 704 } 705 bap = (ufs2_daddr_t *)bp->b_data; 706 nb = bap[indirs[i].in_off]; 707 if (i == num) 708 break; 709 i += 1; 710 if (nb != 0) { 711 bqrelse(bp); 712 continue; 713 } 714 if (pref == 0) 715 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 716 if ((error = 717 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { 718 brelse(bp); 719 goto fail; 720 } 721 nb = newb; 722 *allocblk++ = nb; 723 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); 724 nbp->b_blkno = fsbtodb(fs, nb); 725 vfs_bio_clrbuf(nbp); 726 if (DOINGSOFTDEP(vp)) { 727 softdep_setup_allocindir_meta(nbp, ip, bp, 728 indirs[i - 1].in_off, nb); 729 bdwrite(nbp); 730 } else { 731 /* 732 * Write synchronously so that indirect blocks 733 * never point at garbage. 
734 */ 735 if ((error = bwrite(nbp)) != 0) { 736 brelse(bp); 737 goto fail; 738 } 739 } 740 bap[indirs[i - 1].in_off] = nb; 741 if (allocib == NULL && unwindidx < 0) 742 unwindidx = i - 1; 743 /* 744 * If required, write synchronously, otherwise use 745 * delayed write. 746 */ 747 if (flags & IO_SYNC) { 748 bwrite(bp); 749 } else { 750 if (bp->b_bufsize == fs->fs_bsize) 751 bp->b_flags |= B_CLUSTEROK; 752 bdwrite(bp); 753 } 754 } 755 /* 756 * If asked only for the indirect block, then return it. 757 */ 758 if (flags & BA_METAONLY) { 759 *bpp = bp; 760 return (0); 761 } 762 /* 763 * Get the data block, allocating if necessary. 764 */ 765 if (nb == 0) { 766 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); 767 error = ffs_alloc(ip, 768 lbn, pref, (int)fs->fs_bsize, cred, &newb); 769 if (error) { 770 brelse(bp); 771 goto fail; 772 } 773 nb = newb; 774 *allocblk++ = nb; 775 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 776 nbp->b_blkno = fsbtodb(fs, nb); 777 if (flags & BA_CLRBUF) 778 vfs_bio_clrbuf(nbp); 779 if (DOINGSOFTDEP(vp)) 780 softdep_setup_allocindir_page(ip, lbn, bp, 781 indirs[i].in_off, nb, 0, nbp); 782 bap[indirs[i].in_off] = nb; 783 /* 784 * If required, write synchronously, otherwise use 785 * delayed write. 786 */ 787 if (flags & IO_SYNC) { 788 bwrite(bp); 789 } else { 790 if (bp->b_bufsize == fs->fs_bsize) 791 bp->b_flags |= B_CLUSTEROK; 792 bdwrite(bp); 793 } 794 *bpp = nbp; 795 return (0); 796 } 797 brelse(bp); 798 /* 799 * If requested clear invalid portions of the buffer. If we 800 * have to do a read-before-write (typical if BA_CLRBUF is set), 801 * try to do some read-ahead in the sequential case to reduce 802 * the number of I/O transactions. 
803 */ 804 if (flags & BA_CLRBUF) { 805 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 806 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 807 error = cluster_read(vp, ip->i_size, lbn, 808 (int)fs->fs_bsize, NOCRED, 809 MAXBSIZE, seqcount, &nbp); 810 } else { 811 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 812 } 813 if (error) { 814 brelse(nbp); 815 goto fail; 816 } 817 } else { 818 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 819 nbp->b_blkno = fsbtodb(fs, nb); 820 } 821 *bpp = nbp; 822 return (0); 823fail: 824 /* 825 * If we have failed part way through block allocation, we 826 * have to deallocate any indirect blocks that we have allocated. 827 * We have to fsync the file before we start to get rid of all 828 * of its dependencies so that we do not leave them dangling. 829 * We have to sync it at the end so that the soft updates code 830 * does not find any untracked changes. Although this is really 831 * slow, running out of disk space is not expected to be a common 832 * occurence. The error return from fsync is ignored as we already 833 * have an error to return to the user. 834 */ 835 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 836 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { 837 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); 838 deallocated += fs->fs_bsize; 839 } 840 if (allocib != NULL) { 841 *allocib = 0; 842 } else if (unwindidx >= 0) { 843 int r; 844 845 r = bread(vp, indirs[unwindidx].in_lbn, 846 (int)fs->fs_bsize, NOCRED, &bp); 847 if (r) { 848 panic("Could not unwind indirect block, error %d", r); 849 brelse(bp); 850 } else { 851 bap = (ufs2_daddr_t *)bp->b_data; 852 bap[indirs[unwindidx].in_off] = 0; 853 if (flags & IO_SYNC) { 854 bwrite(bp); 855 } else { 856 if (bp->b_bufsize == fs->fs_bsize) 857 bp->b_flags |= B_CLUSTEROK; 858 bdwrite(bp); 859 } 860 } 861 } 862 if (deallocated) { 863#ifdef QUOTA 864 /* 865 * Restore user's disk quota because allocation failed. 
866 */ 867 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 868#endif 869 dp->di_blocks -= btodb(deallocated); 870 ip->i_flag |= IN_CHANGE | IN_UPDATE; 871 } 872 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 873 return (error); 874} 875