ext2_alloc.c revision 47099
1/* 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ext2_alloc.c 8.8 (Berkeley) 2/21/94 40 */ 41 42#include "opt_quota.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/vnode.h> 47#include <sys/stat.h> 48#include <sys/mount.h> 49#include <sys/syslog.h> 50 51#include <ufs/ufs/quota.h> 52#include <ufs/ufs/inode.h> 53#include <ufs/ufs/ufsmount.h> 54 55#include <gnu/ext2fs/ext2_fs.h> 56#include <gnu/ext2fs/ext2_fs_sb.h> 57#include <gnu/ext2fs/fs.h> 58#include <gnu/ext2fs/ext2_extern.h> 59 60static void ext2_fserr __P((struct ext2_sb_info *, u_int, char *)); 61 62/* 63 * Linux calls this functions at the following locations: 64 * (1) the inode is freed 65 * (2) a preallocation miss occurs 66 * (3) truncate is called 67 * (4) release_file is called and f_mode & 2 68 * 69 * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2) 70 * the call in vfree might be redundant 71 */ 72void 73ext2_discard_prealloc(ip) 74 struct inode * ip; 75{ 76#ifdef EXT2_PREALLOCATE 77 if (ip->i_prealloc_count) { 78 int i = ip->i_prealloc_count; 79 ip->i_prealloc_count = 0; 80 ext2_free_blocks (ITOV(ip)->v_mount, 81 ip->i_prealloc_block, 82 i); 83 } 84#endif 85} 86 87/* 88 * Allocate a block in the file system. 89 * 90 * this takes the framework from ffs_alloc. To implement the 91 * actual allocation, it calls ext2_new_block, the ported version 92 * of the same Linux routine. 93 * 94 * we note that this is always called in connection with ext2_blkpref 95 * 96 * preallocation is done as Linux does it 97 */ 98int 99ext2_alloc(ip, lbn, bpref, size, cred, bnp) 100 register struct inode *ip; 101 daddr_t lbn, bpref; 102 int size; 103 struct ucred *cred; 104 daddr_t *bnp; 105{ 106 register struct ext2_sb_info *fs; 107 daddr_t bno; 108#if QUOTA 109 int error; 110#endif 111 112 *bnp = 0; 113 fs = ip->i_e2fs; 114#if DIAGNOSTIC 115 if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) { 116 printf("dev = 0x%lx, bsize = %lu, size = %d, fs = %s\n", 117 (u_long)ip->i_dev, fs->s_blocksize, size, fs->fs_fsmnt); 118 panic("ext2_alloc: bad size"); 119 } 120 if (cred == NOCRED) 121 panic("ext2_alloc: missing credential"); 122#endif /* DIAGNOSTIC */ 123 if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0) 124 goto nospace; 125 if (cred->cr_uid != 0 && 126 fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count) 127 goto nospace; 128#if QUOTA 129 if ((error = chkdq(ip, (long)btodb(size), cred, 0)) != 0) 130 return (error); 131#endif 132 if (bpref >= fs->s_es->s_blocks_count) 133 bpref = 0; 134 /* call the Linux code */ 135#ifdef EXT2_PREALLOCATE 136 /* To have a preallocation hit, we must 137 * - have at least one block preallocated 138 * - and our preferred block must have that block number or one below 139 */ 140 if (ip->i_prealloc_count && 141 (bpref == ip->i_prealloc_block || 142 bpref + 1 == ip->i_prealloc_block)) 143 { 144 bno = ip->i_prealloc_block++; 145 ip->i_prealloc_count--; 146 /* ext2_debug ("preallocation hit (%lu/%lu).\n", 147 ++alloc_hits, ++alloc_attempts); */ 148 149 /* Linux gets, clears, and releases the buffer at this 150 point - we don't have to that; we leave it to the caller 151 */ 152 } else { 153 ext2_discard_prealloc (ip); 154 /* ext2_debug ("preallocation miss (%lu/%lu).\n", 155 alloc_hits, ++alloc_attempts); */ 156 if (S_ISREG(ip->i_mode)) 157 bno = ext2_new_block 158 (ITOV(ip)->v_mount, bpref, 159 &ip->i_prealloc_count, 160 &ip->i_prealloc_block); 161 else 162 bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, 163 bpref, 0, 0); 164 } 165#else 166 bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); 167#endif 168 169 if (bno > 0) { 170 /* set next_alloc fields as done in block_getblk */ 171 ip->i_next_alloc_block = lbn; 172 ip->i_next_alloc_goal = bno; 173 174 ip->i_blocks += btodb(size); 175 ip->i_flag |= IN_CHANGE | IN_UPDATE; 176 *bnp = bno; 177 return (0); 178 } 179#if QUOTA 180 /* 181 * Restore user's disk quota because allocation failed. 182 */ 183 (void) chkdq(ip, (long)-btodb(size), cred, FORCE); 184#endif 185nospace: 186 ext2_fserr(fs, cred->cr_uid, "file system full"); 187 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 188 return (ENOSPC); 189} 190 191/* 192 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 193 * 194 * The vnode and an array of buffer pointers for a range of sequential 195 * logical blocks to be made contiguous is given. The allocator attempts 196 * to find a range of sequential blocks starting as close as possible to 197 * an fs_rotdelay offset from the end of the allocation for the logical 198 * block immediately preceeding the current range. If successful, the 199 * physical block numbers in the buffer pointers and in the inode are 200 * changed to reflect the new allocation. If unsuccessful, the allocation 201 * is left unchanged. The success in doing the reallocation is returned. 202 * Note that the error return is not reflected back to the user. Rather 203 * the previous block allocation will be used. 204 */ 205 206#ifdef FANCY_REALLOC 207#include <sys/sysctl.h> 208static int doasyncfree = 1; 209#ifdef OPT_DEBUG 210SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 211#endif /* OPT_DEBUG */ 212#endif 213 214int 215ext2_reallocblks(ap) 216 struct vop_reallocblks_args /* { 217 struct vnode *a_vp; 218 struct cluster_save *a_buflist; 219 } */ *ap; 220{ 221#ifndef FANCY_REALLOC 222/* printf("ext2_reallocblks not implemented\n"); */ 223return ENOSPC; 224#else 225 226 struct ext2_sb_info *fs; 227 struct inode *ip; 228 struct vnode *vp; 229 struct buf *sbp, *ebp; 230 daddr_t *bap, *sbap, *ebap; 231 struct cluster_save *buflist; 232 daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno; 233 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 234 int i, len, start_lvl, end_lvl, pref, ssize; 235 236 vp = ap->a_vp; 237 ip = VTOI(vp); 238 fs = ip->i_e2fs; 239#ifdef UNKLAR 240 if (fs->fs_contigsumsize <= 0) 241 return (ENOSPC); 242#endif 243 buflist = ap->a_buflist; 244 len = buflist->bs_nchildren; 245 start_lbn = buflist->bs_children[0]->b_lblkno; 246 end_lbn = start_lbn + len - 1; 247#if DIAGNOSTIC 248 for (i = 1; i < len; i++) 249 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 250 panic("ext2_reallocblks: non-cluster"); 251#endif 252 /* 253 * If the latest allocation is in a new cylinder group, assume that 254 * the filesystem has decided to move and do not force it back to 255 * the previous cylinder group. 256 */ 257 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 258 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 259 return (ENOSPC); 260 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 261 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 262 return (ENOSPC); 263 /* 264 * Get the starting offset and block map for the first block. 265 */ 266 if (start_lvl == 0) { 267 sbap = &ip->i_db[0]; 268 soff = start_lbn; 269 } else { 270 idp = &start_ap[start_lvl - 1]; 271 if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) { 272 brelse(sbp); 273 return (ENOSPC); 274 } 275 sbap = (daddr_t *)sbp->b_data; 276 soff = idp->in_off; 277 } 278 /* 279 * Find the preferred location for the cluster. 280 */ 281 pref = ext2_blkpref(ip, start_lbn, soff, sbap); 282 /* 283 * If the block range spans two block maps, get the second map. 284 */ 285 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 286 ssize = len; 287 } else { 288#if DIAGNOSTIC 289 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 290 panic("ext2_reallocblk: start == end"); 291#endif 292 ssize = len - (idp->in_off + 1); 293 if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp)) 294 goto fail; 295 ebap = (daddr_t *)ebp->b_data; 296 } 297 /* 298 * Search the block map looking for an allocation of the desired size. 299 */ 300 if ((newblk = (daddr_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref, 301 len, (u_long (*)())ext2_clusteralloc)) == 0) 302 goto fail; 303 /* 304 * We have found a new contiguous block. 305 * 306 * First we have to replace the old block pointers with the new 307 * block pointers in the inode and indirect blocks associated 308 * with the file. 309 */ 310 blkno = newblk; 311 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) { 312 if (i == ssize) 313 bap = ebap; 314#if DIAGNOSTIC 315 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 316 panic("ext2_reallocblks: alloc mismatch"); 317#endif 318 *bap++ = blkno; 319 } 320 /* 321 * Next we must write out the modified inode and indirect blocks. 322 * For strict correctness, the writes should be synchronous since 323 * the old block values may have been written to disk. In practise 324 * they are almost never written, but if we are concerned about 325 * strict correctness, the `doasyncfree' flag should be set to zero. 326 * 327 * The test on `doasyncfree' should be changed to test a flag 328 * that shows whether the associated buffers and inodes have 329 * been written. The flag should be set when the cluster is 330 * started and cleared whenever the buffer or inode is flushed. 331 * We can then check below to see if it is set, and do the 332 * synchronous write only when it has been cleared. 333 */ 334 if (sbap != &ip->i_db[0]) { 335 if (doasyncfree) 336 bdwrite(sbp); 337 else 338 bwrite(sbp); 339 } else { 340 ip->i_flag |= IN_CHANGE | IN_UPDATE; 341 if (!doasyncfree) 342 UFS_UPDATE(vp, 1); 343 } 344 if (ssize < len) 345 if (doasyncfree) 346 bdwrite(ebp); 347 else 348 bwrite(ebp); 349 /* 350 * Last, free the old blocks and assign the new blocks to the buffers. 351 */ 352 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) { 353 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 354 fs->s_blocksize); 355 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 356 } 357 return (0); 358 359fail: 360 if (ssize < len) 361 brelse(ebp); 362 if (sbap != &ip->i_db[0]) 363 brelse(sbp); 364 return (ENOSPC); 365 366#endif /* FANCY_REALLOC */ 367} 368 369/* 370 * Allocate an inode in the file system. 371 * 372 * we leave the actual allocation strategy to the (modified) 373 * ext2_new_inode(), to make sure we get the policies right 374 */ 375int 376ext2_valloc(pvp, mode, cred, vpp) 377 struct vnode *pvp; 378 int mode; 379 struct ucred *cred; 380 struct vnode **vpp; 381{ 382 register struct inode *pip; 383 register struct ext2_sb_info *fs; 384 register struct inode *ip; 385 ino_t ino; 386 int i, error; 387 388 *vpp = NULL; 389 pip = VTOI(pvp); 390 fs = pip->i_e2fs; 391 if (fs->s_es->s_free_inodes_count == 0) 392 goto noinodes; 393 394 /* call the Linux routine - it returns the inode number only */ 395 ino = ext2_new_inode(pip, mode); 396 397 if (ino == 0) 398 goto noinodes; 399 error = VFS_VGET(pvp->v_mount, ino, vpp); 400 if (error) { 401 UFS_VFREE(pvp, ino, mode); 402 return (error); 403 } 404 ip = VTOI(*vpp); 405 406 /* 407 the question is whether using VGET was such good idea at all - 408 Linux doesn't read the old inode in when it's allocating a 409 new one. I will set at least i_size & i_blocks the zero. 410 */ 411 ip->i_mode = 0; 412 ip->i_size = 0; 413 ip->i_blocks = 0; 414 ip->i_flags = 0; 415 /* now we want to make sure that the block pointers are zeroed out */ 416 for (i = 0; i < NDADDR; i++) 417 ip->i_db[i] = 0; 418 for (i = 0; i < NIADDR; i++) 419 ip->i_ib[i] = 0; 420 421 /* 422 * Set up a new generation number for this inode. 423 * XXX check if this makes sense in ext2 424 */ 425 if (ip->i_gen == 0 || ++ip->i_gen == 0) 426 ip->i_gen = random() / 2 + 1; 427/* 428printf("ext2_valloc: allocated inode %d\n", ino); 429*/ 430 return (0); 431noinodes: 432 ext2_fserr(fs, cred->cr_uid, "out of inodes"); 433 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 434 return (ENOSPC); 435} 436 437/* 438 * Select the desired position for the next block in a file. 439 * 440 * we try to mimic what Remy does in inode_getblk/block_getblk 441 * 442 * we note: blocknr == 0 means that we're about to allocate either 443 * a direct block or a pointer block at the first level of indirection 444 * (In other words, stuff that will go in i_db[] or i_ib[]) 445 * 446 * blocknr != 0 means that we're allocating a block that is none 447 * of the above. Then, blocknr tells us the number of the block 448 * that will hold the pointer 449 */ 450daddr_t 451ext2_blkpref(ip, lbn, indx, bap, blocknr) 452 struct inode *ip; 453 daddr_t lbn; 454 int indx; 455 daddr_t *bap; 456 daddr_t blocknr; 457{ 458 int tmp; 459 460 /* if the next block is actually what we thought it is, 461 then set the goal to what we thought it should be 462 */ 463 if(ip->i_next_alloc_block == lbn) 464 return ip->i_next_alloc_goal; 465 466 /* now check whether we were provided with an array that basically 467 tells us previous blocks to which we want to stay closeby 468 */ 469 if(bap) 470 for (tmp = indx - 1; tmp >= 0; tmp--) 471 if (bap[tmp]) 472 return bap[tmp]; 473 474 /* else let's fall back to the blocknr, or, if there is none, 475 follow the rule that a block should be allocated near its inode 476 */ 477 return blocknr ? blocknr : 478 (daddr_t)(ip->i_block_group * 479 EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + 480 ip->i_e2fs->s_es->s_first_data_block; 481} 482 483/* 484 * Free a block or fragment. 485 * 486 * pass on to the Linux code 487 */ 488void 489ext2_blkfree(ip, bno, size) 490 register struct inode *ip; 491 daddr_t bno; 492 long size; 493{ 494 register struct ext2_sb_info *fs; 495 496 fs = ip->i_e2fs; 497 /* 498 * call Linux code with mount *, block number, count 499 */ 500 ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size); 501} 502 503/* 504 * Free an inode. 505 * 506 * the maintenance of the actual bitmaps is again up to the linux code 507 */ 508int 509ext2_vfree(pvp, ino, mode) 510 struct vnode *pvp; 511 ino_t ino; 512 int mode; 513{ 514 register struct ext2_sb_info *fs; 515 register struct inode *pip; 516 register mode_t save_i_mode; 517 518 pip = VTOI(pvp); 519 fs = pip->i_e2fs; 520 if ((u_int)ino >= fs->s_inodes_per_group * fs->s_groups_count) 521 panic("ext2_vfree: range: dev = (%d, %d), ino = %d, fs = %s", 522 major(pip->i_dev), minor(pip->i_dev), ino, fs->fs_fsmnt); 523 524/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode); 525 */ 526 ext2_discard_prealloc(pip); 527 528 /* we need to make sure that ext2_free_inode can adjust the 529 used_dir_counts in the group summary information - I'd 530 really like to know what the rationale behind this 531 'set i_mode to zero to denote an unused inode' is 532 */ 533 save_i_mode = pip->i_mode; 534 pip->i_mode = mode; 535 ext2_free_inode(pip); 536 pip->i_mode = save_i_mode; 537 return (0); 538} 539 540/* 541 * Fserr prints the name of a file system with an error diagnostic. 542 * 543 * The form of the error message is: 544 * fs: error message 545 */ 546static void 547ext2_fserr(fs, uid, cp) 548 struct ext2_sb_info *fs; 549 u_int uid; 550 char *cp; 551{ 552 553 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); 554} 555