ext2_alloc.c revision 31485
1/* 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ext2_alloc.c 8.8 (Berkeley) 2/21/94 40 */ 41 42#include "opt_quota.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/vnode.h> 47#include <sys/stat.h> 48#include <sys/mount.h> 49#include <sys/kernel.h> 50#include <sys/syslog.h> 51 52#include <ufs/ufs/quota.h> 53#include <ufs/ufs/inode.h> 54#include <ufs/ufs/ufsmount.h> 55 56#include <gnu/ext2fs/ext2_fs.h> 57#include <gnu/ext2fs/ext2_fs_sb.h> 58#include <gnu/ext2fs/fs.h> 59#include <gnu/ext2fs/ext2_extern.h> 60 61static void ext2_fserr __P((struct ext2_sb_info *, u_int, char *)); 62 63/* 64 * Linux calls this functions at the following locations: 65 * (1) the inode is freed 66 * (2) a preallocation miss occurs 67 * (3) truncate is called 68 * (4) release_file is called and f_mode & 2 69 * 70 * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2) 71 * the call in vfree might be redundant 72 */ 73void 74ext2_discard_prealloc(ip) 75 struct inode * ip; 76{ 77#ifdef EXT2_PREALLOCATE 78 if (ip->i_prealloc_count) { 79 int i = ip->i_prealloc_count; 80 ip->i_prealloc_count = 0; 81 ext2_free_blocks (ITOV(ip)->v_mount, 82 ip->i_prealloc_block, 83 i); 84 } 85#endif 86} 87 88/* 89 * Allocate a block in the file system. 90 * 91 * this takes the framework from ffs_alloc. To implement the 92 * actual allocation, it calls ext2_new_block, the ported version 93 * of the same Linux routine. 94 * 95 * we note that this is always called in connection with ext2_blkpref 96 * 97 * preallocation is done as Linux does it 98 */ 99int 100ext2_alloc(ip, lbn, bpref, size, cred, bnp) 101 register struct inode *ip; 102 daddr_t lbn, bpref; 103 int size; 104 struct ucred *cred; 105 daddr_t *bnp; 106{ 107 register struct ext2_sb_info *fs; 108 daddr_t bno; 109#if QUOTA 110 int error; 111#endif 112 113 *bnp = 0; 114 fs = ip->i_e2fs; 115#if DIAGNOSTIC 116 if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) { 117 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", 118 ip->i_dev, fs->s_blocksize, size, fs->fs_fsmnt); 119 panic("ext2_alloc: bad size"); 120 } 121 if (cred == NOCRED) 122 panic("ext2_alloc: missing credential"); 123#endif /* DIAGNOSTIC */ 124 if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0) 125 goto nospace; 126 if (cred->cr_uid != 0 && 127 fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count) 128 goto nospace; 129#if QUOTA 130 if (error = chkdq(ip, (long)btodb(size), cred, 0)) 131 return (error); 132#endif 133 if (bpref >= fs->s_es->s_blocks_count) 134 bpref = 0; 135 /* call the Linux code */ 136#ifdef EXT2_PREALLOCATE 137 /* To have a preallocation hit, we must 138 * - have at least one block preallocated 139 * - and our preferred block must have that block number or one below 140 */ 141 if (ip->i_prealloc_count && 142 (bpref == ip->i_prealloc_block || 143 bpref + 1 == ip->i_prealloc_block)) 144 { 145 bno = ip->i_prealloc_block++; 146 ip->i_prealloc_count--; 147 /* ext2_debug ("preallocation hit (%lu/%lu).\n", 148 ++alloc_hits, ++alloc_attempts); */ 149 150 /* Linux gets, clears, and releases the buffer at this 151 point - we don't have to that; we leave it to the caller 152 */ 153 } else { 154 ext2_discard_prealloc (ip); 155 /* ext2_debug ("preallocation miss (%lu/%lu).\n", 156 alloc_hits, ++alloc_attempts); */ 157 if (S_ISREG(ip->i_mode)) 158 bno = ext2_new_block 159 (ITOV(ip)->v_mount, bpref, 160 &ip->i_prealloc_count, 161 &ip->i_prealloc_block); 162 else 163 bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, 164 bpref, 0, 0); 165 } 166#else 167 bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); 168#endif 169 170 if (bno > 0) { 171 /* set next_alloc fields as done in block_getblk */ 172 ip->i_next_alloc_block = lbn; 173 ip->i_next_alloc_goal = bno; 174 175 ip->i_blocks += btodb(size); 176 ip->i_flag |= IN_CHANGE | IN_UPDATE; 177 *bnp = bno; 178 return (0); 179 } 180#if QUOTA 181 /* 182 * Restore user's disk quota because allocation failed. 183 */ 184 (void) chkdq(ip, (long)-btodb(size), cred, FORCE); 185#endif 186nospace: 187 ext2_fserr(fs, cred->cr_uid, "file system full"); 188 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 189 return (ENOSPC); 190} 191 192/* 193 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 194 * 195 * The vnode and an array of buffer pointers for a range of sequential 196 * logical blocks to be made contiguous is given. The allocator attempts 197 * to find a range of sequential blocks starting as close as possible to 198 * an fs_rotdelay offset from the end of the allocation for the logical 199 * block immediately preceeding the current range. If successful, the 200 * physical block numbers in the buffer pointers and in the inode are 201 * changed to reflect the new allocation. If unsuccessful, the allocation 202 * is left unchanged. The success in doing the reallocation is returned. 203 * Note that the error return is not reflected back to the user. Rather 204 * the previous block allocation will be used. 205 */ 206 207#ifdef FANCY_REALLOC 208#include <sys/sysctl.h> 209static int doasyncfree = 1; 210#ifdef OPT_DEBUG 211SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 212#endif /* OPT_DEBUG */ 213#endif 214 215int 216ext2_reallocblks(ap) 217 struct vop_reallocblks_args /* { 218 struct vnode *a_vp; 219 struct cluster_save *a_buflist; 220 } */ *ap; 221{ 222#ifndef FANCY_REALLOC 223/* printf("ext2_reallocblks not implemented\n"); */ 224return ENOSPC; 225#else 226 227 struct ext2_sb_info *fs; 228 struct inode *ip; 229 struct vnode *vp; 230 struct buf *sbp, *ebp; 231 daddr_t *bap, *sbap, *ebap; 232 struct cluster_save *buflist; 233 daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno; 234 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 235 int i, len, start_lvl, end_lvl, pref, ssize; 236 struct timeval tv; 237 238 vp = ap->a_vp; 239 ip = VTOI(vp); 240 fs = ip->i_e2fs; 241#ifdef UNKLAR 242 if (fs->fs_contigsumsize <= 0) 243 return (ENOSPC); 244#endif 245 buflist = ap->a_buflist; 246 len = buflist->bs_nchildren; 247 start_lbn = buflist->bs_children[0]->b_lblkno; 248 end_lbn = start_lbn + len - 1; 249#if DIAGNOSTIC 250 for (i = 1; i < len; i++) 251 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 252 panic("ext2_reallocblks: non-cluster"); 253#endif 254 /* 255 * If the latest allocation is in a new cylinder group, assume that 256 * the filesystem has decided to move and do not force it back to 257 * the previous cylinder group. 258 */ 259 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 260 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 261 return (ENOSPC); 262 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 263 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 264 return (ENOSPC); 265 /* 266 * Get the starting offset and block map for the first block. 267 */ 268 if (start_lvl == 0) { 269 sbap = &ip->i_db[0]; 270 soff = start_lbn; 271 } else { 272 idp = &start_ap[start_lvl - 1]; 273 if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) { 274 brelse(sbp); 275 return (ENOSPC); 276 } 277 sbap = (daddr_t *)sbp->b_data; 278 soff = idp->in_off; 279 } 280 /* 281 * Find the preferred location for the cluster. 282 */ 283 pref = ext2_blkpref(ip, start_lbn, soff, sbap); 284 /* 285 * If the block range spans two block maps, get the second map. 286 */ 287 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 288 ssize = len; 289 } else { 290#if DIAGNOSTIC 291 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 292 panic("ext2_reallocblk: start == end"); 293#endif 294 ssize = len - (idp->in_off + 1); 295 if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp)) 296 goto fail; 297 ebap = (daddr_t *)ebp->b_data; 298 } 299 /* 300 * Search the block map looking for an allocation of the desired size. 301 */ 302 if ((newblk = (daddr_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref, 303 len, (u_long (*)())ext2_clusteralloc)) == 0) 304 goto fail; 305 /* 306 * We have found a new contiguous block. 307 * 308 * First we have to replace the old block pointers with the new 309 * block pointers in the inode and indirect blocks associated 310 * with the file. 311 */ 312 blkno = newblk; 313 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) { 314 if (i == ssize) 315 bap = ebap; 316#if DIAGNOSTIC 317 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 318 panic("ext2_reallocblks: alloc mismatch"); 319#endif 320 *bap++ = blkno; 321 } 322 /* 323 * Next we must write out the modified inode and indirect blocks. 324 * For strict correctness, the writes should be synchronous since 325 * the old block values may have been written to disk. In practise 326 * they are almost never written, but if we are concerned about 327 * strict correctness, the `doasyncfree' flag should be set to zero. 328 * 329 * The test on `doasyncfree' should be changed to test a flag 330 * that shows whether the associated buffers and inodes have 331 * been written. The flag should be set when the cluster is 332 * started and cleared whenever the buffer or inode is flushed. 333 * We can then check below to see if it is set, and do the 334 * synchronous write only when it has been cleared. 335 */ 336 if (sbap != &ip->i_db[0]) { 337 if (doasyncfree) 338 bdwrite(sbp); 339 else 340 bwrite(sbp); 341 } else { 342 ip->i_flag |= IN_CHANGE | IN_UPDATE; 343 if (!doasyncfree) { 344 gettime(&tv); 345 UFS_UPDATE(vp, &tv, &tv, MNT_WAIT); 346 } 347 } 348 if (ssize < len) 349 if (doasyncfree) 350 bdwrite(ebp); 351 else 352 bwrite(ebp); 353 /* 354 * Last, free the old blocks and assign the new blocks to the buffers. 355 */ 356 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) { 357 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 358 fs->s_blocksize); 359 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 360 } 361 return (0); 362 363fail: 364 if (ssize < len) 365 brelse(ebp); 366 if (sbap != &ip->i_db[0]) 367 brelse(sbp); 368 return (ENOSPC); 369 370#endif /* FANCY_REALLOC */ 371} 372 373/* 374 * Allocate an inode in the file system. 375 * 376 * we leave the actual allocation strategy to the (modified) 377 * ext2_new_inode(), to make sure we get the policies right 378 */ 379int 380ext2_valloc(pvp, mode, cred, vpp) 381 struct vnode *pvp; 382 int mode; 383 struct ucred *cred; 384 struct vnode **vpp; 385{ 386 register struct inode *pip; 387 register struct ext2_sb_info *fs; 388 register struct inode *ip; 389 ino_t ino; 390 int i, error; 391 392 *vpp = NULL; 393 pip = VTOI(pvp); 394 fs = pip->i_e2fs; 395 if (fs->s_es->s_free_inodes_count == 0) 396 goto noinodes; 397 398 /* call the Linux routine - it returns the inode number only */ 399 ino = ext2_new_inode(pip, mode); 400 401 if (ino == 0) 402 goto noinodes; 403 error = VFS_VGET(pvp->v_mount, ino, vpp); 404 if (error) { 405 UFS_VFREE(pvp, ino, mode); 406 return (error); 407 } 408 ip = VTOI(*vpp); 409 410 /* 411 the question is whether using VGET was such good idea at all - 412 Linux doesn't read the old inode in when it's allocating a 413 new one. I will set at least i_size & i_blocks the zero. 414 */ 415 ip->i_mode = 0; 416 ip->i_size = 0; 417 ip->i_blocks = 0; 418 ip->i_flags = 0; 419 /* now we want to make sure that the block pointers are zeroed out */ 420 for(i = 0; i < EXT2_NDIR_BLOCKS; i++) 421 ip->i_db[i] = 0; 422 423 /* 424 * Set up a new generation number for this inode. 425 * XXX check if this makes sense in ext2 426 */ 427 if (ip->i_gen == 0 || ++ip->i_gen == 0) 428 ip->i_gen = random() / 2 + 1; 429/* 430printf("ext2_valloc: allocated inode %d\n", ino); 431*/ 432 return (0); 433noinodes: 434 ext2_fserr(fs, cred->cr_uid, "out of inodes"); 435 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 436 return (ENOSPC); 437} 438 439/* 440 * Select the desired position for the next block in a file. 441 * 442 * we try to mimic what Remy does in inode_getblk/block_getblk 443 * 444 * we note: blocknr == 0 means that we're about to allocate either 445 * a direct block or a pointer block at the first level of indirection 446 * (In other words, stuff that will go in i_db[] or i_ib[]) 447 * 448 * blocknr != 0 means that we're allocating a block that is none 449 * of the above. Then, blocknr tells us the number of the block 450 * that will hold the pointer 451 */ 452daddr_t 453ext2_blkpref(ip, lbn, indx, bap, blocknr) 454 struct inode *ip; 455 daddr_t lbn; 456 int indx; 457 daddr_t *bap; 458 daddr_t blocknr; 459{ 460 int tmp; 461 462 /* if the next block is actually what we thought it is, 463 then set the goal to what we thought it should be 464 */ 465 if(ip->i_next_alloc_block == lbn) 466 return ip->i_next_alloc_goal; 467 468 /* now check whether we were provided with an array that basically 469 tells us previous blocks to which we want to stay closeby 470 */ 471 if(bap) 472 for (tmp = indx - 1; tmp >= 0; tmp--) 473 if (bap[tmp]) 474 return bap[tmp]; 475 476 /* else let's fall back to the blocknr, or, if there is none, 477 follow the rule that a block should be allocated near it's inode 478 */ 479 return blocknr ? blocknr : 480 (daddr_t)(ip->i_block_group * 481 EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + 482 ip->i_e2fs->s_es->s_first_data_block; 483} 484 485/* 486 * Free a block or fragment. 487 * 488 * pass on to the Linux code 489 */ 490void 491ext2_blkfree(ip, bno, size) 492 register struct inode *ip; 493 daddr_t bno; 494 long size; 495{ 496 register struct ext2_sb_info *fs; 497 498 fs = ip->i_e2fs; 499 /* 500 * call Linux code with mount *, block number, count 501 */ 502 ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size); 503} 504 505/* 506 * Free an inode. 507 * 508 * the maintenance of the actual bitmaps is again up to the linux code 509 */ 510int 511ext2_vfree(pvp, ino, mode) 512 struct vnode *pvp; 513 ino_t ino; 514 int mode; 515{ 516 register struct ext2_sb_info *fs; 517 register struct inode *pip; 518 519 pip = VTOI(pvp); 520 fs = pip->i_e2fs; 521 if ((u_int)ino >= fs->s_inodes_per_group * fs->s_groups_count) 522 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s", 523 pip->i_dev, ino, fs->fs_fsmnt); 524 525/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode); 526 */ 527 ext2_discard_prealloc(pip); 528 529 /* we need to make sure that ext2_free_inode can adjust the 530 used_dir_counts in the group summary information - I'd 531 really like to know what the rationale behind this 532 'set i_mode to zero to denote an unused inode' is 533 */ 534 mode = pip->i_mode; 535 pip->i_mode = mode; 536 ext2_free_inode(pip); 537 pip->i_mode = mode; 538 return (0); 539} 540 541/* 542 * Fserr prints the name of a file system with an error diagnostic. 543 * 544 * The form of the error message is: 545 * fs: error message 546 */ 547static void 548ext2_fserr(fs, uid, cp) 549 struct ext2_sb_info *fs; 550 u_int uid; 551 char *cp; 552{ 553 554 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); 555} 556