ext2_alloc.c revision 153110
1/*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1982, 1986, 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_alloc.c 153110 2005-12-05 11:58:35Z ru $ 37 */ 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/conf.h> 42#include <sys/vnode.h> 43#include <sys/stat.h> 44#include <sys/mount.h> 45#include <sys/syslog.h> 46 47#include <gnu/fs/ext2fs/inode.h> 48#include <gnu/fs/ext2fs/ext2_mount.h> 49#include <gnu/fs/ext2fs/ext2_fs.h> 50#include <gnu/fs/ext2fs/ext2_fs_sb.h> 51#include <gnu/fs/ext2fs/fs.h> 52#include <gnu/fs/ext2fs/ext2_extern.h> 53 54static void ext2_fserr(struct ext2_sb_info *, u_int, char *); 55 56/* 57 * Linux calls this functions at the following locations: 58 * (1) the inode is freed 59 * (2) a preallocation miss occurs 60 * (3) truncate is called 61 * (4) release_file is called and f_mode & 2 62 * 63 * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2) 64 * the call in vfree might be redundant 65 */ 66void 67ext2_discard_prealloc(ip) 68 struct inode * ip; 69{ 70#ifdef EXT2_PREALLOCATE 71 if (ip->i_prealloc_count) { 72 int i = ip->i_prealloc_count; 73 ip->i_prealloc_count = 0; 74 ext2_free_blocks (ITOV(ip)->v_mount, 75 ip->i_prealloc_block, 76 i); 77 } 78#endif 79} 80 81/* 82 * Allocate a block in the file system. 83 * 84 * this takes the framework from ffs_alloc. To implement the 85 * actual allocation, it calls ext2_new_block, the ported version 86 * of the same Linux routine. 
87 * 88 * we note that this is always called in connection with ext2_blkpref 89 * 90 * preallocation is done as Linux does it 91 */ 92int 93ext2_alloc(ip, lbn, bpref, size, cred, bnp) 94 struct inode *ip; 95 int32_t lbn, bpref; 96 int size; 97 struct ucred *cred; 98 int32_t *bnp; 99{ 100 struct ext2_sb_info *fs; 101 int32_t bno; 102 103 *bnp = 0; 104 fs = ip->i_e2fs; 105#ifdef DIAGNOSTIC 106 if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) { 107 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 108 fs->s_blocksize, size, fs->fs_fsmnt); 109 panic("ext2_alloc: bad size"); 110 } 111 if (cred == NOCRED) 112 panic("ext2_alloc: missing credential"); 113#endif /* DIAGNOSTIC */ 114 if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0) 115 goto nospace; 116 if (cred->cr_uid != 0 && 117 fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count) 118 goto nospace; 119 if (bpref >= fs->s_es->s_blocks_count) 120 bpref = 0; 121 /* call the Linux code */ 122#ifdef EXT2_PREALLOCATE 123 /* To have a preallocation hit, we must 124 * - have at least one block preallocated 125 * - and our preferred block must have that block number or one below 126 */ 127 if (ip->i_prealloc_count && 128 (bpref == ip->i_prealloc_block || 129 bpref + 1 == ip->i_prealloc_block)) 130 { 131 bno = ip->i_prealloc_block++; 132 ip->i_prealloc_count--; 133 /* ext2_debug ("preallocation hit (%lu/%lu).\n", 134 ++alloc_hits, ++alloc_attempts); */ 135 136 /* Linux gets, clears, and releases the buffer at this 137 point - we don't have to that; we leave it to the caller 138 */ 139 } else { 140 ext2_discard_prealloc (ip); 141 /* ext2_debug ("preallocation miss (%lu/%lu).\n", 142 alloc_hits, ++alloc_attempts); */ 143 if (S_ISREG(ip->i_mode)) 144 bno = ext2_new_block 145 (ITOV(ip)->v_mount, bpref, 146 &ip->i_prealloc_count, 147 &ip->i_prealloc_block); 148 else 149 bno = (int32_t)ext2_new_block(ITOV(ip)->v_mount, 150 bpref, 0, 0); 151 } 152#else 153 bno = 
(int32_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); 154#endif 155 156 if (bno > 0) { 157 /* set next_alloc fields as done in block_getblk */ 158 ip->i_next_alloc_block = lbn; 159 ip->i_next_alloc_goal = bno; 160 161 ip->i_blocks += btodb(size); 162 ip->i_flag |= IN_CHANGE | IN_UPDATE; 163 *bnp = bno; 164 return (0); 165 } 166nospace: 167 ext2_fserr(fs, cred->cr_uid, "file system full"); 168 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 169 return (ENOSPC); 170} 171 172/* 173 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 174 * 175 * The vnode and an array of buffer pointers for a range of sequential 176 * logical blocks to be made contiguous is given. The allocator attempts 177 * to find a range of sequential blocks starting as close as possible to 178 * an fs_rotdelay offset from the end of the allocation for the logical 179 * block immediately preceding the current range. If successful, the 180 * physical block numbers in the buffer pointers and in the inode are 181 * changed to reflect the new allocation. If unsuccessful, the allocation 182 * is left unchanged. The success in doing the reallocation is returned. 183 * Note that the error return is not reflected back to the user. Rather 184 * the previous block allocation will be used. 
 *
 * Unless FANCY_REALLOC is defined at build time, everything after the
 * first return in the body is compiled out and the function simply
 * returns ENOSPC.
 */

#ifdef FANCY_REALLOC
#include <sys/sysctl.h>
/*
 * Non-zero selects delayed writes (bdwrite) of the modified block maps
 * below; zero selects bwrite/ext2_update instead.
 */
static int doasyncfree = 1;
#ifdef OPT_DEBUG
SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
#endif /* OPT_DEBUG */
#endif

int
ext2_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
#ifndef FANCY_REALLOC
/* printf("ext2_reallocblks not implemented\n"); */
return ENOSPC;
#else

	struct ext2_sb_info *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	int32_t *bap, *sbap, *ebap;
	struct cluster_save *buflist;
	/* NOTE(review): eoff is declared but never referenced below. */
	int32_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_e2fs;
#ifdef UNKLAR
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
#endif
	/* The cluster is the len buffers in buflist->bs_children[]. */
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	/* The logical block numbers must be consecutive. */
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ext2_reallocblks: non-cluster");
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 * With start_lvl == 0 the map is the inode's own i_db[] array and
	 * no buffer is held; otherwise the map is read into sbp.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (int32_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 *
	 * NOTE(review): ext2_blkpref() is defined in this file with five
	 * parameters (ip, lbn, indx, bap, blocknr) but is called here with
	 * four - confirm the intended blocknr before enabling FANCY_REALLOC.
	 */
	pref = ext2_blkpref(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 * ssize ends up as the number of entries taken from the first map;
	 * ebp/ebap are only set when ssize < len.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		/*
		 * NOTE(review): this indexes start_ap[start_lvl - 1]; nothing
		 * visible here rules out start_lvl == 0 on this path - verify.
		 */
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ext2_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp))
			goto fail;
		ebap = (int32_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 * A result of 0 is treated as failure.
	 */
	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref,
	    len, (u_long (*)())ext2_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
		/* After ssize entries, continue at the start of the second map. */
		if (i == ssize)
			bap = ebap;
#ifdef DIAGNOSTIC
		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
			panic("ext2_reallocblks: alloc mismatch");
#endif
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practise
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ext2_update(vp, 1);
	}
	/* The else below binds to the inner if (doasyncfree). */
	if (ssize < len)
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
		    fs->s_blocksize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
	}
	return (0);

fail:
	/*
	 * Release ebp only if the second-map branch was taken (ssize < len)
	 * and sbp only if the first map is not the inode's i_db[] array.
	 */
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);

#endif /* FANCY_REALLOC */
}

/*
 * Allocate an inode in the file system.
352 * 353 * we leave the actual allocation strategy to the (modified) 354 * ext2_new_inode(), to make sure we get the policies right 355 */ 356int 357ext2_valloc(pvp, mode, cred, vpp) 358 struct vnode *pvp; 359 int mode; 360 struct ucred *cred; 361 struct vnode **vpp; 362{ 363 struct inode *pip; 364 struct ext2_sb_info *fs; 365 struct inode *ip; 366 ino_t ino; 367 int i, error; 368 369 *vpp = NULL; 370 pip = VTOI(pvp); 371 fs = pip->i_e2fs; 372 if (fs->s_es->s_free_inodes_count == 0) 373 goto noinodes; 374 375 /* call the Linux routine - it returns the inode number only */ 376 ino = ext2_new_inode(pip, mode); 377 378 if (ino == 0) 379 goto noinodes; 380 error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); 381 if (error) { 382 ext2_vfree(pvp, ino, mode); 383 return (error); 384 } 385 ip = VTOI(*vpp); 386 387 /* 388 the question is whether using VGET was such good idea at all - 389 Linux doesn't read the old inode in when it's allocating a 390 new one. I will set at least i_size & i_blocks the zero. 391 */ 392 ip->i_mode = 0; 393 ip->i_size = 0; 394 ip->i_blocks = 0; 395 ip->i_flags = 0; 396 /* now we want to make sure that the block pointers are zeroed out */ 397 for (i = 0; i < NDADDR; i++) 398 ip->i_db[i] = 0; 399 for (i = 0; i < NIADDR; i++) 400 ip->i_ib[i] = 0; 401 402 /* 403 * Set up a new generation number for this inode. 404 * XXX check if this makes sense in ext2 405 */ 406 if (ip->i_gen == 0 || ++ip->i_gen == 0) 407 ip->i_gen = random() / 2 + 1; 408/* 409printf("ext2_valloc: allocated inode %d\n", ino); 410*/ 411 return (0); 412noinodes: 413 ext2_fserr(fs, cred->cr_uid, "out of inodes"); 414 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 415 return (ENOSPC); 416} 417 418/* 419 * Select the desired position for the next block in a file. 
420 * 421 * we try to mimic what Remy does in inode_getblk/block_getblk 422 * 423 * we note: blocknr == 0 means that we're about to allocate either 424 * a direct block or a pointer block at the first level of indirection 425 * (In other words, stuff that will go in i_db[] or i_ib[]) 426 * 427 * blocknr != 0 means that we're allocating a block that is none 428 * of the above. Then, blocknr tells us the number of the block 429 * that will hold the pointer 430 */ 431int32_t 432ext2_blkpref(ip, lbn, indx, bap, blocknr) 433 struct inode *ip; 434 int32_t lbn; 435 int indx; 436 int32_t *bap; 437 int32_t blocknr; 438{ 439 int tmp; 440 441 /* if the next block is actually what we thought it is, 442 then set the goal to what we thought it should be 443 */ 444 if(ip->i_next_alloc_block == lbn) 445 return ip->i_next_alloc_goal; 446 447 /* now check whether we were provided with an array that basically 448 tells us previous blocks to which we want to stay closeby 449 */ 450 if(bap) 451 for (tmp = indx - 1; tmp >= 0; tmp--) 452 if (bap[tmp]) 453 return bap[tmp]; 454 455 /* else let's fall back to the blocknr, or, if there is none, 456 follow the rule that a block should be allocated near its inode 457 */ 458 return blocknr ? blocknr : 459 (int32_t)(ip->i_block_group * 460 EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + 461 ip->i_e2fs->s_es->s_first_data_block; 462} 463 464/* 465 * Free a block or fragment. 466 * 467 * pass on to the Linux code 468 */ 469void 470ext2_blkfree(ip, bno, size) 471 struct inode *ip; 472 int32_t bno; 473 long size; 474{ 475 struct ext2_sb_info *fs; 476 477 fs = ip->i_e2fs; 478 /* 479 * call Linux code with mount *, block number, count 480 */ 481 ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size); 482} 483 484/* 485 * Free an inode. 
486 * 487 * the maintenance of the actual bitmaps is again up to the linux code 488 */ 489int 490ext2_vfree(pvp, ino, mode) 491 struct vnode *pvp; 492 ino_t ino; 493 int mode; 494{ 495 struct ext2_sb_info *fs; 496 struct inode *pip; 497 mode_t save_i_mode; 498 499 pip = VTOI(pvp); 500 fs = pip->i_e2fs; 501 if ((u_int)ino > fs->s_inodes_per_group * fs->s_groups_count) 502 panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s", 503 pip->i_devvp, ino, fs->fs_fsmnt); 504 505/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode); 506 */ 507 ext2_discard_prealloc(pip); 508 509 /* we need to make sure that ext2_free_inode can adjust the 510 used_dir_counts in the group summary information - I'd 511 really like to know what the rationale behind this 512 'set i_mode to zero to denote an unused inode' is 513 */ 514 save_i_mode = pip->i_mode; 515 pip->i_mode = mode; 516 ext2_free_inode(pip); 517 pip->i_mode = save_i_mode; 518 return (0); 519} 520 521/* 522 * Fserr prints the name of a file system with an error diagnostic. 523 * 524 * The form of the error message is: 525 * fs: error message 526 */ 527static void 528ext2_fserr(fs, uid, cp) 529 struct ext2_sb_info *fs; 530 u_int uid; 531 char *cp; 532{ 533 534 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); 535} 536