1/*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 * $FreeBSD$ 39 */ 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/conf.h> 44#include <sys/vnode.h> 45#include <sys/sdt.h> 46#include <sys/stat.h> 47#include <sys/mount.h> 48#include <sys/sysctl.h> 49#include <sys/syslog.h> 50#include <sys/buf.h> 51#include <sys/endian.h> 52 53#include <fs/ext2fs/fs.h> 54#include <fs/ext2fs/inode.h> 55#include <fs/ext2fs/ext2_mount.h> 56#include <fs/ext2fs/ext2fs.h> 57#include <fs/ext2fs/ext2_extern.h> 58 59SDT_PROVIDER_DEFINE(ext2fs); 60/* 61 * ext2fs trace probe: 62 * arg0: verbosity. Higher numbers give more verbose messages 63 * arg1: Textual message 64 */ 65SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 66SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 67 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 68SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 69SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 70SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 71SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 72 "int", "daddr_t", "char*"); 73SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 74SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 75 76static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 77static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 78static u_long ext2_dirpref(struct inode *); 79static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 80 daddr_t (*)(struct inode *, int, daddr_t, 81 int)); 82static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 83static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 84 85/* 86 * Allocate a block in the filesystem. 87 * 88 * A preference may be optionally specified. If a preference is given 89 * the following hierarchy is used to allocate a block: 90 * 1) allocate the requested block. 91 * 2) allocate a rotationally optimal block in the same cylinder. 92 * 3) allocate a block in the same cylinder group. 93 * 4) quadradically rehash into other cylinder groups, until an 94 * available block is located. 95 * If no block preference is given the following hierarchy is used 96 * to allocate a block: 97 * 1) allocate a block in the cylinder group that contains the 98 * inode for the file. 99 * 2) quadradically rehash into other cylinder groups, until an 100 * available block is located. 101 */ 102int 103ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 104 struct ucred *cred, e4fs_daddr_t *bnp) 105{ 106 struct m_ext2fs *fs; 107 struct ext2mount *ump; 108 e4fs_daddr_t bno; 109 int cg; 110 111 *bnp = 0; 112 fs = ip->i_e2fs; 113 ump = ip->i_ump; 114 mtx_assert(EXT2_MTX(ump), MA_OWNED); 115#ifdef INVARIANTS 116 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 117 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 118 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 119 panic("ext2_alloc: bad size"); 120 } 121 if (cred == NOCRED) 122 panic("ext2_alloc: missing credential"); 123#endif /* INVARIANTS */ 124 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 125 goto nospace; 126 if (cred->cr_uid != 0 && 127 fs->e2fs_fbcount < fs->e2fs_rbcount) 128 goto nospace; 129 if (bpref >= fs->e2fs_bcount) 130 bpref = 0; 131 if (bpref == 0) 132 cg = ino_to_cg(fs, ip->i_number); 133 else 134 cg = dtog(fs, bpref); 135 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 136 ext2_alloccg); 137 if (bno > 0) { 138 /* set next_alloc fields as done in block_getblk */ 139 ip->i_next_alloc_block = lbn; 140 ip->i_next_alloc_goal = bno; 141 142 ip->i_blocks += btodb(fs->e2fs_bsize); 143 ip->i_flag |= IN_CHANGE | IN_UPDATE; 144 *bnp = bno; 145 return (0); 146 } 147nospace: 148 EXT2_UNLOCK(ump); 149 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 150 return (ENOSPC); 151} 152 153/* 154 * Allocate EA's block for inode. 155 */ 156e4fs_daddr_t 157ext2_alloc_meta(struct inode *ip) 158{ 159 struct m_ext2fs *fs; 160 daddr_t blk; 161 162 fs = ip->i_e2fs; 163 164 EXT2_LOCK(ip->i_ump); 165 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 166 ext2_alloccg); 167 if (0 == blk) { 168 EXT2_UNLOCK(ip->i_ump); 169 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 170 } 171 172 return (blk); 173} 174 175/* 176 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 177 * 178 * The vnode and an array of buffer pointers for a range of sequential 179 * logical blocks to be made contiguous is given. The allocator attempts 180 * to find a range of sequential blocks starting as close as possible to 181 * an fs_rotdelay offset from the end of the allocation for the logical 182 * block immediately preceding the current range. If successful, the 183 * physical block numbers in the buffer pointers and in the inode are 184 * changed to reflect the new allocation. If unsuccessful, the allocation 185 * is left unchanged. The success in doing the reallocation is returned. 186 * Note that the error return is not reflected back to the user. Rather 187 * the previous block allocation will be used. 188 */ 189 190static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); 191 192static int doasyncfree = 1; 193 194SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 195 "Use asynchronous writes to update block pointers when freeing blocks"); 196 197static int doreallocblks = 0; 198 199SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 200 201int 202ext2_reallocblks(struct vop_reallocblks_args *ap) 203{ 204 struct m_ext2fs *fs; 205 struct inode *ip; 206 struct vnode *vp; 207 struct buf *sbp, *ebp; 208 uint32_t *bap, *sbap, *ebap; 209 struct ext2mount *ump; 210 struct cluster_save *buflist; 211 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 212 e2fs_lbn_t start_lbn, end_lbn; 213 int soff; 214 e2fs_daddr_t newblk, blkno; 215 int i, len, start_lvl, end_lvl, pref, ssize; 216 217 if (doreallocblks == 0) 218 return (ENOSPC); 219 220 vp = ap->a_vp; 221 ip = VTOI(vp); 222 fs = ip->i_e2fs; 223 ump = ip->i_ump; 224 225 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 226 return (ENOSPC); 227 228 buflist = ap->a_buflist; 229 len = buflist->bs_nchildren; 230 start_lbn = buflist->bs_children[0]->b_lblkno; 231 end_lbn = start_lbn + len - 1; 232#ifdef INVARIANTS 233 for (i = 1; i < len; i++) 234 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 235 panic("ext2_reallocblks: non-cluster"); 236#endif 237 /* 238 * If the cluster crosses the boundary for the first indirect 239 * block, leave space for the indirect block. Indirect blocks 240 * are initially laid out in a position after the last direct 241 * block. Block reallocation would usually destroy locality by 242 * moving the indirect block out of the way to make room for 243 * data blocks if we didn't compensate here. We should also do 244 * this for other indirect block boundaries, but it is only 245 * important for the first one. 246 */ 247 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 248 return (ENOSPC); 249 /* 250 * If the latest allocation is in a new cylinder group, assume that 251 * the filesystem has decided to move and do not force it back to 252 * the previous cylinder group. 253 */ 254 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 255 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 256 return (ENOSPC); 257 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 258 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 259 return (ENOSPC); 260 /* 261 * Get the starting offset and block map for the first block. 262 */ 263 if (start_lvl == 0) { 264 sbap = &ip->i_db[0]; 265 soff = start_lbn; 266 } else { 267 idp = &start_ap[start_lvl - 1]; 268 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 269 brelse(sbp); 270 return (ENOSPC); 271 } 272 sbap = (u_int *)sbp->b_data; 273 soff = idp->in_off; 274 } 275 /* 276 * If the block range spans two block maps, get the second map. 277 */ 278 ebap = NULL; 279 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 280 ssize = len; 281 } else { 282#ifdef INVARIANTS 283 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 284 panic("ext2_reallocblks: start == end"); 285#endif 286 ssize = len - (idp->in_off + 1); 287 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 288 goto fail; 289 ebap = (u_int *)ebp->b_data; 290 } 291 /* 292 * Find the preferred location for the cluster. 293 */ 294 EXT2_LOCK(ump); 295 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 296 /* 297 * Search the block map looking for an allocation of the desired size. 298 */ 299 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 300 len, ext2_clusteralloc)) == 0) { 301 EXT2_UNLOCK(ump); 302 goto fail; 303 } 304 /* 305 * We have found a new contiguous block. 306 * 307 * First we have to replace the old block pointers with the new 308 * block pointers in the inode and indirect blocks associated 309 * with the file. 310 */ 311 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 312 ip->i_number, start_lbn, end_lbn); 313 blkno = newblk; 314 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 315 if (i == ssize) { 316 bap = ebap; 317 soff = -i; 318 } 319#ifdef INVARIANTS 320 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 321 panic("ext2_reallocblks: alloc mismatch"); 322#endif 323 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 324 *bap++ = blkno; 325 } 326 /* 327 * Next we must write out the modified inode and indirect blocks. 328 * For strict correctness, the writes should be synchronous since 329 * the old block values may have been written to disk. In practise 330 * they are almost never written, but if we are concerned about 331 * strict correctness, the `doasyncfree' flag should be set to zero. 332 * 333 * The test on `doasyncfree' should be changed to test a flag 334 * that shows whether the associated buffers and inodes have 335 * been written. The flag should be set when the cluster is 336 * started and cleared whenever the buffer or inode is flushed. 337 * We can then check below to see if it is set, and do the 338 * synchronous write only when it has been cleared. 339 */ 340 if (sbap != &ip->i_db[0]) { 341 if (doasyncfree) 342 bdwrite(sbp); 343 else 344 bwrite(sbp); 345 } else { 346 ip->i_flag |= IN_CHANGE | IN_UPDATE; 347 if (!doasyncfree) 348 ext2_update(vp, 1); 349 } 350 if (ssize < len) { 351 if (doasyncfree) 352 bdwrite(ebp); 353 else 354 bwrite(ebp); 355 } 356 /* 357 * Last, free the old blocks and assign the new blocks to the buffers. 358 */ 359 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 360 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 361 fs->e2fs_bsize); 362 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 363 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 364 } 365 366 return (0); 367 368fail: 369 if (ssize < len) 370 brelse(ebp); 371 if (sbap != &ip->i_db[0]) 372 brelse(sbp); 373 return (ENOSPC); 374} 375 376/* 377 * Allocate an inode in the filesystem. 378 * 379 */ 380int 381ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 382{ 383 struct timespec ts; 384 struct m_ext2fs *fs; 385 struct ext2mount *ump; 386 struct inode *pip; 387 struct inode *ip; 388 struct vnode *vp; 389 struct thread *td; 390 ino_t ino, ipref; 391 int error, cg; 392 393 *vpp = NULL; 394 pip = VTOI(pvp); 395 fs = pip->i_e2fs; 396 ump = pip->i_ump; 397 398 EXT2_LOCK(ump); 399 if (fs->e2fs->e2fs_ficount == 0) 400 goto noinodes; 401 /* 402 * If it is a directory then obtain a cylinder group based on 403 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 404 * always the next inode. 405 */ 406 if ((mode & IFMT) == IFDIR) { 407 cg = ext2_dirpref(pip); 408 if (fs->e2fs_contigdirs[cg] < 255) 409 fs->e2fs_contigdirs[cg]++; 410 } else { 411 cg = ino_to_cg(fs, pip->i_number); 412 if (fs->e2fs_contigdirs[cg] > 0) 413 fs->e2fs_contigdirs[cg]--; 414 } 415 ipref = cg * fs->e2fs->e2fs_ipg + 1; 416 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 417 if (ino == 0) 418 goto noinodes; 419 420 td = curthread; 421 error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 422 if (error || *vpp != NULL) { 423 return (error); 424 } 425 426 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 427 428 /* Allocate a new vnode/inode. */ 429 if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) { 430 free(ip, M_EXT2NODE); 431 return (error); 432 } 433 434 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 435 vp->v_data = ip; 436 ip->i_vnode = vp; 437 ip->i_e2fs = fs = ump->um_e2fs; 438 ip->i_ump = ump; 439 ip->i_number = ino; 440 ip->i_block_group = ino_to_cg(fs, ino); 441 ip->i_next_alloc_block = 0; 442 ip->i_next_alloc_goal = 0; 443 444 error = insmntque(vp, ump->um_mountp); 445 if (error) { 446 free(ip, M_EXT2NODE); 447 return (error); 448 } 449 450 error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 451 if (error || *vpp != NULL) { 452 *vpp = NULL; 453 free(ip, M_EXT2NODE); 454 return (error); 455 } 456 457 if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) { 458 vput(vp); 459 *vpp = NULL; 460 free(ip, M_EXT2NODE); 461 return (error); 462 } 463 464 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 465 && (S_ISREG(mode) || S_ISDIR(mode))) 466 ext4_ext_tree_init(ip); 467 else 468 memset(ip->i_data, 0, sizeof(ip->i_data)); 469 470 471 /* 472 * Set up a new generation number for this inode. 473 * Avoid zero values. 474 */ 475 do { 476 ip->i_gen = arc4random(); 477 } while (ip->i_gen == 0); 478 479 vfs_timestamp(&ts); 480 ip->i_birthtime = ts.tv_sec; 481 ip->i_birthnsec = ts.tv_nsec; 482 483 *vpp = vp; 484 485 return (0); 486 487noinodes: 488 EXT2_UNLOCK(ump); 489 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 490 return (ENOSPC); 491} 492 493/* 494 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 495 */ 496uint64_t 497e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 498{ 499 500 return (((uint64_t)(gd->ext4bgd_b_bitmap_hi) << 32) | 501 gd->ext2bgd_b_bitmap); 502} 503 504uint64_t 505e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 506{ 507 508 return (((uint64_t)(gd->ext4bgd_i_bitmap_hi) << 32) | 509 gd->ext2bgd_i_bitmap); 510} 511 512uint64_t 513e2fs_gd_get_i_tables(struct ext2_gd *gd) 514{ 515 516 return (((uint64_t)(gd->ext4bgd_i_tables_hi) << 32) | 517 gd->ext2bgd_i_tables); 518} 519 520static uint32_t 521e2fs_gd_get_nbfree(struct ext2_gd *gd) 522{ 523 524 return (((uint32_t)(gd->ext4bgd_nbfree_hi) << 16) | 525 gd->ext2bgd_nbfree); 526} 527 528static void 529e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 530{ 531 532 gd->ext2bgd_nbfree = val & 0xffff; 533 gd->ext4bgd_nbfree_hi = val >> 16; 534} 535 536static uint32_t 537e2fs_gd_get_nifree(struct ext2_gd *gd) 538{ 539 540 return (((uint32_t)(gd->ext4bgd_nifree_hi) << 16) | 541 gd->ext2bgd_nifree); 542} 543 544static void 545e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 546{ 547 548 gd->ext2bgd_nifree = val & 0xffff; 549 gd->ext4bgd_nifree_hi = val >> 16; 550} 551 552uint32_t 553e2fs_gd_get_ndirs(struct ext2_gd *gd) 554{ 555 556 return (((uint32_t)(gd->ext4bgd_ndirs_hi) << 16) | 557 gd->ext2bgd_ndirs); 558} 559 560static void 561e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 562{ 563 564 gd->ext2bgd_ndirs = val & 0xffff; 565 gd->ext4bgd_ndirs_hi = val >> 16; 566} 567 568static uint32_t 569e2fs_gd_get_i_unused(struct ext2_gd *gd) 570{ 571 return (((uint32_t)(gd->ext4bgd_i_unused_hi) << 16) | 572 gd->ext4bgd_i_unused); 573} 574 575static void 576e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 577{ 578 579 gd->ext4bgd_i_unused = val & 0xffff; 580 gd->ext4bgd_i_unused_hi = val >> 16; 581} 582 583/* 584 * Find a cylinder to place a directory. 585 * 586 * The policy implemented by this algorithm is to allocate a 587 * directory inode in the same cylinder group as its parent 588 * directory, but also to reserve space for its files inodes 589 * and data. Restrict the number of directories which may be 590 * allocated one after another in the same cylinder group 591 * without intervening allocation of files. 592 * 593 * If we allocate a first level directory then force allocation 594 * in another cylinder group. 595 * 596 */ 597static u_long 598ext2_dirpref(struct inode *pip) 599{ 600 struct m_ext2fs *fs; 601 int cg, prefcg, cgsize; 602 uint64_t avgbfree, minbfree; 603 u_int avgifree, avgndir, curdirsize; 604 u_int minifree, maxndir; 605 u_int mincg, minndir; 606 u_int dirsize, maxcontigdirs; 607 608 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 609 fs = pip->i_e2fs; 610 611 avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; 612 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 613 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 614 615 /* 616 * Force allocation in another cg if creating a first level dir. 617 */ 618 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 619 if (ITOV(pip)->v_vflag & VV_ROOT) { 620 prefcg = arc4random() % fs->e2fs_gcount; 621 mincg = prefcg; 622 minndir = fs->e2fs_ipg; 623 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 624 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 625 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 626 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 627 mincg = cg; 628 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 629 } 630 for (cg = 0; cg < prefcg; cg++) 631 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 632 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 633 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 634 mincg = cg; 635 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 636 } 637 return (mincg); 638 } 639 /* 640 * Count various limits which used for 641 * optimal allocation of a directory inode. 642 */ 643 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 644 minifree = avgifree - avgifree / 4; 645 if (minifree < 1) 646 minifree = 1; 647 minbfree = avgbfree - avgbfree / 4; 648 if (minbfree < 1) 649 minbfree = 1; 650 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 651 dirsize = AVGDIRSIZE; 652 curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 653 if (dirsize < curdirsize) 654 dirsize = curdirsize; 655 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 656 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 657 if (maxcontigdirs == 0) 658 maxcontigdirs = 1; 659 660 /* 661 * Limit number of dirs in one cg and reserve space for 662 * regular files, but only if we have no deficit in 663 * inodes or space. 664 */ 665 prefcg = ino_to_cg(fs, pip->i_number); 666 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 667 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 668 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 669 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 670 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 671 return (cg); 672 } 673 for (cg = 0; cg < prefcg; cg++) 674 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 675 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 676 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 677 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 678 return (cg); 679 } 680 /* 681 * This is a backstop when we have deficit in space. 682 */ 683 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 684 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 685 return (cg); 686 for (cg = 0; cg < prefcg; cg++) 687 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 688 break; 689 return (cg); 690} 691 692/* 693 * Select the desired position for the next block in a file. 694 * 695 * we try to mimic what Remy does in inode_getblk/block_getblk 696 * 697 * we note: blocknr == 0 means that we're about to allocate either 698 * a direct block or a pointer block at the first level of indirection 699 * (In other words, stuff that will go in i_db[] or i_ib[]) 700 * 701 * blocknr != 0 means that we're allocating a block that is none 702 * of the above. Then, blocknr tells us the number of the block 703 * that will hold the pointer 704 */ 705e4fs_daddr_t 706ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 707 e2fs_daddr_t blocknr) 708{ 709 struct m_ext2fs *fs; 710 int tmp; 711 712 fs = ip->i_e2fs; 713 714 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 715 716 /* 717 * If the next block is actually what we thought it is, then set the 718 * goal to what we thought it should be. 719 */ 720 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 721 return ip->i_next_alloc_goal; 722 723 /* 724 * Now check whether we were provided with an array that basically 725 * tells us previous blocks to which we want to stay close. 726 */ 727 if (bap) 728 for (tmp = indx - 1; tmp >= 0; tmp--) 729 if (bap[tmp]) 730 return bap[tmp]; 731 732 /* 733 * Else lets fall back to the blocknr or, if there is none, follow 734 * the rule that a block should be allocated near its inode. 735 */ 736 return (blocknr ? blocknr : 737 (e2fs_daddr_t)(ip->i_block_group * 738 EXT2_BLOCKS_PER_GROUP(fs)) + fs->e2fs->e2fs_first_dblock); 739} 740 741/* 742 * Implement the cylinder overflow algorithm. 743 * 744 * The policy implemented by this algorithm is: 745 * 1) allocate the block in its requested cylinder group. 746 * 2) quadradically rehash on the cylinder group number. 747 * 3) brute force search for a free block. 748 */ 749static e4fs_daddr_t 750ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 751 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 752{ 753 struct m_ext2fs *fs; 754 e4fs_daddr_t result; 755 int i, icg = cg; 756 757 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 758 fs = ip->i_e2fs; 759 /* 760 * 1: preferred cylinder group 761 */ 762 result = (*allocator)(ip, cg, pref, size); 763 if (result) 764 return (result); 765 /* 766 * 2: quadratic rehash 767 */ 768 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 769 cg += i; 770 if (cg >= fs->e2fs_gcount) 771 cg -= fs->e2fs_gcount; 772 result = (*allocator)(ip, cg, 0, size); 773 if (result) 774 return (result); 775 } 776 /* 777 * 3: brute force search 778 * Note that we start at i == 2, since 0 was checked initially, 779 * and 1 is always checked in the quadratic rehash. 780 */ 781 cg = (icg + 2) % fs->e2fs_gcount; 782 for (i = 2; i < fs->e2fs_gcount; i++) { 783 result = (*allocator)(ip, cg, 0, size); 784 if (result) 785 return (result); 786 cg++; 787 if (cg == fs->e2fs_gcount) 788 cg = 0; 789 } 790 return (0); 791} 792 793static uint64_t 794ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 795{ 796 797 if (!ext2_cg_has_sb(fs, cg)) 798 return (0); 799 800 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 801 return (fs->e2fs->e3fs_first_meta_bg); 802 803 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 804 EXT2_DESCS_PER_BLOCK(fs)); 805} 806 807static uint64_t 808ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 809{ 810 unsigned long metagroup; 811 int first, last; 812 813 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 814 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 815 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 816 817 if (cg == first || cg == first + 1 || cg == last) 818 return (1); 819 820 return (0); 821} 822 823uint64_t 824ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 825{ 826 unsigned long first_meta_bg, metagroup; 827 828 first_meta_bg = fs->e2fs->e3fs_first_meta_bg; 829 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 830 831 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 832 metagroup < first_meta_bg) 833 return (ext2_cg_number_gdb_nometa(fs, cg)); 834 835 return ext2_cg_number_gdb_meta(fs, cg); 836} 837 838static int 839ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 840{ 841 int number; 842 843 number = ext2_cg_has_sb(fs, cg); 844 845 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 846 cg < fs->e2fs->e3fs_first_meta_bg * EXT2_DESCS_PER_BLOCK(fs)) { 847 if (number) { 848 number += ext2_cg_number_gdb(fs, cg); 849 number += fs->e2fs->e2fs_reserved_ngdb; 850 } 851 } else { 852 number += ext2_cg_number_gdb(fs, cg); 853 } 854 855 return (number); 856} 857 858static void 859ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 860{ 861 int i; 862 863 if (start_bit >= end_bit) 864 return; 865 866 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 867 setbit(bitmap, i); 868 if (i < end_bit) 869 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 870} 871 872static int 873ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 874{ 875 876 return ((block - fs->e2fs->e2fs_first_dblock) / fs->e2fs_bsize); 877} 878 879static int 880ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 881{ 882 883 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 884} 885 886static int 887ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 888{ 889 int bit, bit_max, inodes_per_block; 890 uint64_t start, tmp; 891 892 if (!(fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_BLOCK_UNINIT)) 893 return (0); 894 895 memset(bp->b_data, 0, fs->e2fs_bsize); 896 897 bit_max = ext2_number_base_meta_blocks(fs, cg); 898 if ((bit_max >> 3) >= fs->e2fs_bsize) 899 return (EINVAL); 900 901 for (bit = 0; bit < bit_max; bit++) 902 setbit(bp->b_data, bit); 903 904 start = (uint64_t)cg * fs->e2fs->e2fs_bpg + fs->e2fs->e2fs_first_dblock; 905 906 /* Set bits for block and inode bitmaps, and inode table. */ 907 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 908 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 909 ext2_block_in_group(fs, tmp, cg)) 910 setbit(bp->b_data, tmp - start); 911 912 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 913 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 914 ext2_block_in_group(fs, tmp, cg)) 915 setbit(bp->b_data, tmp - start); 916 917 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 918 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 919 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 920 fs->e2fs->e2fs_ipg / inodes_per_block ) { 921 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 922 ext2_block_in_group(fs, tmp, cg)) 923 setbit(bp->b_data, tmp - start); 924 tmp++; 925 } 926 927 /* 928 * Also if the number of blocks within the group is less than 929 * the blocksize * 8 ( which is the size of bitmap ), set rest 930 * of the block bitmap to 1 931 */ 932 ext2_mark_bitmap_end(fs->e2fs->e2fs_bpg, fs->e2fs_bsize * 8, 933 bp->b_data); 934 935 /* Clean the flag */ 936 fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_BLOCK_UNINIT; 937 938 return (0); 939} 940 941static int 942ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 943{ 944 struct ext2_gd *gd; 945 uint64_t group_first_block; 946 unsigned int offset, max_bit; 947 948 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 949 /* 950 * It is not possible to check block bitmap in case of this feature, 951 * because the inode and block bitmaps and inode table 952 * blocks may not be in the group at all. 953 * So, skip check in this case. 954 */ 955 return (0); 956 } 957 958 gd = &fs->e2fs_gd[cg]; 959 max_bit = fs->e2fs_fpg; 960 group_first_block = ((uint64_t)cg) * fs->e2fs->e2fs_fpg + 961 fs->e2fs->e2fs_first_dblock; 962 963 /* Check block bitmap block number */ 964 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 965 if (offset >= max_bit || !isset(bp->b_data, offset)) { 966 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 967 "bad block bitmap, group", cg); 968 return (EINVAL); 969 } 970 971 /* Check inode bitmap block number */ 972 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 973 if (offset >= max_bit || !isset(bp->b_data, offset)) { 974 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 975 "bad inode bitmap", cg); 976 return (EINVAL); 977 } 978 979 /* Check inode table */ 980 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 981 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 982 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 983 "bad inode table, group", cg); 984 return (EINVAL); 985 } 986 987 return (0); 988} 989 990/* 991 * Determine whether a block can be allocated. 992 * 993 * Check to see if a block of the appropriate size is available, 994 * and if it is, allocate it. 995 */ 996static daddr_t 997ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 998{ 999 struct m_ext2fs *fs; 1000 struct buf *bp; 1001 struct ext2mount *ump; 1002 daddr_t bno, runstart, runlen; 1003 int bit, loc, end, error, start; 1004 char *bbp; 1005 /* XXX ondisk32 */ 1006 fs = ip->i_e2fs; 1007 ump = ip->i_ump; 1008 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1009 return (0); 1010 1011 EXT2_UNLOCK(ump); 1012 error = bread(ip->i_devvp, fsbtodb(fs, 1013 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1014 (int)fs->e2fs_bsize, NOCRED, &bp); 1015 if (error) 1016 goto fail; 1017 1018 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1019 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1020 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1021 if (error) 1022 goto fail; 1023 1024 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1025 } 1026 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1027 if (error) 1028 goto fail; 1029 1030 error = ext2_b_bitmap_validate(fs,bp, cg); 1031 if (error) 1032 goto fail; 1033 1034 /* 1035 * Check, that another thread did not not allocate the last block in this 1036 * group while we were waiting for the buffer. 1037 */ 1038 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1039 goto fail; 1040 1041 bbp = (char *)bp->b_data; 1042 1043 if (dtog(fs, bpref) != cg) 1044 bpref = 0; 1045 if (bpref != 0) { 1046 bpref = dtogd(fs, bpref); 1047 /* 1048 * if the requested block is available, use it 1049 */ 1050 if (isclr(bbp, bpref)) { 1051 bno = bpref; 1052 goto gotit; 1053 } 1054 } 1055 /* 1056 * no blocks in the requested cylinder, so take next 1057 * available one in this cylinder group. 1058 * first try to get 8 contigous blocks, then fall back to a single 1059 * block. 1060 */ 1061 if (bpref) 1062 start = dtogd(fs, bpref) / NBBY; 1063 else 1064 start = 0; 1065 end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 1066retry: 1067 runlen = 0; 1068 runstart = 0; 1069 for (loc = start; loc < end; loc++) { 1070 if (bbp[loc] == (char)0xff) { 1071 runlen = 0; 1072 continue; 1073 } 1074 1075 /* Start of a run, find the number of high clear bits. */ 1076 if (runlen == 0) { 1077 bit = fls(bbp[loc]); 1078 runlen = NBBY - bit; 1079 runstart = loc * NBBY + bit; 1080 } else if (bbp[loc] == 0) { 1081 /* Continue a run. */ 1082 runlen += NBBY; 1083 } else { 1084 /* 1085 * Finish the current run. If it isn't long 1086 * enough, start a new one. 1087 */ 1088 bit = ffs(bbp[loc]) - 1; 1089 runlen += bit; 1090 if (runlen >= 8) { 1091 bno = runstart; 1092 goto gotit; 1093 } 1094 1095 /* Run was too short, start a new one. */ 1096 bit = fls(bbp[loc]); 1097 runlen = NBBY - bit; 1098 runstart = loc * NBBY + bit; 1099 } 1100 1101 /* If the current run is long enough, use it. */ 1102 if (runlen >= 8) { 1103 bno = runstart; 1104 goto gotit; 1105 } 1106 } 1107 if (start != 0) { 1108 end = start; 1109 start = 0; 1110 goto retry; 1111 } 1112 bno = ext2_mapsearch(fs, bbp, bpref); 1113 if (bno < 0) 1114 goto fail; 1115 1116gotit: 1117#ifdef INVARIANTS 1118 if (isset(bbp, bno)) { 1119 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1120 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1121 panic("ext2fs_alloccg: dup alloc"); 1122 } 1123#endif 1124 setbit(bbp, bno); 1125 EXT2_LOCK(ump); 1126 ext2_clusteracct(fs, bbp, cg, bno, -1); 1127 fs->e2fs_fbcount--; 1128 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1129 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1130 fs->e2fs_fmod = 1; 1131 EXT2_UNLOCK(ump); 1132 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1133 bdwrite(bp); 1134 return (((uint64_t)cg) * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); 1135 1136fail: 1137 brelse(bp); 1138 EXT2_LOCK(ump); 1139 return (0); 1140} 1141 1142/* 1143 * Determine whether a cluster can be allocated. 1144 */ 1145static daddr_t 1146ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1147{ 1148 struct m_ext2fs *fs; 1149 struct ext2mount *ump; 1150 struct buf *bp; 1151 char *bbp; 1152 int bit, error, got, i, loc, run; 1153 int32_t *lp; 1154 daddr_t bno; 1155 1156 fs = ip->i_e2fs; 1157 ump = ip->i_ump; 1158 1159 if (fs->e2fs_maxcluster[cg] < len) 1160 return (0); 1161 1162 EXT2_UNLOCK(ump); 1163 error = bread(ip->i_devvp, 1164 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1165 (int)fs->e2fs_bsize, NOCRED, &bp); 1166 if (error) 1167 goto fail_lock; 1168 1169 bbp = (char *)bp->b_data; 1170 EXT2_LOCK(ump); 1171 /* 1172 * Check to see if a cluster of the needed size (or bigger) is 1173 * available in this cylinder group. 1174 */ 1175 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1176 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1177 if (*lp++ > 0) 1178 break; 1179 if (i > fs->e2fs_contigsumsize) { 1180 /* 1181 * Update the cluster summary information to reflect 1182 * the true maximum-sized cluster so that future cluster 1183 * allocation requests can avoid reading the bitmap only 1184 * to find no cluster. 1185 */ 1186 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1187 for (i = len - 1; i > 0; i--) 1188 if (*lp-- > 0) 1189 break; 1190 fs->e2fs_maxcluster[cg] = i; 1191 goto fail; 1192 } 1193 EXT2_UNLOCK(ump); 1194 1195 /* Search the bitmap to find a big enough cluster like in FFS. */ 1196 if (dtog(fs, bpref) != cg) 1197 bpref = 0; 1198 if (bpref != 0) 1199 bpref = dtogd(fs, bpref); 1200 loc = bpref / NBBY; 1201 bit = 1 << (bpref % NBBY); 1202 for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) { 1203 if ((bbp[loc] & bit) != 0) 1204 run = 0; 1205 else { 1206 run++; 1207 if (run == len) 1208 break; 1209 } 1210 if ((got & (NBBY - 1)) != (NBBY - 1)) 1211 bit <<= 1; 1212 else { 1213 loc++; 1214 bit = 1; 1215 } 1216 } 1217 1218 if (got >= fs->e2fs->e2fs_fpg) 1219 goto fail_lock; 1220 1221 /* Allocate the cluster that we found. */ 1222 for (i = 1; i < len; i++) 1223 if (!isclr(bbp, got - run + i)) 1224 panic("ext2_clusteralloc: map mismatch"); 1225 1226 bno = got - run + 1; 1227 if (bno >= fs->e2fs->e2fs_fpg) 1228 panic("ext2_clusteralloc: allocated out of group"); 1229 1230 EXT2_LOCK(ump); 1231 for (i = 0; i < len; i += fs->e2fs_fpb) { 1232 setbit(bbp, bno + i); 1233 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1234 fs->e2fs_fbcount--; 1235 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1236 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1237 } 1238 fs->e2fs_fmod = 1; 1239 EXT2_UNLOCK(ump); 1240 1241 bdwrite(bp); 1242 return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); 1243 1244fail_lock: 1245 EXT2_LOCK(ump); 1246fail: 1247 brelse(bp); 1248 return (0); 1249} 1250 1251static int 1252ext2_zero_inode_table(struct inode *ip, int cg) 1253{ 1254 struct m_ext2fs *fs; 1255 struct buf *bp; 1256 int i, all_blks, used_blks; 1257 1258 fs = ip->i_e2fs; 1259 1260 if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_ZEROED) 1261 return (0); 1262 1263 all_blks = fs->e2fs->e2fs_inode_size * fs->e2fs->e2fs_ipg / 1264 fs->e2fs_bsize; 1265 1266 used_blks = howmany(fs->e2fs->e2fs_ipg - 1267 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1268 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1269 1270 for (i = 0; i < all_blks - used_blks; i++) { 1271 bp = getblk(ip->i_devvp, fsbtodb(fs, 1272 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1273 fs->e2fs_bsize, 0, 0, 0); 1274 if (!bp) 1275 return (EIO); 1276 1277 vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); 1278 bawrite(bp); 1279 } 1280 1281 fs->e2fs_gd[cg].ext4bgd_flags |= EXT2_BG_INODE_ZEROED; 1282 1283 return (0); 1284} 1285 1286static void 1287ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1288{ 1289 int i; 1290 1291 for (i = first; i <= last; i++) 1292 bitmap[i] = 0xff; 1293} 1294 1295 1296/* 1297 * Determine whether an inode can be allocated. 1298 * 1299 * Check to see if an inode is available, and if it is, 1300 * allocate it using tode in the specified cylinder group. 1301 */ 1302static daddr_t 1303ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1304{ 1305 struct m_ext2fs *fs; 1306 struct buf *bp; 1307 struct ext2mount *ump; 1308 int error, start, len, ifree, ibytes; 1309 char *ibp, *loc; 1310 1311 ipref--; /* to avoid a lot of (ipref -1) */ 1312 if (ipref == -1) 1313 ipref = 0; 1314 fs = ip->i_e2fs; 1315 ump = ip->i_ump; 1316 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1317 return (0); 1318 EXT2_UNLOCK(ump); 1319 error = bread(ip->i_devvp, fsbtodb(fs, 1320 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1321 (int)fs->e2fs_bsize, NOCRED, &bp); 1322 if (error) { 1323 brelse(bp); 1324 EXT2_LOCK(ump); 1325 return (0); 1326 } 1327 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1328 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1329 if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_UNINIT) { 1330 ibytes = fs->e2fs_ipg / 8; 1331 memset(bp->b_data, 0, ibytes - 1); 1332 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1333 fs->e2fs_bsize - 1); 1334 fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_INODE_UNINIT; 1335 } 1336 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1337 error = ext2_zero_inode_table(ip, cg); 1338 if (error) { 1339 brelse(bp); 1340 EXT2_LOCK(ump); 1341 return (0); 1342 } 1343 } 1344 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1345 if (error) { 1346 brelse(bp); 1347 EXT2_LOCK(ump); 1348 return (0); 1349 } 1350 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1351 /* 1352 * Another thread allocated the last i-node in this 1353 * group while we were waiting for the buffer. 1354 */ 1355 brelse(bp); 1356 EXT2_LOCK(ump); 1357 return (0); 1358 } 1359 ibp = (char *)bp->b_data; 1360 if (ipref) { 1361 ipref %= fs->e2fs->e2fs_ipg; 1362 if (isclr(ibp, ipref)) 1363 goto gotit; 1364 } 1365 start = ipref / NBBY; 1366 len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY); 1367 loc = memcchr(&ibp[start], 0xff, len); 1368 if (loc == NULL) { 1369 len = start + 1; 1370 start = 0; 1371 loc = memcchr(&ibp[start], 0xff, len); 1372 if (loc == NULL) { 1373 SDT_PROBE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 1374 cg, ipref, fs->e2fs_fsmnt); 1375 brelse(bp); 1376 EXT2_LOCK(ump); 1377 return (0); 1378 } 1379 } 1380 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1381gotit: 1382 setbit(ibp, ipref); 1383 EXT2_LOCK(ump); 1384 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1385 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1386 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1387 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1388 ifree = fs->e2fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1389 if (ipref + 1 > ifree) 1390 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1391 fs->e2fs->e2fs_ipg - (ipref + 1)); 1392 } 1393 fs->e2fs->e2fs_ficount--; 1394 fs->e2fs_fmod = 1; 1395 if ((mode & IFMT) == IFDIR) { 1396 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1397 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1398 fs->e2fs_total_dir++; 1399 } 1400 EXT2_UNLOCK(ump); 1401 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1402 bdwrite(bp); 1403 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1404} 1405 1406/* 1407 * Free a block or fragment. 1408 * 1409 */ 1410void 1411ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1412{ 1413 struct m_ext2fs *fs; 1414 struct buf *bp; 1415 struct ext2mount *ump; 1416 int cg, error; 1417 char *bbp; 1418 1419 fs = ip->i_e2fs; 1420 ump = ip->i_ump; 1421 cg = dtog(fs, bno); 1422 if (bno >= fs->e2fs_bcount) { 1423 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, ip->i_number, bno); 1424 return; 1425 } 1426 error = bread(ip->i_devvp, 1427 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1428 (int)fs->e2fs_bsize, NOCRED, &bp); 1429 if (error) { 1430 brelse(bp); 1431 return; 1432 } 1433 bbp = (char *)bp->b_data; 1434 bno = dtogd(fs, bno); 1435 if (isclr(bbp, bno)) { 1436 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1437 (long long)bno, fs->e2fs_fsmnt); 1438 } 1439 clrbit(bbp, bno); 1440 EXT2_LOCK(ump); 1441 ext2_clusteracct(fs, bbp, cg, bno, 1); 1442 fs->e2fs_fbcount++; 1443 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1444 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1445 fs->e2fs_fmod = 1; 1446 EXT2_UNLOCK(ump); 1447 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1448 bdwrite(bp); 1449} 1450 1451/* 1452 * Free an inode. 1453 * 1454 */ 1455int 1456ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1457{ 1458 struct m_ext2fs *fs; 1459 struct inode *pip; 1460 struct buf *bp; 1461 struct ext2mount *ump; 1462 int error, cg; 1463 char *ibp; 1464 1465 pip = VTOI(pvp); 1466 fs = pip->i_e2fs; 1467 ump = pip->i_ump; 1468 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1469 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1470 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1471 1472 cg = ino_to_cg(fs, ino); 1473 error = bread(pip->i_devvp, 1474 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1475 (int)fs->e2fs_bsize, NOCRED, &bp); 1476 if (error) { 1477 brelse(bp); 1478 return (0); 1479 } 1480 ibp = (char *)bp->b_data; 1481 ino = (ino - 1) % fs->e2fs->e2fs_ipg; 1482 if (isclr(ibp, ino)) { 1483 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1484 fs->e2fs_fsmnt, ino); 1485 if (fs->e2fs_ronly == 0) 1486 panic("ext2_vfree: freeing free inode"); 1487 } 1488 clrbit(ibp, ino); 1489 EXT2_LOCK(ump); 1490 fs->e2fs->e2fs_ficount++; 1491 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1492 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1493 if ((mode & IFMT) == IFDIR) { 1494 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1495 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1496 fs->e2fs_total_dir--; 1497 } 1498 fs->e2fs_fmod = 1; 1499 EXT2_UNLOCK(ump); 1500 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1501 bdwrite(bp); 1502 return (0); 1503} 1504 1505/* 1506 * Find a block in the specified cylinder group. 1507 * 1508 * It is a panic if a request is made to find a block if none are 1509 * available. 1510 */ 1511static daddr_t 1512ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1513{ 1514 char *loc; 1515 int start, len; 1516 1517 /* 1518 * find the fragment by searching through the free block 1519 * map for an appropriate bit pattern 1520 */ 1521 if (bpref) 1522 start = dtogd(fs, bpref) / NBBY; 1523 else 1524 start = 0; 1525 len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 1526 loc = memcchr(&bbp[start], 0xff, len); 1527 if (loc == NULL) { 1528 len = start + 1; 1529 start = 0; 1530 loc = memcchr(&bbp[start], 0xff, len); 1531 if (loc == NULL) { 1532 panic("ext2_mapsearch: map corrupted: start=%d, len=%d, fs=%s", 1533 start, len, fs->e2fs_fsmnt); 1534 /* NOTREACHED */ 1535 } 1536 } 1537 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1538} 1539 1540int 1541ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1542{ 1543 int a3, a5, a7; 1544 1545 if (cg == 0) 1546 return (1); 1547 1548 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1549 if (cg == fs->e2fs->e4fs_backup_bgs[0] || 1550 cg == fs->e2fs->e4fs_backup_bgs[1]) 1551 return (1); 1552 return (0); 1553 } 1554 1555 if ((cg <= 1) || 1556 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1557 return (1); 1558 1559 if (!(cg & 1)) 1560 return (0); 1561 1562 for (a3 = 3, a5 = 5, a7 = 7; 1563 a3 <= cg || a5 <= cg || a7 <= cg; 1564 a3 *= 3, a5 *= 5, a7 *= 7) 1565 if (cg == a3 || cg == a5 || cg == a7) 1566 return (1); 1567 return (0); 1568} 1569