1/* $NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 32 * Modified for ext2fs by Manuel Bouyer. 33 */ 34 35/* 36 * Copyright (c) 1997 Manuel Bouyer. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 48 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 49 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 50 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 51 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 56 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 57 * 58 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 59 * Modified for ext2fs by Manuel Bouyer. 60 */ 61 62#include <sys/cdefs.h> 63__KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $"); 64 65#include <sys/param.h> 66#include <sys/systm.h> 67#include <sys/buf.h> 68#include <sys/proc.h> 69#include <sys/vnode.h> 70#include <sys/mount.h> 71#include <sys/kernel.h> 72#include <sys/syslog.h> 73#include <sys/kauth.h> 74 75#include <lib/libkern/crc16.h> 76 77#include <ufs/ufs/inode.h> 78#include <ufs/ufs/ufs_extern.h> 79#include <ufs/ufs/ufsmount.h> 80 81#include <ufs/ext2fs/ext2fs.h> 82#include <ufs/ext2fs/ext2fs_extern.h> 83 84u_long ext2gennumber; 85 86static daddr_t ext2fs_alloccg(struct inode *, int, daddr_t, int); 87static u_long ext2fs_dirpref(struct m_ext2fs *); 88static void ext2fs_fserr(struct m_ext2fs *, u_int, const char *); 89static u_long ext2fs_hashalloc(struct inode *, int, long, int, 90 daddr_t (*)(struct inode *, int, daddr_t, int)); 91static daddr_t ext2fs_nodealloccg(struct inode *, int, daddr_t, int); 92static daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t); 93static __inline void ext2fs_cg_update(struct m_ext2fs *, int, 94 struct ext2_gd *, int, int, int, daddr_t); 95static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *); 96static void ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *, 97 char *); 98 99/* 100 * Allocate a block in the file system. 101 * 102 * A preference may be optionally specified. If a preference is given 103 * the following hierarchy is used to allocate a block: 104 * 1) allocate the requested block. 105 * 2) allocate a rotationally optimal block in the same cylinder. 106 * 3) allocate a block in the same cylinder group. 107 * 4) quadradically rehash into other cylinder groups, until an 108 * available block is located. 109 * If no block preference is given the following hierarchy is used 110 * to allocate a block: 111 * 1) allocate a block in the cylinder group that contains the 112 * inode for the file. 113 * 2) quadradically rehash into other cylinder groups, until an 114 * available block is located. 115 */ 116int 117ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, 118 kauth_cred_t cred, daddr_t *bnp) 119{ 120 struct m_ext2fs *fs; 121 daddr_t bno; 122 int cg; 123 124 *bnp = 0; 125 fs = ip->i_e2fs; 126#ifdef DIAGNOSTIC 127 if (cred == NOCRED) 128 panic("ext2fs_alloc: missing credential"); 129#endif /* DIAGNOSTIC */ 130 if (fs->e2fs.e2fs_fbcount == 0) 131 goto nospace; 132 if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL, 133 NULL, NULL) != 0 && 134 freespace(fs) <= 0) 135 goto nospace; 136 if (bpref >= fs->e2fs.e2fs_bcount) 137 bpref = 0; 138 if (bpref == 0) 139 cg = ino_to_cg(fs, ip->i_number); 140 else 141 cg = dtog(fs, bpref); 142 bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 143 ext2fs_alloccg); 144 if (bno > 0) { 145 ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize)); 146 ip->i_flag |= IN_CHANGE | IN_UPDATE; 147 *bnp = bno; 148 return 0; 149 } 150nospace: 151 ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full"); 152 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 153 return ENOSPC; 154} 155 156/* 157 * Allocate an inode in the file system. 158 * 159 * If allocating a directory, use ext2fs_dirpref to select the inode. 160 * If allocating in a directory, the following hierarchy is followed: 161 * 1) allocate the preferred inode. 162 * 2) allocate an inode in the same cylinder group. 163 * 3) quadradically rehash into other cylinder groups, until an 164 * available inode is located. 165 * If no inode preference is given the following hierarchy is used 166 * to allocate an inode: 167 * 1) allocate an inode in cylinder group 0. 168 * 2) quadradically rehash into other cylinder groups, until an 169 * available inode is located. 170 */ 171int 172ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop) 173{ 174 struct inode *pip; 175 struct m_ext2fs *fs; 176 ino_t ino, ipref; 177 int cg; 178 179 pip = VTOI(pvp); 180 fs = pip->i_e2fs; 181 if (fs->e2fs.e2fs_ficount == 0) 182 goto noinodes; 183 184 if ((mode & IFMT) == IFDIR) 185 cg = ext2fs_dirpref(fs); 186 else 187 cg = ino_to_cg(fs, pip->i_number); 188 ipref = cg * fs->e2fs.e2fs_ipg + 1; 189 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 190 if (ino == 0) 191 goto noinodes; 192 193 *inop = ino; 194 return 0; 195 196noinodes: 197 ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes"); 198 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 199 return ENOSPC; 200} 201 202/* 203 * Find a cylinder to place a directory. 204 * 205 * The policy implemented by this algorithm is to select from 206 * among those cylinder groups with above the average number of 207 * free inodes, the one with the smallest number of directories. 208 */ 209static u_long 210ext2fs_dirpref(struct m_ext2fs *fs) 211{ 212 int cg, maxspace, mincg, avgifree; 213 214 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 215 maxspace = 0; 216 mincg = -1; 217 for (cg = 0; cg < fs->e2fs_ncg; cg++) { 218 uint32_t nifree = 219 (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16) 220 | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree); 221 if (nifree < avgifree) { 222 continue; 223 } 224 uint32_t nbfree = 225 (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16) 226 | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree); 227 if (mincg == -1 || nbfree > maxspace) { 228 mincg = cg; 229 maxspace = nbfree; 230 } 231 } 232 return mincg; 233} 234 235/* 236 * Select the desired position for the next block in a file. The file is 237 * logically divided into sections. The first section is composed of the 238 * direct blocks. Each additional section contains fs_maxbpg blocks. 239 * 240 * If no blocks have been allocated in the first section, the policy is to 241 * request a block in the same cylinder group as the inode that describes 242 * the file. Otherwise, the policy is to try to allocate the blocks 243 * contiguously. The two fields of the ext2 inode extension (see 244 * ufs/ufs/inode.h) help this. 245 */ 246daddr_t 247ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx, 248 int32_t *bap /* XXX ondisk32 */) 249{ 250 struct m_ext2fs *fs; 251 int cg, i; 252 253 fs = ip->i_e2fs; 254 /* 255 * if we are doing contiguous lbn allocation, try to alloc blocks 256 * contiguously on disk 257 */ 258 259 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 260 return ip->i_e2fs_last_blk + 1; 261 } 262 263 /* 264 * bap, if provided, gives us a list of blocks to which we want to 265 * stay close 266 */ 267 268 if (bap) { 269 for (i = indx; i >= 0 ; i--) { 270 if (bap[i]) { 271 return fs2h32(bap[i]) + 1; 272 } 273 } 274 } 275 276 /* fall back to the first block of the cylinder containing the inode */ 277 278 cg = ino_to_cg(fs, ip->i_number); 279 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 280} 281 282/* 283 * Implement the cylinder overflow algorithm. 284 * 285 * The policy implemented by this algorithm is: 286 * 1) allocate the block in its requested cylinder group. 287 * 2) quadradically rehash on the cylinder group number. 288 * 3) brute force search for a free block. 289 */ 290static u_long 291ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size, 292 daddr_t (*allocator)(struct inode *, int, daddr_t, int)) 293{ 294 struct m_ext2fs *fs; 295 long result; 296 int i, icg = cg; 297 298 fs = ip->i_e2fs; 299 /* 300 * 1: preferred cylinder group 301 */ 302 result = (*allocator)(ip, cg, pref, size); 303 if (result) 304 return result; 305 /* 306 * 2: quadratic rehash 307 */ 308 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 309 cg += i; 310 if (cg >= fs->e2fs_ncg) 311 cg -= fs->e2fs_ncg; 312 result = (*allocator)(ip, cg, 0, size); 313 if (result) 314 return result; 315 } 316 /* 317 * 3: brute force search 318 * Note that we start at i == 2, since 0 was checked initially, 319 * and 1 is always checked in the quadratic rehash. 320 */ 321 cg = (icg + 2) % fs->e2fs_ncg; 322 for (i = 2; i < fs->e2fs_ncg; i++) { 323 result = (*allocator)(ip, cg, 0, size); 324 if (result) 325 return result; 326 cg++; 327 if (cg == fs->e2fs_ncg) 328 cg = 0; 329 } 330 return 0; 331} 332 333/* 334 * Determine whether a block can be allocated. 335 * 336 * Check to see if a block of the appropriate size is available, 337 * and if it is, allocate it. 338 */ 339 340static daddr_t 341ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 342{ 343 struct m_ext2fs *fs; 344 char *bbp; 345 struct buf *bp; 346 int error, bno, start, end, loc; 347 348 fs = ip->i_e2fs; 349 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 && 350 fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0) 351 return 0; 352 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs, 353 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap), 354 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)), 355 (int)fs->e2fs_bsize, B_MODIFY, &bp); 356 if (error) { 357 return 0; 358 } 359 bbp = (char *)bp->b_data; 360 361 if (dtog(fs, bpref) != cg) 362 bpref = 0; 363 364 /* initialize block bitmap now if uninit */ 365 if (__predict_false(E2FS_HAS_GD_CSUM(fs) && 366 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) { 367 ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp); 368 fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT); 369 } 370 371 if (bpref != 0) { 372 bpref = dtogd(fs, bpref); 373 /* 374 * if the requested block is available, use it 375 */ 376 if (isclr(bbp, bpref)) { 377 bno = bpref; 378 goto gotit; 379 } 380 } 381 /* 382 * no blocks in the requested cylinder, so take next 383 * available one in this cylinder group. 384 * first try to get 8 contiguous blocks, then fall back to a single 385 * block. 386 */ 387 if (bpref) 388 start = dtogd(fs, bpref) / NBBY; 389 else 390 start = 0; 391 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 392 for (loc = start; loc < end; loc++) { 393 if (bbp[loc] == 0) { 394 bno = loc * NBBY; 395 goto gotit; 396 } 397 } 398 for (loc = 0; loc < start; loc++) { 399 if (bbp[loc] == 0) { 400 bno = loc * NBBY; 401 goto gotit; 402 } 403 } 404 405 bno = ext2fs_mapsearch(fs, bbp, bpref); 406#if 0 407 /* 408 * XXX jdolecek mapsearch actually never fails, it panics instead. 409 * If re-enabling, make sure to brele() before returning. 410 */ 411 if (bno < 0) 412 return 0; 413#endif 414gotit: 415#ifdef DIAGNOSTIC 416 if (isset(bbp, (daddr_t)bno)) { 417 printf("%s: cg=%d bno=%d fs=%s\n", __func__, 418 cg, bno, fs->e2fs_fsmnt); 419 panic("ext2fs_alloccg: dup alloc"); 420 } 421#endif 422 setbit(bbp, (daddr_t)bno); 423 fs->e2fs.e2fs_fbcount--; 424 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0); 425 fs->e2fs_fmod = 1; 426 bdwrite(bp); 427 return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno; 428} 429 430/* 431 * Determine whether an inode can be allocated. 432 * 433 * Check to see if an inode is available, and if it is, 434 * allocate it using the following policy: 435 * 1) allocate the requested inode. 436 * 2) allocate the next available inode after the requested 437 * inode in the specified cylinder group. 438 */ 439static daddr_t 440ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 441{ 442 struct m_ext2fs *fs; 443 char *ibp; 444 struct buf *bp; 445 int error, start, len, loc, map, i; 446 447 ipref--; /* to avoid a lot of (ipref -1) */ 448 if (ipref == -1) 449 ipref = 0; 450 fs = ip->i_e2fs; 451 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 && 452 fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0) 453 return 0; 454 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs, 455 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap), 456 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)), 457 (int)fs->e2fs_bsize, B_MODIFY, &bp); 458 if (error) { 459 return 0; 460 } 461 ibp = (char *)bp->b_data; 462 463 KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0); 464 465 /* initialize inode bitmap now if uninit */ 466 if (__predict_false(E2FS_HAS_GD_CSUM(fs) && 467 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) { 468 KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg); 469 memset(ibp, 0, fs->e2fs_bsize); 470 fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT); 471 } 472 473 if (ipref) { 474 ipref %= fs->e2fs.e2fs_ipg; 475 if (isclr(ibp, ipref)) 476 goto gotit; 477 } 478 start = ipref / NBBY; 479 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 480 loc = skpc(0xff, len, &ibp[start]); 481 if (loc == 0) { 482 len = start + 1; 483 start = 0; 484 loc = skpc(0xff, len, &ibp[0]); 485 if (loc == 0) { 486 printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__, 487 cg, (long long)ipref, fs->e2fs_fsmnt); 488 panic("%s: map corrupted", __func__); 489 /* NOTREACHED */ 490 } 491 } 492 i = start + len - loc; 493 map = ibp[i] ^ 0xff; 494 if (map == 0) { 495 printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt); 496 panic("%s: inode not in map", __func__); 497 } 498 ipref = i * NBBY + ffs(map) - 1; 499gotit: 500 setbit(ibp, ipref); 501 fs->e2fs.e2fs_ficount--; 502 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 503 0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref); 504 fs->e2fs_fmod = 1; 505 bdwrite(bp); 506 return cg * fs->e2fs.e2fs_ipg + ipref + 1; 507} 508 509/* 510 * Free a block. 511 * 512 * The specified block is placed back in the 513 * free map. 514 */ 515void 516ext2fs_blkfree(struct inode *ip, daddr_t bno) 517{ 518 struct m_ext2fs *fs; 519 char *bbp; 520 struct buf *bp; 521 int error, cg; 522 523 fs = ip->i_e2fs; 524 cg = dtog(fs, bno); 525 526 KASSERT(!E2FS_HAS_GD_CSUM(fs) || 527 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0); 528 529 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 530 printf("%s: bad block %jd, ino %ju\n", 531 __func__, (intmax_t)bno, (uintmax_t)ip->i_number); 532 ext2fs_fserr(fs, ip->i_uid, "bad block"); 533 return; 534 } 535 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs, 536 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap), 537 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)), 538 (int)fs->e2fs_bsize, B_MODIFY, &bp); 539 if (error) { 540 return; 541 } 542 bbp = (char *)bp->b_data; 543 bno = dtogd(fs, bno); 544 if (isclr(bbp, bno)) { 545 printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__, 546 (uintmax_t)ip->i_dev, (intmax_t)bno, 547 fs->e2fs_fsmnt); 548 panic("%s: freeing free block", __func__); 549 } 550 clrbit(bbp, bno); 551 fs->e2fs.e2fs_fbcount++; 552 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0); 553 fs->e2fs_fmod = 1; 554 bdwrite(bp); 555} 556 557/* 558 * Free an inode. 559 * 560 * The specified inode is placed back in the free map. 561 */ 562int 563ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode) 564{ 565 struct m_ext2fs *fs; 566 char *ibp; 567 struct inode *pip; 568 struct buf *bp; 569 int error, cg; 570 571 pip = VTOI(pvp); 572 fs = pip->i_e2fs; 573 574 if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 575 panic("%s: range: dev = %#jx, ino = %ju, fs = %s", 576 __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino, 577 fs->e2fs_fsmnt); 578 579 cg = ino_to_cg(fs, ino); 580 581 KASSERT(!E2FS_HAS_GD_CSUM(fs) || 582 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0); 583 584 error = bread(pip->i_devvp, EXT2_FSBTODB64(fs, 585 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap), 586 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)), 587 (int)fs->e2fs_bsize, B_MODIFY, &bp); 588 if (error) { 589 return 0; 590 } 591 ibp = (char *)bp->b_data; 592 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 593 if (isclr(ibp, ino)) { 594 printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__, 595 (uintmax_t)pip->i_dev, 596 (uintmax_t)ino, fs->e2fs_fsmnt); 597 if (fs->e2fs_ronly == 0) 598 panic("%s: freeing free inode", __func__); 599 } 600 clrbit(ibp, ino); 601 fs->e2fs.e2fs_ficount++; 602 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 603 0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0); 604 fs->e2fs_fmod = 1; 605 bdwrite(bp); 606 return 0; 607} 608 609/* 610 * Find a block in the specified cylinder group. 611 * 612 * It is a panic if a request is made to find a block if none are 613 * available. 614 */ 615 616static daddr_t 617ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 618{ 619 int start, len, loc, i, map; 620 621 /* 622 * find the fragment by searching through the free block 623 * map for an appropriate bit pattern 624 */ 625 if (bpref) 626 start = dtogd(fs, bpref) / NBBY; 627 else 628 start = 0; 629 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 630 loc = skpc(0xff, len, &bbp[start]); 631 if (loc == 0) { 632 len = start + 1; 633 start = 0; 634 loc = skpc(0xff, len, &bbp[start]); 635 if (loc == 0) { 636 printf("%s: start = %d, len = %d, fs = %s\n", 637 __func__, start, len, fs->e2fs_fsmnt); 638 panic("%s: map corrupted", __func__); 639 /* NOTREACHED */ 640 } 641 } 642 i = start + len - loc; 643 map = bbp[i] ^ 0xff; 644 if (map == 0) { 645 printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt); 646 panic("%s: block not in map", __func__); 647 } 648 return i * NBBY + ffs(map) - 1; 649} 650 651/* 652 * Fserr prints the name of a file system with an error diagnostic. 653 * 654 * The form of the error message is: 655 * fs: error message 656 */ 657static void 658ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp) 659{ 660 661 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 662} 663 664static __inline void 665ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff) 666{ 667 if (nifree) { 668 uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) | 669 (fs2h16(gd->ext2bgd_nifree_hi) << 16); 670 ext2bgd_nifree += nifree; 671 gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree); 672 gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16); 673 /* 674 * If we allocated inode on bigger offset than what was 675 * ever used before, bump the itable_unused count. This 676 * member only ever grows, and is used only for initialization 677 * !INODE_ZEROED groups with used inodes. Of course, by the 678 * time we get here the itables are already zeroed, but 679 * e2fstools fsck.ext4 still checks this. 680 */ 681 if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 && 682 (ioff + 1) >= (fs->e2fs.e2fs_ipg - 683 fs2h16(gd->ext2bgd_itable_unused_lo))) { 684 gd->ext2bgd_itable_unused_lo = 685 h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1)); 686 } 687 688 KASSERT(!E2FS_HAS_GD_CSUM(fs) || 689 gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree); 690 } 691 692 693 if (nbfree) { 694 uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree) 695 | (fs2h16(gd->ext2bgd_nbfree_hi) << 16); 696 ext2bgd_nbfree += nbfree; 697 gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree); 698 gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16); 699 } 700 701 if (ndirs) { 702 uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs) 703 | (fs2h16(gd->ext2bgd_ndirs_hi) << 16); 704 ext2bgd_ndirs += ndirs; 705 gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs); 706 gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16); 707 } 708 709 if (E2FS_HAS_GD_CSUM(fs)) 710 gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd); 711} 712 713static const uint32_t ext2fs_crc32c_table[256] = { 714 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 715 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, 716 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 717 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 718 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 719 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, 720 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 721 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, 722 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 723 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 724 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 725 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, 726 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 727 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, 728 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 729 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 730 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 731 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, 732 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 733 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, 734 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 735 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 736 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 737 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, 738 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 739 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, 740 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 741 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 742 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 743 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, 744 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 745 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, 746 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 747 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 748 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 749 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, 750 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 751 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, 752 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 753 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 754 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 755 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, 756 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 757 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, 758 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 759 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 760 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 761 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, 762 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 763 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, 764 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 765 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 766 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 767 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, 768 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 769 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, 770 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 771 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 772 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 773 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, 774 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 775 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, 776 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 777 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, 778}; 779 780static uint32_t 781ext2fs_crc32c(uint32_t last, const void *vbuf, size_t len) 782{ 783 uint32_t crc = last; 784 const uint8_t *buf = vbuf; 785 786 while (len--) 787 crc = ext2fs_crc32c_table[(crc ^ *buf++) & 0xff] ^ (crc >> 8); 788 789 return crc; 790} 791 792/* 793 * Compute group description csum. Structure data must be LE (not host). 794 * Returned as LE (disk encoding). 795 */ 796static uint16_t 797ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd) 798{ 799 uint16_t crc; 800 size_t cgsize = 1 << fs->e2fs_group_desc_shift; 801 uint32_t cg_bswapped = h2fs32((uint32_t)cg); 802 size_t off; 803 804 if (EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 805 uint32_t crc32; 806 uint8_t dummy[2] = {0, 0}; 807 808 off = offsetof(struct ext2_gd, ext2bgd_checksum); 809 810 crc32 = ext2fs_crc32c(~0, fs->e2fs.e2fs_uuid, 811 sizeof(fs->e2fs.e2fs_uuid)); 812 crc32 = ext2fs_crc32c(crc32, &cg_bswapped, sizeof(cg_bswapped)); 813 crc32 = ext2fs_crc32c(crc32, gd, off); 814 crc32 = ext2fs_crc32c(crc32, dummy, 2); 815 crc32 = ext2fs_crc32c(crc32, gd + off + 2, cgsize - (off + 2)); 816 817 return h2fs16(crc32 & 0xffff); 818 } 819 820 if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM)) 821 return 0; 822 823 off = offsetof(struct ext2_gd, ext2bgd_checksum); 824 825 crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, 826 sizeof(fs->e2fs.e2fs_uuid)); 827 crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped)); 828 crc = crc16(crc, (uint8_t *)gd, off); 829 crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2)); 830 831 return h2fs16(crc); 832} 833 834static void 835ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp) 836{ 837 int i; 838 839 memset(bbp, 0, fs->e2fs_bsize); 840 841 /* 842 * No block was ever allocated on this cg before, so the only used 843 * blocks are metadata blocks on start of the group. We could optimize 844 * this to set by bytes, but since this is done once per the group 845 * in lifetime of filesystem, it really is not worth it. 846 */ 847 for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++) 848 setbit(bbp, i); 849} 850 851/* 852 * Verify csum and initialize itable if not done already 853 */ 854int 855ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly) 856{ 857 struct ext2_gd *gd; 858 ino_t ioff; 859 size_t boff; 860 struct buf *bp; 861 int cg, i, error; 862 863 if (!E2FS_HAS_GD_CSUM(fs)) 864 return 0; 865 866 for(cg = 0; cg < fs->e2fs_ncg; cg++) { 867 gd = &fs->e2fs_gd[cg]; 868 869 /* Verify checksum */ 870 uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd); 871 if (gd->ext2bgd_checksum != csum) { 872 printf("%s: group %d invalid csum (%#x != %#x)\n", 873 __func__, cg, gd->ext2bgd_checksum, csum); 874 return EINVAL; 875 } 876 877 /* if mounting read-write, zero itable if not already done */ 878 if (ronly || 879 (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0) 880 continue; 881 882 /* 883 * We are skipping already used inodes, zero rest of itable 884 * blocks. First block to zero could be only partial wipe, all 885 * others are wiped completely. This might take a while, 886 * there could be many inode table blocks. We use 887 * delayed writes, so this shouldn't block for very 888 * long. 889 */ 890 ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo); 891 boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs); 892 893 for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) { 894 if (boff) { 895 /* partial wipe, must read old data */ 896 error = bread(devvp, EXT2_FSBTODB64OFF(fs, 897 fs2h32(gd->ext2bgd_i_tables), 898 fs2h32(gd->ext2bgd_i_tables_hi), i), 899 (int)fs->e2fs_bsize, B_MODIFY, &bp); 900 if (error) { 901 printf("%s: can't read itable block", 902 __func__); 903 return error; 904 } 905 memset((char *)bp->b_data + boff, 0, 906 fs->e2fs_bsize - boff); 907 boff = 0; 908 } else { 909 /* 910 * Complete wipe, don't need to read data. This 911 * assumes nothing else is changing the data. 912 */ 913 bp = getblk(devvp, EXT2_FSBTODB64OFF(fs, 914 fs2h32(gd->ext2bgd_i_tables), 915 fs2h32(gd->ext2bgd_i_tables_hi), i), 916 (int)fs->e2fs_bsize, 0, 0); 917 clrbuf(bp); 918 } 919 920 bdwrite(bp); 921 } 922 923 gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED); 924 gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd); 925 fs->e2fs_fmod = 1; 926 } 927 928 return 0; 929} 930