1/* 2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. All advertising materials mentioning features or use of this software 42 * must display the following acknowledgement: 43 * This product includes software developed by the University of 44 * California, Berkeley and its contributors. 45 * 4. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 62 */ 63#include <rev_endian_fs.h> 64#include <vm/vm_pager.h> 65 66#include <sys/param.h> 67#include <sys/systm.h> 68#include <sys/buf_internal.h> 69#include <sys/proc.h> 70#include <sys/kauth.h> 71#include <sys/vnode_internal.h> 72#include <sys/mount_internal.h> 73#include <sys/kernel.h> 74#include <sys/syslog.h> 75#include <sys/quota.h> 76 77#include <sys/vm.h> 78 79#include <ufs/ufs/quota.h> 80#include <ufs/ufs/inode.h> 81 82#include <ufs/ffs/fs.h> 83#include <ufs/ffs/ffs_extern.h> 84 85#if REV_ENDIAN_FS 86#include <ufs/ufs/ufs_byte_order.h> 87#include <libkern/OSByteOrder.h> 88#endif /* REV_ENDIAN_FS */ 89 90extern u_long nextgennumber; 91 92static ufs_daddr_t ffs_alloccg(struct inode *, int, ufs_daddr_t, int); 93static ufs_daddr_t ffs_alloccgblk(struct fs *, struct cg *, ufs_daddr_t); 94static ufs_daddr_t ffs_clusteralloc(struct inode *, int, ufs_daddr_t, int); 95static ino_t ffs_dirpref(struct inode *); 96static ufs_daddr_t ffs_fragextend(struct inode *, int, long, int, int); 97static void ffs_fserr(struct fs *, u_int, char *); 98static u_long ffs_hashalloc 99 (struct inode *, int, long, int, u_int32_t (*)()); 100static ino_t ffs_nodealloccg(struct inode *, int, ufs_daddr_t, int); 101static ufs_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs_daddr_t, int); 102static void ffs_clusteracct 103 (struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt); 104 105/* 106 * Allocate a block in the file system. 107 * 108 * The size of the requested block is given, which must be some 109 * multiple of fs_fsize and <= fs_bsize. 110 * A preference may be optionally specified. If a preference is given 111 * the following hierarchy is used to allocate a block: 112 * 1) allocate the requested block. 113 * 2) allocate a rotationally optimal block in the same cylinder. 114 * 3) allocate a block in the same cylinder group. 115 * 4) quadradically rehash into other cylinder groups, until an 116 * available block is located. 
117 * If no block preference is given the following heirarchy is used 118 * to allocate a block: 119 * 1) allocate a block in the cylinder group that contains the 120 * inode for the file. 121 * 2) quadradically rehash into other cylinder groups, until an 122 * available block is located. 123 */ 124ffs_alloc(ip, lbn, bpref, size, cred, bnp) 125 register struct inode *ip; 126 ufs_daddr_t lbn, bpref; 127 int size; 128 kauth_cred_t cred; 129 ufs_daddr_t *bnp; 130{ 131 register struct fs *fs; 132 ufs_daddr_t bno; 133 int cg, error; 134 int devBlockSize=0; 135 *bnp = 0; 136 fs = ip->i_fs; 137#if DIAGNOSTIC 138 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 139 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", 140 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); 141 panic("ffs_alloc: bad size"); 142 } 143 if (!IS_VALID_CRED(cred)) 144 panic("ffs_alloc: missing credential\n"); 145#endif /* DIAGNOSTIC */ 146 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 147 goto nospace; 148 if (suser(cred, NULL) && freespace(fs, fs->fs_minfree) <= 0) 149 goto nospace; 150 devBlockSize = vfs_devblocksize(vnode_mount(ITOV(ip))); 151#if QUOTA 152 if (error = chkdq(ip, (int64_t)size, cred, 0)) 153 return (error); 154#endif /* QUOTA */ 155 if (bpref >= fs->fs_size) 156 bpref = 0; 157 if (bpref == 0) 158 cg = ino_to_cg(fs, ip->i_number); 159 else 160 cg = dtog(fs, bpref); 161 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, 162 (u_int32_t (*)())ffs_alloccg); 163 if (bno > 0) { 164 ip->i_blocks += btodb(size, devBlockSize); 165 ip->i_flag |= IN_CHANGE | IN_UPDATE; 166 *bnp = bno; 167 return (0); 168 } 169#if QUOTA 170 /* 171 * Restore user's disk quota because allocation failed. 
172 */ 173 (void) chkdq(ip, (int64_t)-size, cred, FORCE); 174#endif /* QUOTA */ 175nospace: 176 ffs_fserr(fs, kauth_cred_getuid(cred), "file system full"); 177 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 178 return (ENOSPC); 179} 180 181/* 182 * Reallocate a fragment to a bigger size 183 * 184 * The number and size of the old block is given, and a preference 185 * and new size is also specified. The allocator attempts to extend 186 * the original block. Failing that, the regular block allocator is 187 * invoked to get an appropriate block. 188 */ 189ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) 190 register struct inode *ip; 191 ufs_daddr_t lbprev; 192 ufs_daddr_t bpref; 193 int osize, nsize; 194 kauth_cred_t cred; 195 struct buf **bpp; 196{ 197 register struct fs *fs; 198 struct buf *bp; 199 int cg, request, error; 200 ufs_daddr_t bprev, bno; 201 int devBlockSize=0; 202 203 *bpp = 0; 204 fs = ip->i_fs; 205#if DIAGNOSTIC 206 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 207 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 208 printf( 209 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n", 210 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); 211 panic("ffs_realloccg: bad size"); 212 } 213 if (!IS_VALID_CRED(cred)) 214 panic("ffs_realloccg: missing credential\n"); 215#endif /* DIAGNOSTIC */ 216 if (suser(cred, NULL) != 0 && freespace(fs, fs->fs_minfree) <= 0) 217 goto nospace; 218 if ((bprev = ip->i_db[lbprev]) == 0) { 219 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n", 220 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); 221 panic("ffs_realloccg: bad bprev"); 222 } 223 /* 224 * Allocate the extra space in the buffer. 
225 */ 226 if (error = (int)buf_bread(ITOV(ip), (daddr64_t)((unsigned)lbprev), osize, NOCRED, &bp)) { 227 buf_brelse(bp); 228 return (error); 229 } 230 devBlockSize = vfs_devblocksize(vnode_mount(ITOV(ip))); 231 232#if QUOTA 233 if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0)) 234 { 235 buf_brelse(bp); 236 return (error); 237 } 238#endif /* QUOTA */ 239 /* 240 * Check for extension in the existing location. 241 */ 242 cg = dtog(fs, bprev); 243 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) { 244 if ((ufs_daddr_t)buf_blkno(bp) != fsbtodb(fs, bno)) 245 panic("bad blockno"); 246 ip->i_blocks += btodb(nsize - osize, devBlockSize); 247 ip->i_flag |= IN_CHANGE | IN_UPDATE; 248 allocbuf(bp, nsize); 249 buf_setflags(bp, B_DONE); 250 bzero((char *)buf_dataptr(bp) + osize, (u_int)buf_size(bp) - osize); 251 *bpp = bp; 252 return (0); 253 } 254 /* 255 * Allocate a new disk location. 256 */ 257 if (bpref >= fs->fs_size) 258 bpref = 0; 259 switch ((int)fs->fs_optim) { 260 case FS_OPTSPACE: 261 /* 262 * Allocate an exact sized fragment. Although this makes 263 * best use of space, we will waste time relocating it if 264 * the file continues to grow. If the fragmentation is 265 * less than half of the minimum free reserve, we choose 266 * to begin optimizing for time. 267 */ 268 request = nsize; 269 if (fs->fs_minfree < 5 || 270 fs->fs_cstotal.cs_nffree > 271 fs->fs_dsize * fs->fs_minfree / (2 * 100)) 272 break; 273 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 274 fs->fs_fsmnt); 275 fs->fs_optim = FS_OPTTIME; 276 break; 277 case FS_OPTTIME: 278 /* 279 * At this point we have discovered a file that is trying to 280 * grow a small fragment to a larger fragment. To save time, 281 * we allocate a full sized block, then free the unused portion. 282 * If the file continues to grow, the `ffs_fragextend' call 283 * above will be able to grow it in place without further 284 * copying. 
If aberrant programs cause disk fragmentation to 285 * grow within 2% of the free reserve, we choose to begin 286 * optimizing for space. 287 */ 288 request = fs->fs_bsize; 289 if (fs->fs_cstotal.cs_nffree < 290 fs->fs_dsize * (fs->fs_minfree - 2) / 100) 291 break; 292 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 293 fs->fs_fsmnt); 294 fs->fs_optim = FS_OPTSPACE; 295 break; 296 default: 297 printf("dev = 0x%x, optim = %d, fs = %s\n", 298 ip->i_dev, fs->fs_optim, fs->fs_fsmnt); 299 panic("ffs_realloccg: bad optim"); 300 /* NOTREACHED */ 301 } 302 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, 303 (u_int32_t (*)())ffs_alloccg); 304 if (bno > 0) { 305 buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, bno))); 306 ffs_blkfree(ip, bprev, (long)osize); 307 if (nsize < request) 308 ffs_blkfree(ip, bno + numfrags(fs, nsize), 309 (long)(request - nsize)); 310 ip->i_blocks += btodb(nsize - osize, devBlockSize); 311 ip->i_flag |= IN_CHANGE | IN_UPDATE; 312 allocbuf(bp, nsize); 313 buf_setflags(bp, B_DONE); 314 bzero((char *)buf_dataptr(bp) + osize, (u_int)buf_size(bp) - osize); 315 *bpp = bp; 316 return (0); 317 } 318#if QUOTA 319 /* 320 * Restore user's disk quota because allocation failed. 321 */ 322 (void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE); 323#endif /* QUOTA */ 324 buf_brelse(bp); 325nospace: 326 /* 327 * no space available 328 */ 329 ffs_fserr(fs, kauth_cred_getuid(cred), "file system full"); 330 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 331 return (ENOSPC); 332} 333 334/* 335 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 336 * 337 * The vnode and an array of buffer pointers for a range of sequential 338 * logical blocks to be made contiguous is given. 
 The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */
/* Note: This routine is unused in UBC cluster I/O */

/* Tunables left over from the (unused) block-reallocation path above. */
int doasyncfree = 1;
int doreallocblks = 1;


/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 *
 * Returns 0 with the new, referenced vnode in *vpp, or an errno
 * (ENOSPC when no inodes are free) with *vpp left NULL.
 */
int
ffs_valloc(
	struct vnode *pvp,	/* parent directory vnode */
	mode_t mode,		/* mode of the inode being created (IFMT used here) */
	kauth_cred_t cred,	/* credential, reported on "out of inodes" */
	struct vnode **vpp)	/* out: vnode for the newly allocated inode */

{
	register struct inode *pip;
	register struct fs *fs;
	register struct inode *ip;
	struct timeval tv;
	ino_t ino, ipref;
	int cg, error;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	/* Directories get placed by policy; files start near their parent. */
	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in a cg without intervening files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;

	error = ffs_vget_internal(pvp->v_mount, ino, vpp, NULL, NULL, mode, 0);
	if (error) {
		/* couldn't materialize the vnode: give the inode back */
		ffs_vfree(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*vpp);

	/* A freshly allocated inode must be clean; anything else is corruption. */
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (ip->i_blocks) {	/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 * Generation numbers increase monotonically and are seeded from the
	 * clock so they stay unique across reboots (used by NFS file handles
	 * — presumably; confirm against callers).
	 */
	microtime(&tv);
	if (++nextgennumber < (u_long)tv.tv_sec)
		nextgennumber = tv.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	ffs_fserr(fs, kauth_cred_getuid(cred), "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * Returns the first inode number of the chosen cylinder group;
 * the caller uses it as an allocation preference, not a guarantee.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;	/* inode of the parent directory */
{
	register struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	fs = pip->i_fs;
	/* Per-cg averages of free inodes, free blocks, and directories. */
	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 * Start from a random cg, then pick the cg with the fewest
	 * directories among those with at-least-average free resources.
	 */
	if (ITOV(pip)->v_flag & VROOT) {
#ifdef __APPLE__
		prefcg = random() % fs->fs_ncg;
#else
		prefcg = arc4random() % fs->fs_ncg;
#endif
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - fs->fs_ipg / 4;
	if (minifree < 0)
		minifree = 0;
	minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
	if (minbfree < 0)
		minbfree = 0;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	/* Expected space consumed per directory (tunable via superblock). */
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	/*
	 * NOTE(review): if fs_avgfilesize * fs_avgfpdir is 0 and curdirsize
	 * computes to 0, the division below is by zero — verify the
	 * superblock fields are guaranteed non-zero by newfs/mount.
	 */
	maxcontigdirs = min(cgsize / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
				    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for 
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we have deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}

/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds. This is to allow time for the processor to
 * schedule another I/O transfer.
 *
 * Returns a preferred filesystem block number, or 0 (NULL) when no
 * cylinder group has at least the average number of free blocks.
 */
ufs_daddr_t
ffs_blkpref(ip, lbn, indx, bap)
	struct inode *ip;	/* inode being extended */
	ufs_daddr_t lbn;	/* logical block number being allocated */
	int indx;		/* index of this block within bap[] */
	ufs_daddr_t *bap;	/* block map (i_db[] or an indirect block's contents) */
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	ufs_daddr_t nextblk;
#if REV_ENDIAN_FS
	/*
	 * On a byte-swapped (REV_ENDIAN) mount, indirect-block entries are
	 * stored in on-disk (opposite) byte order, while i_db[] is native;
	 * `prev' holds bap[indx - 1] converted to host order.
	 */
	daddr_t prev=0;
	struct vnode *vp=ITOV(ip);
	struct mount *mp=vp->v_mount;
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;
#if REV_ENDIAN_FS
	if (indx && bap) {
		if (rev_endian) {
			if (bap != &ip->i_db[0])
				prev = OSSwapInt32(bap[indx - 1]);
			else
				prev = bap[indx - 1];
		} else prev = bap[indx - 1];
	}
	if (indx % fs->fs_maxbpg == 0 || prev == 0)
#else /* REV_ENDIAN_FS */
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0)
#endif /* REV_ENDIAN_FS */
	{
		/* Start of a new section (or no previous block to follow). */
		if (lbn < NDADDR) {
			/* Direct block: stay in the inode's own cg. */
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
#if REV_ENDIAN_FS
		if (indx == 0 || prev == 0)
#else /* REV_ENDIAN_FS */
		if (indx == 0 || bap[indx - 1] == 0)
#endif /* REV_ENDIAN_FS */
			/* No history: guess a starting cg from the lbn. */
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
#if REV_ENDIAN_FS
			startcg = dtog(fs, prev) + 1;
#else /* REV_ENDIAN_FS */
			startcg = dtog(fs, bap[indx - 1]) + 1;
#endif /* REV_ENDIAN_FS */
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		/* Sweep startcg..ncg-1, then wrap 0..startcg. */
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
#if REV_ENDIAN_FS
	if (rev_endian) {
		nextblk = prev + fs->fs_frag;
		if (indx < fs->fs_maxcontig) {
			return (nextblk);
		}
		if (bap != &ip->i_db[0])
			prev = OSSwapInt32(bap[indx - fs->fs_maxcontig]);
		else
			prev = bap[indx - fs->fs_maxcontig];
		if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk)
			return (nextblk);
	} else {
#endif /* REV_ENDIAN_FS */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
		return (nextblk);
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
693 */ 694/*VARARGS5*/ 695static u_long 696ffs_hashalloc(ip, cg, pref, size, allocator) 697 struct inode *ip; 698 int cg; 699 long pref; 700 int size; /* size for data blocks, mode for inodes */ 701 u_int32_t (*allocator)(); 702{ 703 register struct fs *fs; 704 long result; 705 int i, icg = cg; 706 707 fs = ip->i_fs; 708 /* 709 * 1: preferred cylinder group 710 */ 711 result = (*allocator)(ip, cg, pref, size); 712 if (result) 713 return (result); 714 /* 715 * 2: quadratic rehash 716 */ 717 for (i = 1; i < fs->fs_ncg; i *= 2) { 718 cg += i; 719 if (cg >= fs->fs_ncg) 720 cg -= fs->fs_ncg; 721 result = (*allocator)(ip, cg, 0, size); 722 if (result) 723 return (result); 724 } 725 /* 726 * 3: brute force search 727 * Note that we start at i == 2, since 0 was checked initially, 728 * and 1 is always checked in the quadratic rehash. 729 */ 730 cg = (icg + 2) % fs->fs_ncg; 731 for (i = 2; i < fs->fs_ncg; i++) { 732 result = (*allocator)(ip, cg, 0, size); 733 if (result) 734 return (result); 735 cg++; 736 if (cg == fs->fs_ncg) 737 cg = 0; 738 } 739 return (NULL); 740} 741 742/* 743 * Determine whether a fragment can be extended. 744 * 745 * Check to see if the necessary fragments are available, and 746 * if they are, allocate them. 
 *
 * Returns bprev (the unchanged fragment address) on success, or
 * 0 (NULL) when the extension is not possible.
 */
static ufs_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;	/* inode owning the fragment */
	int cg;			/* cylinder group containing bprev */
	long bprev;		/* existing fragment's filesystem address */
	int osize, nsize;	/* current and desired sizes */
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	struct timeval tv;
	long bno;
	int frags, bbase;
	int i, error;
#if REV_ENDIAN_FS
	/* On byte-swapped mounts the cg block must be swapped in on read
	 * and swapped back out before every release/write of the buffer. */
	struct vnode *vp=ITOV(ip);
	struct mount *mp=vp->v_mount;
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;
	/* Quick reject: not enough free fragments anywhere in this cg. */
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);	/* fragments needed for nsize */
	bbase = fragnum(fs, bprev);	/* bprev's offset within its block */
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	/* read corresponding cylinder group info */
	error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		buf_brelse(bp);
		return (NULL);
	}
	cgp = (struct cg *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	if (rev_endian) {
		byte_swap_cgin(cgp, fs);
	}
#endif /* REV_ENDIAN_FS */

	if (!cg_chkmagic(cgp)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(bp);
		return (NULL);
	}
	microtime(&tv);
	cgp->cg_time = tv.tv_sec;
	bno = dtogd(fs, bprev);		/* cg-relative fragment number */
	/* Every fragment beyond the old size must currently be free. */
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	/* Measure the free run following the old fragment (within the block). */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;	/* leftover free piece */
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
	buf_bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 *
 * Returns the filesystem-relative address of the allocation, or
 * 0 (NULL) on failure.
 */
static ufs_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;	/* inode the block is for */
	int cg;			/* cylinder group to allocate in */
	ufs_daddr_t bpref;	/* preferred fragment address (0 = none) */
	int size;		/* bytes requested (fragment multiple) */
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	struct timeval tv;
	register int i;
	int error, bno, frags, allocsiz;
#if REV_ENDIAN_FS
	struct vnode *vp=ITOV(ip);
	struct mount *mp=vp->v_mount;
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;
	/* A full-block request needs a whole free block in this cg. */
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		buf_brelse(bp);
		return (NULL);
	}
	cgp = (struct cg *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(bp);
		return (NULL);
	}
	microtime(&tv);
	cgp->cg_time = tv.tv_sec;
	if (size == fs->fs_bsize) {
		/* Whole-block case: ffs_alloccgblk returns an fs-relative address. */
		bno = ffs_alloccgblk(fs, cgp, bpref);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(bp);
			return (NULL);
		}
		bno = ffs_alloccgblk(fs, cgp, bpref);
		/* Mark the unused tail of the new block free again. */
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_bdwrite(bp);
		return (bno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	/* Took `frags' out of an allocsiz-sized free run; record the remainder. */
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
	buf_bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 *
 * Returns the filesystem-relative fragment address of the allocated
 * block, or 0 (NULL) if the cylinder group map yields nothing.
 * The caller holds the cg buffer; summary counts are updated here.
 */
static ufs_daddr_t
ffs_alloccgblk(fs, cgp, bpref)
	register struct fs *fs;		/* filesystem superblock */
	register struct cg *cgp;	/* in-core cylinder group (native byte order) */
	ufs_daddr_t bpref;		/* preferred fragment address (0 = none) */
{
	ufs_daddr_t bno, blkno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	/* Preference outside this cg (or absent): fall back to the rotor. */
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);	/* round down to a block boundary */
	bpref = dtogd(fs, bpref);	/* make it cg-relative */
	/*
	 * if the requested block is available, use it
	 */
	if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
		/*
		 * Block layout information is not available.
		 * Leaving bpref unchanged means we take the
		 * next available free block following the one
		 * we just allocated. Hopefully this will at
		 * least hit a track cache on drives of unknown
		 * geometry (e.g. SCSI).
		 */
		goto norot;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block. A panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("ffs_alloccgblk: cyl groups corrupted");
		}
		/* Walk the rotational-position table chain via fs_rotbl deltas. */
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("ffs_alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	/* Mark the block allocated and update all summary counts. */
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod = 1;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group.
Instead we just
 * take the first one that we find following bpref.
 *
 * ip    - inode the cluster is being allocated for
 * cg    - cylinder group to search
 * bpref - preferred starting fragment address (may be outside cg)
 * len   - cluster length, in blocks
 *
 * Returns the filesystem-relative fragment address of the first block
 * of the cluster, or 0/NULL on failure.
 */
static ufs_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs_daddr_t bpref;
	int len;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int i, got, run, bno, bit, map;
	u_char *mapp;
	int32_t *lp;
#if REV_ENDIAN_FS
	struct vnode *vp=ITOV(ip);
	struct mount *mp=vp->v_mount;
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;
	/*
	 * Cached per-cg maximum cluster size says this group cannot
	 * satisfy the request: fail without touching the disk.
	 */
	if (fs->fs_maxcluster[cg] < len)
		return (NULL);
	if (buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail;
	cgp = (struct cg *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
	if (!cg_chkmagic(cgp)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		goto fail;
	}
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		fs->fs_maxcluster[cg] = i;
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation. We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	/*
	 * Bit-scan the cluster free map starting at bpref: `run` counts
	 * consecutive free blocks, `got` is the current block index.
	 */
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		/* Advance the bit cursor, refilling `map` on byte boundaries. */
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got == cgp->cg_nclusterblks) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		goto fail;
	}
	/*
	 * Allocate the cluster that we have found.
	 * On exit from the scan, blocks (got - run + 1)..got are free;
	 * sanity-check them against the block free map first.
	 */
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
			panic("ffs_clusteralloc: map mismatch");
	bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
	len = blkstofrags(fs, len);
	/* Claim each block of the cluster through the normal allocator. */
	for (i = 0; i < len; i += fs->fs_frag)
		if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i)
			panic("ffs_clusteralloc: lost block");
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
	buf_bdwrite(bp);
	return (bno);

fail:
	/*
	 * NOTE(review): bp is released here even on the buf_bread-failure
	 * path — presumably buf_bread always returns a buffer to release;
	 * this matches the other callers in this file.
	 */
	buf_brelse(bp);
	return (0);
}

/*
 * Determine whether an inode can be allocated.
1215 * 1216 * Check to see if an inode is available, and if it is, 1217 * allocate it using the following policy: 1218 * 1) allocate the requested inode. 1219 * 2) allocate the next available inode after the requested 1220 * inode in the specified cylinder group. 1221 */ 1222static ino_t 1223ffs_nodealloccg(ip, cg, ipref, mode) 1224 struct inode *ip; 1225 int cg; 1226 ufs_daddr_t ipref; 1227 int mode; 1228{ 1229 register struct fs *fs; 1230 register struct cg *cgp; 1231 struct buf *bp; 1232 struct timeval tv; 1233 int error, start, len, loc, map, i; 1234#if REV_ENDIAN_FS 1235 struct vnode *vp=ITOV(ip); 1236 struct mount *mp=vp->v_mount; 1237 int rev_endian=(mp->mnt_flag & MNT_REVEND); 1238#endif /* REV_ENDIAN_FS */ 1239 1240 fs = ip->i_fs; 1241 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1242 return (NULL); 1243 error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), 1244 (int)fs->fs_cgsize, NOCRED, &bp); 1245 if (error) { 1246 buf_brelse(bp); 1247 return (NULL); 1248 } 1249 cgp = (struct cg *)buf_dataptr(bp); 1250#if REV_ENDIAN_FS 1251 if (rev_endian) 1252 byte_swap_cgin(cgp,fs); 1253#endif /* REV_ENDIAN_FS */ 1254 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1255#if REV_ENDIAN_FS 1256 if (rev_endian) 1257 byte_swap_cgout(cgp,fs); 1258#endif /* REV_ENDIAN_FS */ 1259 buf_brelse(bp); 1260 return (NULL); 1261 } 1262 1263 microtime(&tv); 1264 cgp->cg_time = tv.tv_sec; 1265 if (ipref) { 1266 ipref %= fs->fs_ipg; 1267 if (isclr(cg_inosused(cgp), ipref)) 1268 goto gotit; 1269 } 1270 start = cgp->cg_irotor / NBBY; 1271 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 1272 loc = skpc(0xff, len, &cg_inosused(cgp)[start]); 1273 if (loc == 0) { 1274 len = start + 1; 1275 start = 0; 1276 loc = skpc(0xff, len, &cg_inosused(cgp)[0]); 1277 if (loc == 0) { 1278 printf("cg = %d, irotor = %d, fs = %s\n", 1279 cg, cgp->cg_irotor, fs->fs_fsmnt); 1280 panic("ffs_nodealloccg: map corrupted"); 1281 /* NOTREACHED */ 1282 } 1283 } 1284 i = start + len - 
loc; 1285 map = cg_inosused(cgp)[i]; 1286 ipref = i * NBBY; 1287 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 1288 if ((map & i) == 0) { 1289 cgp->cg_irotor = ipref; 1290 goto gotit; 1291 } 1292 } 1293 printf("fs = %s\n", fs->fs_fsmnt); 1294 panic("ffs_nodealloccg: block not in map"); 1295 /* NOTREACHED */ 1296gotit: 1297 setbit(cg_inosused(cgp), ipref); 1298 cgp->cg_cs.cs_nifree--; 1299 fs->fs_cstotal.cs_nifree--; 1300 fs->fs_cs(fs, cg).cs_nifree--; 1301 fs->fs_fmod = 1; 1302 if ((mode & IFMT) == IFDIR) { 1303 cgp->cg_cs.cs_ndir++; 1304 fs->fs_cstotal.cs_ndir++; 1305 fs->fs_cs(fs, cg).cs_ndir++; 1306 } 1307#if REV_ENDIAN_FS 1308 if (rev_endian) 1309 byte_swap_cgout(cgp,fs); 1310#endif /* REV_ENDIAN_FS */ 1311 buf_bdwrite(bp); 1312 return (cg * fs->fs_ipg + ipref); 1313} 1314 1315/* 1316 * Free a block or fragment. 1317 * 1318 * The specified block or fragment is placed back in the 1319 * free map. If a fragment is deallocated, a possible 1320 * block reassembly is checked. 1321 */ 1322void 1323ffs_blkfree(ip, bno, size) 1324 register struct inode *ip; 1325 ufs_daddr_t bno; 1326 long size; 1327{ 1328 register struct fs *fs; 1329 register struct cg *cgp; 1330 struct buf *bp; 1331 struct timeval tv; 1332 ufs_daddr_t blkno; 1333 int i, error, cg, blk, frags, bbase; 1334#if REV_ENDIAN_FS 1335 struct vnode *vp=ITOV(ip); 1336 struct mount *mp=vp->v_mount; 1337 int rev_endian=(mp->mnt_flag & MNT_REVEND); 1338#endif /* REV_ENDIAN_FS */ 1339 1340 fs = ip->i_fs; 1341 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1342 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", 1343 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); 1344 panic("blkfree: bad size"); 1345 } 1346 cg = dtog(fs, bno); 1347 if ((u_int)bno >= fs->fs_size) { 1348 printf("bad block %d, ino %d\n", bno, ip->i_number); 1349 ffs_fserr(fs, ip->i_uid, "bad block"); 1350 return; 1351 } 1352 error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), 1353 (int)fs->fs_cgsize, 
NOCRED, &bp); 1354 if (error) { 1355 buf_brelse(bp); 1356 return; 1357 } 1358 cgp = (struct cg *)buf_dataptr(bp); 1359#if REV_ENDIAN_FS 1360 if (rev_endian) 1361 byte_swap_cgin(cgp,fs); 1362#endif /* REV_ENDIAN_FS */ 1363 if (!cg_chkmagic(cgp)) { 1364#if REV_ENDIAN_FS 1365 if (rev_endian) 1366 byte_swap_cgout(cgp,fs); 1367#endif /* REV_ENDIAN_FS */ 1368 buf_brelse(bp); 1369 return; 1370 } 1371 microtime(&tv); 1372 cgp->cg_time = tv.tv_sec; 1373 bno = dtogd(fs, bno); 1374 if (size == fs->fs_bsize) { 1375 blkno = fragstoblks(fs, bno); 1376 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { 1377 printf("dev = 0x%x, block = %d, fs = %s\n", 1378 ip->i_dev, bno, fs->fs_fsmnt); 1379 panic("blkfree: freeing free block"); 1380 } 1381 ffs_setblock(fs, cg_blksfree(cgp), blkno); 1382 ffs_clusteracct(fs, cgp, blkno, 1); 1383 cgp->cg_cs.cs_nbfree++; 1384 fs->fs_cstotal.cs_nbfree++; 1385 fs->fs_cs(fs, cg).cs_nbfree++; 1386 i = cbtocylno(fs, bno); 1387 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; 1388 cg_blktot(cgp)[i]++; 1389 } else { 1390 bbase = bno - fragnum(fs, bno); 1391 /* 1392 * decrement the counts associated with the old frags 1393 */ 1394 blk = blkmap(fs, cg_blksfree(cgp), bbase); 1395 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1396 /* 1397 * deallocate the fragment 1398 */ 1399 frags = numfrags(fs, size); 1400 for (i = 0; i < frags; i++) { 1401 if (isset(cg_blksfree(cgp), bno + i)) { 1402 printf("dev = 0x%x, block = %d, fs = %s\n", 1403 ip->i_dev, bno + i, fs->fs_fsmnt); 1404 panic("blkfree: freeing free frag"); 1405 } 1406 setbit(cg_blksfree(cgp), bno + i); 1407 } 1408 cgp->cg_cs.cs_nffree += i; 1409 fs->fs_cstotal.cs_nffree += i; 1410 fs->fs_cs(fs, cg).cs_nffree += i; 1411 /* 1412 * add back in counts associated with the new frags 1413 */ 1414 blk = blkmap(fs, cg_blksfree(cgp), bbase); 1415 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1416 /* 1417 * if a complete block has been reassembled, account for it 1418 */ 1419 blkno = fragstoblks(fs, bbase); 1420 if (ffs_isblock(fs, 
cg_blksfree(cgp), blkno)) { 1421 cgp->cg_cs.cs_nffree -= fs->fs_frag; 1422 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1423 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1424 ffs_clusteracct(fs, cgp, blkno, 1); 1425 cgp->cg_cs.cs_nbfree++; 1426 fs->fs_cstotal.cs_nbfree++; 1427 fs->fs_cs(fs, cg).cs_nbfree++; 1428 i = cbtocylno(fs, bbase); 1429 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; 1430 cg_blktot(cgp)[i]++; 1431 } 1432 } 1433 fs->fs_fmod = 1; 1434#if REV_ENDIAN_FS 1435 if (rev_endian) 1436 byte_swap_cgout(cgp,fs); 1437#endif /* REV_ENDIAN_FS */ 1438 buf_bdwrite(bp); 1439} 1440 1441#if DIAGNOSTIC 1442/* 1443 * Verify allocation of a block or fragment. Returns true if block or 1444 * fragment is allocated, false if it is free. 1445 */ 1446ffs_checkblk(ip, bno, size) 1447 struct inode *ip; 1448 ufs_daddr_t bno; 1449 long size; 1450{ 1451 struct fs *fs; 1452 struct cg *cgp; 1453 struct buf *bp; 1454 int i, error, frags, free; 1455#if REV_ENDIAN_FS 1456 struct vnode *vp=ITOV(ip); 1457 struct mount *mp=vp->v_mount; 1458 int rev_endian=(mp->mnt_flag & MNT_REVEND); 1459#endif /* REV_ENDIAN_FS */ 1460 1461 fs = ip->i_fs; 1462 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1463 printf("bsize = %d, size = %d, fs = %s\n", 1464 fs->fs_bsize, size, fs->fs_fsmnt); 1465 panic("checkblk: bad size"); 1466 } 1467 if ((u_int)bno >= fs->fs_size) 1468 panic("checkblk: bad block %d", bno); 1469 error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, dtog(fs, bno)))), 1470 (int)fs->fs_cgsize, NOCRED, &bp); 1471 if (error) { 1472 buf_brelse(bp); 1473 return; 1474 } 1475 cgp = (struct cg *)buf_dataptr(bp); 1476#if REV_ENDIAN_FS 1477 if (rev_endian) 1478 byte_swap_cgin(cgp,fs); 1479#endif /* REV_ENDIAN_FS */ 1480 if (!cg_chkmagic(cgp)) { 1481#if REV_ENDIAN_FS 1482 if (rev_endian) 1483 byte_swap_cgout(cgp,fs); 1484#endif /* REV_ENDIAN_FS */ 1485 buf_brelse(bp); 1486 return; 1487 } 1488 bno = dtogd(fs, bno); 1489 if (size == fs->fs_bsize) { 1490 free = 
ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); 1491 } else { 1492 frags = numfrags(fs, size); 1493 for (free = 0, i = 0; i < frags; i++) 1494 if (isset(cg_blksfree(cgp), bno + i)) 1495 free++; 1496 if (free != 0 && free != frags) 1497 panic("checkblk: partially free fragment"); 1498 } 1499#if REV_ENDIAN_FS 1500 if (rev_endian) 1501 byte_swap_cgout(cgp,fs); 1502#endif /* REV_ENDIAN_FS */ 1503 buf_brelse(bp); 1504 return (!free); 1505} 1506#endif /* DIAGNOSTIC */ 1507 1508/* 1509 * Free an inode. 1510 * 1511 * The specified inode is placed back in the free map. 1512 */ 1513int 1514ffs_vfree(struct vnode *vp, ino_t ino, int mode) 1515{ 1516 register struct fs *fs; 1517 register struct cg *cgp; 1518 register struct inode *pip; 1519 struct buf *bp; 1520 struct timeval tv; 1521 int error, cg; 1522#if REV_ENDIAN_FS 1523 struct mount *mp=vp->v_mount; 1524 int rev_endian=(mp->mnt_flag & MNT_REVEND); 1525#endif /* REV_ENDIAN_FS */ 1526 1527 pip = VTOI(vp); 1528 fs = pip->i_fs; 1529 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1530 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", 1531 pip->i_dev, ino, fs->fs_fsmnt); 1532 cg = ino_to_cg(fs, ino); 1533 error = (int)buf_bread(pip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), 1534 (int)fs->fs_cgsize, NOCRED, &bp); 1535 if (error) { 1536 buf_brelse(bp); 1537 return (0); 1538 } 1539 cgp = (struct cg *)buf_dataptr(bp); 1540#if REV_ENDIAN_FS 1541 if (rev_endian) 1542 byte_swap_cgin(cgp,fs); 1543#endif /* REV_ENDIAN_FS */ 1544 if (!cg_chkmagic(cgp)) { 1545#if REV_ENDIAN_FS 1546 if (rev_endian) 1547 byte_swap_cgout(cgp,fs); 1548#endif /* REV_ENDIAN_FS */ 1549 buf_brelse(bp); 1550 return (0); 1551 } 1552 microtime(&tv); 1553 cgp->cg_time = tv.tv_sec; 1554 ino %= fs->fs_ipg; 1555 if (isclr(cg_inosused(cgp), ino)) { 1556 printf("dev = 0x%x, ino = %d, fs = %s\n", 1557 pip->i_dev, ino, fs->fs_fsmnt); 1558 if (fs->fs_ronly == 0) 1559 panic("ifree: freeing free inode"); 1560 } 1561 clrbit(cg_inosused(cgp), ino); 
1562 if (ino < cgp->cg_irotor) 1563 cgp->cg_irotor = ino; 1564 cgp->cg_cs.cs_nifree++; 1565 fs->fs_cstotal.cs_nifree++; 1566 fs->fs_cs(fs, cg).cs_nifree++; 1567 if ((mode & IFMT) == IFDIR) { 1568 cgp->cg_cs.cs_ndir--; 1569 fs->fs_cstotal.cs_ndir--; 1570 fs->fs_cs(fs, cg).cs_ndir--; 1571 } 1572 fs->fs_fmod = 1; 1573#if REV_ENDIAN_FS 1574 if (rev_endian) 1575 byte_swap_cgout(cgp,fs); 1576#endif /* REV_ENDIAN_FS */ 1577 buf_bdwrite(bp); 1578 return (0); 1579} 1580 1581/* 1582 * Find a block of the specified size in the specified cylinder group. 1583 * 1584 * It is a panic if a request is made to find a block if none are 1585 * available. 1586 */ 1587static ufs_daddr_t 1588ffs_mapsearch(fs, cgp, bpref, allocsiz) 1589 register struct fs *fs; 1590 register struct cg *cgp; 1591 ufs_daddr_t bpref; 1592 int allocsiz; 1593{ 1594 ufs_daddr_t bno; 1595 int start, len, loc, i; 1596 int blk, field, subfield, pos; 1597 1598 /* 1599 * find the fragment by searching through the free block 1600 * map for an appropriate bit pattern 1601 */ 1602 if (bpref) 1603 start = dtogd(fs, bpref) / NBBY; 1604 else 1605 start = cgp->cg_frotor / NBBY; 1606 len = howmany(fs->fs_fpg, NBBY) - start; 1607 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], 1608 (u_char *)fragtbl[fs->fs_frag], 1609 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1610 if (loc == 0) { 1611 len = start + 1; 1612 start = 0; 1613 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], 1614 (u_char *)fragtbl[fs->fs_frag], 1615 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1616 if (loc == 0) { 1617 printf("start = %d, len = %d, fs = %s\n", 1618 start, len, fs->fs_fsmnt); 1619 panic("ffs_alloccg: map corrupted"); 1620 /* NOTREACHED */ 1621 } 1622 } 1623 bno = (start + len - loc) * NBBY; 1624 cgp->cg_frotor = bno; 1625 /* 1626 * found the byte in the map 1627 * sift through the bits to find the selected frag 1628 */ 1629 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 1630 blk = blkmap(fs, 
cg_blksfree(cgp), bno); 1631 blk <<= 1; 1632 field = around[allocsiz]; 1633 subfield = inside[allocsiz]; 1634 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 1635 if ((blk & field) == subfield) 1636 return (bno + pos); 1637 field <<= 1; 1638 subfield <<= 1; 1639 } 1640 } 1641 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt); 1642 panic("ffs_alloccg: block not in map"); 1643 return (-1); 1644} 1645 1646/* 1647 * Update the cluster map because of an allocation or free. 1648 * 1649 * Cnt == 1 means free; cnt == -1 means allocating. 1650 */ 1651static void 1652ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt) 1653{ 1654 int32_t *sump; 1655 int32_t *lp; 1656 u_char *freemapp, *mapp; 1657 int i, start, end, forw, back, map, bit; 1658 1659 if (fs->fs_contigsumsize <= 0) 1660 return; 1661 freemapp = cg_clustersfree(cgp); 1662 sump = cg_clustersum(cgp); 1663 /* 1664 * Allocate or clear the actual block. 1665 */ 1666 if (cnt > 0) 1667 setbit(freemapp, blkno); 1668 else 1669 clrbit(freemapp, blkno); 1670 /* 1671 * Find the size of the cluster going forward. 1672 */ 1673 start = blkno + 1; 1674 end = start + fs->fs_contigsumsize; 1675 if (end >= cgp->cg_nclusterblks) 1676 end = cgp->cg_nclusterblks; 1677 mapp = &freemapp[start / NBBY]; 1678 map = *mapp++; 1679 bit = 1 << (start % NBBY); 1680 for (i = start; i < end; i++) { 1681 if ((map & bit) == 0) 1682 break; 1683 if ((i & (NBBY - 1)) != (NBBY - 1)) { 1684 bit <<= 1; 1685 } else { 1686 map = *mapp++; 1687 bit = 1; 1688 } 1689 } 1690 forw = i - start; 1691 /* 1692 * Find the size of the cluster going backward. 
1693 */ 1694 start = blkno - 1; 1695 end = start - fs->fs_contigsumsize; 1696 if (end < 0) 1697 end = -1; 1698 mapp = &freemapp[start / NBBY]; 1699 map = *mapp--; 1700 bit = 1 << (start % NBBY); 1701 for (i = start; i > end; i--) { 1702 if ((map & bit) == 0) 1703 break; 1704 if ((i & (NBBY - 1)) != 0) { 1705 bit >>= 1; 1706 } else { 1707 map = *mapp--; 1708 bit = 1 << (NBBY - 1); 1709 } 1710 } 1711 back = start - i; 1712 /* 1713 * Account for old cluster and the possibly new forward and 1714 * back clusters. 1715 */ 1716 i = back + forw + 1; 1717 if (i > fs->fs_contigsumsize) 1718 i = fs->fs_contigsumsize; 1719 sump[i] += cnt; 1720 if (back > 0) 1721 sump[back] -= cnt; 1722 if (forw > 0) 1723 sump[forw] -= cnt; 1724 /* 1725 * Update cluster summary information. 1726 */ 1727 lp = &sump[fs->fs_contigsumsize]; 1728 for (i = fs->fs_contigsumsize; i > 0; i--) 1729 if (*lp-- > 0) 1730 break; 1731 fs->fs_maxcluster[cgp->cg_cgx] = i; 1732} 1733 1734/* 1735 * Fserr prints the name of a file system with an error diagnostic. 1736 * 1737 * The form of the error message is: 1738 * fs: error message 1739 */ 1740static void 1741ffs_fserr(fs, uid, cp) 1742 struct fs *fs; 1743 u_int uid; 1744 char *cp; 1745{ 1746 1747 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); 1748} 1749