ffs_alloc.c revision 96755
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 3322521Sdyson * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 3450477Speter * $FreeBSD: head/sys/ufs/ffs/ffs_alloc.c 96755 2002-05-16 21:28:32Z trhodes $ 351541Srgrimes */ 361541Srgrimes 3713260Swollman#include "opt_quota.h" 3813260Swollman 391541Srgrimes#include <sys/param.h> 401541Srgrimes#include <sys/systm.h> 4160041Sphk#include <sys/bio.h> 421541Srgrimes#include <sys/buf.h> 4350253Sbde#include <sys/conf.h> 4474548Smckusick#include <sys/file.h> 451541Srgrimes#include <sys/proc.h> 461541Srgrimes#include <sys/vnode.h> 471541Srgrimes#include <sys/mount.h> 4841124Sdg#include <sys/kernel.h> 4912911Sphk#include <sys/sysctl.h> 501541Srgrimes#include <sys/syslog.h> 511541Srgrimes 5259241Srwatson#include <ufs/ufs/extattr.h> 531541Srgrimes#include <ufs/ufs/quota.h> 541541Srgrimes#include <ufs/ufs/inode.h> 5541124Sdg#include <ufs/ufs/ufs_extern.h> 5630474Sphk#include <ufs/ufs/ufsmount.h> 571541Srgrimes 581541Srgrimes#include <ufs/ffs/fs.h> 591541Srgrimes#include <ufs/ffs/ffs_extern.h> 601541Srgrimes 6192728Salfredtypedef ufs_daddr_t allocfcn_t(struct inode *ip, int cg, ufs_daddr_t bpref, 6292728Salfred int size); 6312590Sbde 6492728Salfredstatic ufs_daddr_t ffs_alloccg(struct inode *, int, ufs_daddr_t, int); 6534266Sjulianstatic ufs_daddr_t 6692728Salfred ffs_alloccgblk(struct inode *, struct buf *, ufs_daddr_t); 6731352Sbde#ifdef DIAGNOSTIC 6892728Salfredstatic int ffs_checkblk(struct inode *, ufs_daddr_t, long); 6931352Sbde#endif 7092728Salfredstatic ufs_daddr_t ffs_clusteralloc(struct inode *, int, ufs_daddr_t, int); 7192728Salfredstatic ino_t ffs_dirpref(struct inode *); 7292728Salfredstatic ufs_daddr_t ffs_fragextend(struct inode *, int, long, int, int); 7392728Salfredstatic void ffs_fserr(struct fs *, ino_t, char *); 741541Srgrimesstatic u_long ffs_hashalloc 7592728Salfred (struct inode *, int, long, int, allocfcn_t *); 7692728Salfredstatic ino_t ffs_nodealloccg(struct inode *, int, ufs_daddr_t, int); 7792728Salfredstatic ufs_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs_daddr_t, int); 781541Srgrimes 791541Srgrimes/* 8096755Strhodes * Allocate a block in the filesystem. 818876Srgrimes * 821541Srgrimes * The size of the requested block is given, which must be some 831541Srgrimes * multiple of fs_fsize and <= fs_bsize. 841541Srgrimes * A preference may be optionally specified. If a preference is given 851541Srgrimes * the following hierarchy is used to allocate a block: 861541Srgrimes * 1) allocate the requested block. 871541Srgrimes * 2) allocate a rotationally optimal block in the same cylinder. 881541Srgrimes * 3) allocate a block in the same cylinder group. 891541Srgrimes * 4) quadradically rehash into other cylinder groups, until an 901541Srgrimes * available block is located. 911541Srgrimes * If no block preference is given the following heirarchy is used 921541Srgrimes * to allocate a block: 931541Srgrimes * 1) allocate a block in the cylinder group that contains the 941541Srgrimes * inode for the file. 951541Srgrimes * 2) quadradically rehash into other cylinder groups, until an 961541Srgrimes * available block is located. 971541Srgrimes */ 981549Srgrimesint 991541Srgrimesffs_alloc(ip, lbn, bpref, size, cred, bnp) 10096506Sphk struct inode *ip; 10122521Sdyson ufs_daddr_t lbn, bpref; 1021541Srgrimes int size; 1031541Srgrimes struct ucred *cred; 10422521Sdyson ufs_daddr_t *bnp; 1051541Srgrimes{ 10696506Sphk struct fs *fs; 10722521Sdyson ufs_daddr_t bno; 10889637Smckusick int cg, reclaimed; 1096357Sphk#ifdef QUOTA 1106357Sphk int error; 1116357Sphk#endif 1128876Srgrimes 1131541Srgrimes *bnp = 0; 1141541Srgrimes fs = ip->i_fs; 1151541Srgrimes#ifdef DIAGNOSTIC 1161541Srgrimes if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 11750253Sbde printf("dev = %s, bsize = %ld, size = %d, fs = %s\n", 11850253Sbde devtoname(ip->i_dev), (long)fs->fs_bsize, size, 11950253Sbde fs->fs_fsmnt); 1201541Srgrimes panic("ffs_alloc: bad size"); 1211541Srgrimes } 1221541Srgrimes if (cred == NOCRED) 1237170Sdg panic("ffs_alloc: missing credential"); 1241541Srgrimes#endif /* DIAGNOSTIC */ 12589637Smckusick reclaimed = 0; 12689637Smckusickretry: 1271541Srgrimes if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 1281541Srgrimes goto nospace; 12993593Sjhb if (suser_cred(cred, PRISON_ROOT) && 13029609Sphk freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) 1311541Srgrimes goto nospace; 1321541Srgrimes#ifdef QUOTA 1333487Sphk error = chkdq(ip, (long)btodb(size), cred, 0); 1343487Sphk if (error) 1351541Srgrimes return (error); 1361541Srgrimes#endif 1371541Srgrimes if (bpref >= fs->fs_size) 1381541Srgrimes bpref = 0; 1391541Srgrimes if (bpref == 0) 1401541Srgrimes cg = ino_to_cg(fs, ip->i_number); 1411541Srgrimes else 1421541Srgrimes cg = dtog(fs, bpref); 14322521Sdyson bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, 14422521Sdyson ffs_alloccg); 1451541Srgrimes if (bno > 0) { 1461541Srgrimes ip->i_blocks += btodb(size); 1471541Srgrimes ip->i_flag |= IN_CHANGE | IN_UPDATE; 1481541Srgrimes *bnp = bno; 1491541Srgrimes return (0); 1501541Srgrimes } 1511541Srgrimes#ifdef QUOTA 1521541Srgrimes /* 1531541Srgrimes * Restore user's disk quota because allocation failed. 1541541Srgrimes */ 1551541Srgrimes (void) chkdq(ip, (long)-btodb(size), cred, FORCE); 1561541Srgrimes#endif 1571541Srgrimesnospace: 15889637Smckusick if (fs->fs_pendingblocks > 0 && reclaimed == 0) { 15989637Smckusick reclaimed = 1; 16089637Smckusick softdep_request_cleanup(fs, ITOV(ip)); 16189637Smckusick goto retry; 16289637Smckusick } 16396755Strhodes ffs_fserr(fs, ip->i_number, "filesystem full"); 16496755Strhodes uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); 1651541Srgrimes return (ENOSPC); 1661541Srgrimes} 1671541Srgrimes 1681541Srgrimes/* 1691541Srgrimes * Reallocate a fragment to a bigger size 1701541Srgrimes * 1711541Srgrimes * The number and size of the old block is given, and a preference 1721541Srgrimes * and new size is also specified. The allocator attempts to extend 1731541Srgrimes * the original block. Failing that, the regular block allocator is 1741541Srgrimes * invoked to get an appropriate block. 1751541Srgrimes */ 1761549Srgrimesint 1771541Srgrimesffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) 17896506Sphk struct inode *ip; 17922521Sdyson ufs_daddr_t lbprev; 18022521Sdyson ufs_daddr_t bpref; 1811541Srgrimes int osize, nsize; 1821541Srgrimes struct ucred *cred; 1831541Srgrimes struct buf **bpp; 1841541Srgrimes{ 18589637Smckusick struct vnode *vp; 18689637Smckusick struct fs *fs; 1871541Srgrimes struct buf *bp; 18889637Smckusick int cg, request, error, reclaimed; 18922521Sdyson ufs_daddr_t bprev, bno; 1908876Srgrimes 1911541Srgrimes *bpp = 0; 19289637Smckusick vp = ITOV(ip); 1931541Srgrimes fs = ip->i_fs; 1941541Srgrimes#ifdef DIAGNOSTIC 19589637Smckusick if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) 19662976Smckusick panic("ffs_realloccg: allocation on suspended filesystem"); 1971541Srgrimes if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 1981541Srgrimes (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 1991541Srgrimes printf( 20050253Sbde "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", 20150253Sbde devtoname(ip->i_dev), (long)fs->fs_bsize, osize, 2028456Srgrimes nsize, fs->fs_fsmnt); 2031541Srgrimes panic("ffs_realloccg: bad size"); 2041541Srgrimes } 2051541Srgrimes if (cred == NOCRED) 2067170Sdg panic("ffs_realloccg: missing credential"); 2071541Srgrimes#endif /* DIAGNOSTIC */ 20889637Smckusick reclaimed = 0; 20989637Smckusickretry: 21093593Sjhb if (suser_cred(cred, PRISON_ROOT) && 21129609Sphk freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) 2121541Srgrimes goto nospace; 2131541Srgrimes if ((bprev = ip->i_db[lbprev]) == 0) { 21450253Sbde printf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n", 21550253Sbde devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev, 21637555Sbde fs->fs_fsmnt); 2171541Srgrimes panic("ffs_realloccg: bad bprev"); 2181541Srgrimes } 2191541Srgrimes /* 2201541Srgrimes * Allocate the extra space in the buffer. 2211541Srgrimes */ 22289637Smckusick error = bread(vp, lbprev, osize, NOCRED, &bp); 2233487Sphk if (error) { 2241541Srgrimes brelse(bp); 2251541Srgrimes return (error); 2261541Srgrimes } 2276864Sdg 2286864Sdg if( bp->b_blkno == bp->b_lblkno) { 2296864Sdg if( lbprev >= NDADDR) 2306864Sdg panic("ffs_realloccg: lbprev out of range"); 2316864Sdg bp->b_blkno = fsbtodb(fs, bprev); 2326864Sdg } 2338876Srgrimes 2341541Srgrimes#ifdef QUOTA 2353487Sphk error = chkdq(ip, (long)btodb(nsize - osize), cred, 0); 2363487Sphk if (error) { 2371541Srgrimes brelse(bp); 2381541Srgrimes return (error); 2391541Srgrimes } 2401541Srgrimes#endif 2411541Srgrimes /* 2421541Srgrimes * Check for extension in the existing location. 2431541Srgrimes */ 2441541Srgrimes cg = dtog(fs, bprev); 2453487Sphk bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); 2463487Sphk if (bno) { 2471541Srgrimes if (bp->b_blkno != fsbtodb(fs, bno)) 24823560Smpp panic("ffs_realloccg: bad blockno"); 2491541Srgrimes ip->i_blocks += btodb(nsize - osize); 2501541Srgrimes ip->i_flag |= IN_CHANGE | IN_UPDATE; 2517399Sdg allocbuf(bp, nsize); 2521541Srgrimes bp->b_flags |= B_DONE; 2531541Srgrimes bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 2541541Srgrimes *bpp = bp; 2551541Srgrimes return (0); 2561541Srgrimes } 2571541Srgrimes /* 2581541Srgrimes * Allocate a new disk location. 2591541Srgrimes */ 2601541Srgrimes if (bpref >= fs->fs_size) 2611541Srgrimes bpref = 0; 2621541Srgrimes switch ((int)fs->fs_optim) { 2631541Srgrimes case FS_OPTSPACE: 2641541Srgrimes /* 2658876Srgrimes * Allocate an exact sized fragment. Although this makes 2668876Srgrimes * best use of space, we will waste time relocating it if 2671541Srgrimes * the file continues to grow. If the fragmentation is 2681541Srgrimes * less than half of the minimum free reserve, we choose 2691541Srgrimes * to begin optimizing for time. 2701541Srgrimes */ 2711541Srgrimes request = nsize; 2726993Sdg if (fs->fs_minfree <= 5 || 2731541Srgrimes fs->fs_cstotal.cs_nffree > 27458087Smckusick (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) 2751541Srgrimes break; 2761541Srgrimes log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 2771541Srgrimes fs->fs_fsmnt); 2781541Srgrimes fs->fs_optim = FS_OPTTIME; 2791541Srgrimes break; 2801541Srgrimes case FS_OPTTIME: 2811541Srgrimes /* 2821541Srgrimes * At this point we have discovered a file that is trying to 2831541Srgrimes * grow a small fragment to a larger fragment. To save time, 2841541Srgrimes * we allocate a full sized block, then free the unused portion. 2851541Srgrimes * If the file continues to grow, the `ffs_fragextend' call 2861541Srgrimes * above will be able to grow it in place without further 2871541Srgrimes * copying. If aberrant programs cause disk fragmentation to 2881541Srgrimes * grow within 2% of the free reserve, we choose to begin 2891541Srgrimes * optimizing for space. 2901541Srgrimes */ 2911541Srgrimes request = fs->fs_bsize; 2921541Srgrimes if (fs->fs_cstotal.cs_nffree < 29358087Smckusick (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) 2941541Srgrimes break; 2951541Srgrimes log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 2961541Srgrimes fs->fs_fsmnt); 2971541Srgrimes fs->fs_optim = FS_OPTSPACE; 2981541Srgrimes break; 2991541Srgrimes default: 30050253Sbde printf("dev = %s, optim = %ld, fs = %s\n", 30150253Sbde devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); 3021541Srgrimes panic("ffs_realloccg: bad optim"); 3031541Srgrimes /* NOTREACHED */ 3041541Srgrimes } 30522521Sdyson bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, 30622521Sdyson ffs_alloccg); 3071541Srgrimes if (bno > 0) { 3081541Srgrimes bp->b_blkno = fsbtodb(fs, bno); 30989637Smckusick if (!DOINGSOFTDEP(vp)) 31090098Smckusick ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize, 31190098Smckusick ip->i_number); 3121541Srgrimes if (nsize < request) 31390098Smckusick ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize), 31490098Smckusick (long)(request - nsize), ip->i_number); 3151541Srgrimes ip->i_blocks += btodb(nsize - osize); 3161541Srgrimes ip->i_flag |= IN_CHANGE | IN_UPDATE; 3177399Sdg allocbuf(bp, nsize); 3181541Srgrimes bp->b_flags |= B_DONE; 3191541Srgrimes bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 3201541Srgrimes *bpp = bp; 3211541Srgrimes return (0); 3221541Srgrimes } 3231541Srgrimes#ifdef QUOTA 3241541Srgrimes /* 3251541Srgrimes * Restore user's disk quota because allocation failed. 3261541Srgrimes */ 3271541Srgrimes (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); 3281541Srgrimes#endif 3291541Srgrimes brelse(bp); 3301541Srgrimesnospace: 3311541Srgrimes /* 3321541Srgrimes * no space available 3331541Srgrimes */ 33489637Smckusick if (fs->fs_pendingblocks > 0 && reclaimed == 0) { 33589637Smckusick reclaimed = 1; 33689637Smckusick softdep_request_cleanup(fs, vp); 33789637Smckusick goto retry; 33889637Smckusick } 33996755Strhodes ffs_fserr(fs, ip->i_number, "filesystem full"); 34096755Strhodes uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); 3411541Srgrimes return (ENOSPC); 3421541Srgrimes} 3431541Srgrimes 3441541Srgrimes/* 3451541Srgrimes * Reallocate a sequence of blocks into a contiguous sequence of blocks. 3461541Srgrimes * 3471541Srgrimes * The vnode and an array of buffer pointers for a range of sequential 3481541Srgrimes * logical blocks to be made contiguous is given. The allocator attempts 3491541Srgrimes * to find a range of sequential blocks starting as close as possible to 3501541Srgrimes * an fs_rotdelay offset from the end of the allocation for the logical 35172645Sasmodai * block immediately preceding the current range. If successful, the 3521541Srgrimes * physical block numbers in the buffer pointers and in the inode are 3531541Srgrimes * changed to reflect the new allocation. If unsuccessful, the allocation 3541541Srgrimes * is left unchanged. The success in doing the reallocation is returned. 3551541Srgrimes * Note that the error return is not reflected back to the user. Rather 3561541Srgrimes * the previous block allocation will be used. 3571541Srgrimes */ 35874548Smckusick 35974548SmckusickSYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); 36074548Smckusick 36112911Sphkstatic int doasyncfree = 1; 36274548SmckusickSYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 36322521Sdyson 36431352Sbdestatic int doreallocblks = 1; 36574548SmckusickSYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 36622521Sdyson 36742351Sbde#ifdef DEBUG 36842351Sbdestatic volatile int prtrealloc = 0; 36942351Sbde#endif 37031351Sbde 3711541Srgrimesint 3721541Srgrimesffs_reallocblks(ap) 3731541Srgrimes struct vop_reallocblks_args /* { 3741541Srgrimes struct vnode *a_vp; 3751541Srgrimes struct cluster_save *a_buflist; 3761541Srgrimes } */ *ap; 3771541Srgrimes{ 3781541Srgrimes struct fs *fs; 3791541Srgrimes struct inode *ip; 3801541Srgrimes struct vnode *vp; 3811541Srgrimes struct buf *sbp, *ebp; 38222521Sdyson ufs_daddr_t *bap, *sbap, *ebap = 0; 3831541Srgrimes struct cluster_save *buflist; 38422521Sdyson ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; 3851541Srgrimes struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 3861541Srgrimes int i, len, start_lvl, end_lvl, pref, ssize; 3871541Srgrimes 38822521Sdyson if (doreallocblks == 0) 38922521Sdyson return (ENOSPC); 3901541Srgrimes vp = ap->a_vp; 3911541Srgrimes ip = VTOI(vp); 3921541Srgrimes fs = ip->i_fs; 3931541Srgrimes if (fs->fs_contigsumsize <= 0) 3941541Srgrimes return (ENOSPC); 3951541Srgrimes buflist = ap->a_buflist; 3961541Srgrimes len = buflist->bs_nchildren; 3971541Srgrimes start_lbn = buflist->bs_children[0]->b_lblkno; 3981541Srgrimes end_lbn = start_lbn + len - 1; 3991541Srgrimes#ifdef DIAGNOSTIC 40022521Sdyson for (i = 0; i < len; i++) 40122521Sdyson if (!ffs_checkblk(ip, 40222521Sdyson dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 40322521Sdyson panic("ffs_reallocblks: unallocated block 1"); 4041541Srgrimes for (i = 1; i < len; i++) 4051541Srgrimes if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 40622521Sdyson panic("ffs_reallocblks: non-logical cluster"); 40722521Sdyson blkno = buflist->bs_children[0]->b_blkno; 40822521Sdyson ssize = fsbtodb(fs, fs->fs_frag); 40922521Sdyson for (i = 1; i < len - 1; i++) 41022521Sdyson if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 41122521Sdyson panic("ffs_reallocblks: non-physical cluster %d", i); 4121541Srgrimes#endif 4131541Srgrimes /* 4141541Srgrimes * If the latest allocation is in a new cylinder group, assume that 4151541Srgrimes * the filesystem has decided to move and do not force it back to 4161541Srgrimes * the previous cylinder group. 4171541Srgrimes */ 4181541Srgrimes if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 4191541Srgrimes dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 4201541Srgrimes return (ENOSPC); 4211541Srgrimes if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 4221541Srgrimes ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 4231541Srgrimes return (ENOSPC); 4241541Srgrimes /* 4251541Srgrimes * Get the starting offset and block map for the first block. 4261541Srgrimes */ 4271541Srgrimes if (start_lvl == 0) { 4281541Srgrimes sbap = &ip->i_db[0]; 4291541Srgrimes soff = start_lbn; 4301541Srgrimes } else { 4311541Srgrimes idp = &start_ap[start_lvl - 1]; 4321541Srgrimes if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { 4331541Srgrimes brelse(sbp); 4341541Srgrimes return (ENOSPC); 4351541Srgrimes } 43622521Sdyson sbap = (ufs_daddr_t *)sbp->b_data; 4371541Srgrimes soff = idp->in_off; 4381541Srgrimes } 4391541Srgrimes /* 4401541Srgrimes * Find the preferred location for the cluster. 4411541Srgrimes */ 4421541Srgrimes pref = ffs_blkpref(ip, start_lbn, soff, sbap); 4431541Srgrimes /* 4441541Srgrimes * If the block range spans two block maps, get the second map. 4451541Srgrimes */ 4461541Srgrimes if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 4471541Srgrimes ssize = len; 4481541Srgrimes } else { 4491541Srgrimes#ifdef DIAGNOSTIC 4501541Srgrimes if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 4511541Srgrimes panic("ffs_reallocblk: start == end"); 4521541Srgrimes#endif 4531541Srgrimes ssize = len - (idp->in_off + 1); 4541541Srgrimes if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) 4551541Srgrimes goto fail; 45622521Sdyson ebap = (ufs_daddr_t *)ebp->b_data; 4571541Srgrimes } 4581541Srgrimes /* 4591541Srgrimes * Search the block map looking for an allocation of the desired size. 4601541Srgrimes */ 46122521Sdyson if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, 46212590Sbde len, ffs_clusteralloc)) == 0) 4631541Srgrimes goto fail; 4641541Srgrimes /* 4651541Srgrimes * We have found a new contiguous block. 4661541Srgrimes * 4671541Srgrimes * First we have to replace the old block pointers with the new 4681541Srgrimes * block pointers in the inode and indirect blocks associated 4691541Srgrimes * with the file. 4701541Srgrimes */ 47122521Sdyson#ifdef DEBUG 47222521Sdyson if (prtrealloc) 47322521Sdyson printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number, 47422521Sdyson start_lbn, end_lbn); 47522521Sdyson#endif 4761541Srgrimes blkno = newblk; 4771541Srgrimes for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 47834266Sjulian if (i == ssize) { 4791541Srgrimes bap = ebap; 48034266Sjulian soff = -i; 48134266Sjulian } 4821541Srgrimes#ifdef DIAGNOSTIC 48322521Sdyson if (!ffs_checkblk(ip, 48422521Sdyson dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 48522521Sdyson panic("ffs_reallocblks: unallocated block 2"); 48622521Sdyson if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 4871541Srgrimes panic("ffs_reallocblks: alloc mismatch"); 4881541Srgrimes#endif 48922521Sdyson#ifdef DEBUG 49022521Sdyson if (prtrealloc) 49122521Sdyson printf(" %d,", *bap); 49222521Sdyson#endif 49334266Sjulian if (DOINGSOFTDEP(vp)) { 49434266Sjulian if (sbap == &ip->i_db[0] && i < ssize) 49534266Sjulian softdep_setup_allocdirect(ip, start_lbn + i, 49634266Sjulian blkno, *bap, fs->fs_bsize, fs->fs_bsize, 49734266Sjulian buflist->bs_children[i]); 49834266Sjulian else 49934266Sjulian softdep_setup_allocindir_page(ip, start_lbn + i, 50034266Sjulian i < ssize ? sbp : ebp, soff + i, blkno, 50134266Sjulian *bap, buflist->bs_children[i]); 50234266Sjulian } 5031541Srgrimes *bap++ = blkno; 5041541Srgrimes } 5051541Srgrimes /* 5061541Srgrimes * Next we must write out the modified inode and indirect blocks. 5071541Srgrimes * For strict correctness, the writes should be synchronous since 5081541Srgrimes * the old block values may have been written to disk. In practise 5098876Srgrimes * they are almost never written, but if we are concerned about 5101541Srgrimes * strict correctness, the `doasyncfree' flag should be set to zero. 5111541Srgrimes * 5121541Srgrimes * The test on `doasyncfree' should be changed to test a flag 5131541Srgrimes * that shows whether the associated buffers and inodes have 5141541Srgrimes * been written. The flag should be set when the cluster is 5151541Srgrimes * started and cleared whenever the buffer or inode is flushed. 5161541Srgrimes * We can then check below to see if it is set, and do the 5171541Srgrimes * synchronous write only when it has been cleared. 5181541Srgrimes */ 5191541Srgrimes if (sbap != &ip->i_db[0]) { 5201541Srgrimes if (doasyncfree) 5211541Srgrimes bdwrite(sbp); 5221541Srgrimes else 5231541Srgrimes bwrite(sbp); 5241541Srgrimes } else { 5251541Srgrimes ip->i_flag |= IN_CHANGE | IN_UPDATE; 52642374Sbde if (!doasyncfree) 52742374Sbde UFS_UPDATE(vp, 1); 5281541Srgrimes } 52946568Speter if (ssize < len) { 5301541Srgrimes if (doasyncfree) 5311541Srgrimes bdwrite(ebp); 5321541Srgrimes else 5331541Srgrimes bwrite(ebp); 53446568Speter } 5351541Srgrimes /* 5361541Srgrimes * Last, free the old blocks and assign the new blocks to the buffers. 5371541Srgrimes */ 53822521Sdyson#ifdef DEBUG 53922521Sdyson if (prtrealloc) 54022521Sdyson printf("\n\tnew:"); 54122521Sdyson#endif 5421541Srgrimes for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 54334266Sjulian if (!DOINGSOFTDEP(vp)) 54490098Smckusick ffs_blkfree(fs, ip->i_devvp, 54534266Sjulian dbtofsb(fs, buflist->bs_children[i]->b_blkno), 54690098Smckusick fs->fs_bsize, ip->i_number); 5471541Srgrimes buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 54850305Ssheldonh#ifdef DIAGNOSTIC 54922521Sdyson if (!ffs_checkblk(ip, 55022521Sdyson dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 55122521Sdyson panic("ffs_reallocblks: unallocated block 3"); 55250305Ssheldonh#endif 55350305Ssheldonh#ifdef DEBUG 55422521Sdyson if (prtrealloc) 55522521Sdyson printf(" %d,", blkno); 55622521Sdyson#endif 5571541Srgrimes } 55822521Sdyson#ifdef DEBUG 55922521Sdyson if (prtrealloc) { 56022521Sdyson prtrealloc--; 56122521Sdyson printf("\n"); 56222521Sdyson } 56322521Sdyson#endif 5641541Srgrimes return (0); 5651541Srgrimes 5661541Srgrimesfail: 5671541Srgrimes if (ssize < len) 5681541Srgrimes brelse(ebp); 5691541Srgrimes if (sbap != &ip->i_db[0]) 5701541Srgrimes brelse(sbp); 5711541Srgrimes return (ENOSPC); 5721541Srgrimes} 5731541Srgrimes 5741541Srgrimes/* 57596755Strhodes * Allocate an inode in the filesystem. 5768876Srgrimes * 5771541Srgrimes * If allocating a directory, use ffs_dirpref to select the inode. 5781541Srgrimes * If allocating in a directory, the following hierarchy is followed: 5791541Srgrimes * 1) allocate the preferred inode. 5801541Srgrimes * 2) allocate an inode in the same cylinder group. 5811541Srgrimes * 3) quadradically rehash into other cylinder groups, until an 5821541Srgrimes * available inode is located. 5831541Srgrimes * If no inode preference is given the following heirarchy is used 5841541Srgrimes * to allocate an inode: 5851541Srgrimes * 1) allocate an inode in cylinder group 0. 5861541Srgrimes * 2) quadradically rehash into other cylinder groups, until an 5871541Srgrimes * available inode is located. 5881541Srgrimes */ 5891549Srgrimesint 59030474Sphkffs_valloc(pvp, mode, cred, vpp) 59130474Sphk struct vnode *pvp; 59230474Sphk int mode; 59330474Sphk struct ucred *cred; 59430474Sphk struct vnode **vpp; 5951541Srgrimes{ 59696506Sphk struct inode *pip; 59796506Sphk struct fs *fs; 59896506Sphk struct inode *ip; 5991541Srgrimes ino_t ino, ipref; 6001541Srgrimes int cg, error; 6018876Srgrimes 60230474Sphk *vpp = NULL; 6031541Srgrimes pip = VTOI(pvp); 6041541Srgrimes fs = pip->i_fs; 6051541Srgrimes if (fs->fs_cstotal.cs_nifree == 0) 6061541Srgrimes goto noinodes; 6071541Srgrimes 6081541Srgrimes if ((mode & IFMT) == IFDIR) 60975377Smckusick ipref = ffs_dirpref(pip); 6101541Srgrimes else 6111541Srgrimes ipref = pip->i_number; 6121541Srgrimes if (ipref >= fs->fs_ncg * fs->fs_ipg) 6131541Srgrimes ipref = 0; 6141541Srgrimes cg = ino_to_cg(fs, ipref); 61575377Smckusick /* 61675377Smckusick * Track number of dirs created one after another 61775377Smckusick * in a same cg without intervening by files. 61875377Smckusick */ 61975377Smckusick if ((mode & IFMT) == IFDIR) { 62075377Smckusick if (fs->fs_contigdirs[cg] < 255) 62175377Smckusick fs->fs_contigdirs[cg]++; 62275377Smckusick } else { 62375377Smckusick if (fs->fs_contigdirs[cg] > 0) 62475377Smckusick fs->fs_contigdirs[cg]--; 62575377Smckusick } 62612861Speter ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, 62712861Speter (allocfcn_t *)ffs_nodealloccg); 6281541Srgrimes if (ino == 0) 6291541Srgrimes goto noinodes; 63092462Smckusick error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); 6311541Srgrimes if (error) { 63230474Sphk UFS_VFREE(pvp, ino, mode); 6331541Srgrimes return (error); 6341541Srgrimes } 63530474Sphk ip = VTOI(*vpp); 6361541Srgrimes if (ip->i_mode) { 63737555Sbde printf("mode = 0%o, inum = %lu, fs = %s\n", 63837555Sbde ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); 6391541Srgrimes panic("ffs_valloc: dup alloc"); 6401541Srgrimes } 64174548Smckusick if (ip->i_blocks && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ 64237555Sbde printf("free inode %s/%lu had %ld blocks\n", 64337555Sbde fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks); 6441541Srgrimes ip->i_blocks = 0; 6451541Srgrimes } 6461541Srgrimes ip->i_flags = 0; 6471541Srgrimes /* 6481541Srgrimes * Set up a new generation number for this inode. 6491541Srgrimes */ 65031484Sbde if (ip->i_gen == 0 || ++ip->i_gen == 0) 65124149Sguido ip->i_gen = random() / 2 + 1; 6521541Srgrimes return (0); 6531541Srgrimesnoinodes: 65490098Smckusick ffs_fserr(fs, pip->i_number, "out of inodes"); 6551541Srgrimes uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 6561541Srgrimes return (ENOSPC); 6571541Srgrimes} 6581541Srgrimes 6591541Srgrimes/* 66075377Smckusick * Find a cylinder group to place a directory. 6611541Srgrimes * 66275377Smckusick * The policy implemented by this algorithm is to allocate a 66375377Smckusick * directory inode in the same cylinder group as its parent 66475377Smckusick * directory, but also to reserve space for its files inodes 66575377Smckusick * and data. Restrict the number of directories which may be 66675377Smckusick * allocated one after another in the same cylinder group 66775377Smckusick * without intervening allocation of files. 66875377Smckusick * 66975377Smckusick * If we allocate a first level directory then force allocation 67075377Smckusick * in another cylinder group. 6711541Srgrimes */ 6721541Srgrimesstatic ino_t 67375377Smckusickffs_dirpref(pip) 67475377Smckusick struct inode *pip; 67575377Smckusick{ 67696506Sphk struct fs *fs; 67775377Smckusick int cg, prefcg, dirsize, cgsize; 67875377Smckusick int avgifree, avgbfree, avgndir, curdirsize; 67975377Smckusick int minifree, minbfree, maxndir; 68075377Smckusick int mincg, minndir; 68175377Smckusick int maxcontigdirs; 6821541Srgrimes 68375377Smckusick fs = pip->i_fs; 68475377Smckusick 6851541Srgrimes avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 68675377Smckusick avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 68775377Smckusick avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; 68875377Smckusick 68975377Smckusick /* 69075377Smckusick * Force allocation in another cg if creating a first level dir. 69175377Smckusick */ 69275377Smckusick if (ITOV(pip)->v_flag & VROOT) { 69375377Smckusick prefcg = arc4random() % fs->fs_ncg; 69475377Smckusick mincg = prefcg; 69575377Smckusick minndir = fs->fs_ipg; 69675377Smckusick for (cg = prefcg; cg < fs->fs_ncg; cg++) 69775377Smckusick if (fs->fs_cs(fs, cg).cs_ndir < minndir && 69875377Smckusick fs->fs_cs(fs, cg).cs_nifree >= avgifree && 69975377Smckusick fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 70075377Smckusick mincg = cg; 70175377Smckusick minndir = fs->fs_cs(fs, cg).cs_ndir; 70275377Smckusick } 70375377Smckusick for (cg = 0; cg < prefcg; cg++) 70475377Smckusick if (fs->fs_cs(fs, cg).cs_ndir < minndir && 70575377Smckusick fs->fs_cs(fs, cg).cs_nifree >= avgifree && 70675377Smckusick fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 70775377Smckusick mincg = cg; 70875377Smckusick minndir = fs->fs_cs(fs, cg).cs_ndir; 70975377Smckusick } 71075377Smckusick return ((ino_t)(fs->fs_ipg * mincg)); 71175377Smckusick } 71275377Smckusick 71375377Smckusick /* 71475377Smckusick * Count various limits which used for 71575377Smckusick * optimal allocation of a directory inode. 71675377Smckusick */ 71775377Smckusick maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); 71875377Smckusick minifree = avgifree - fs->fs_ipg / 4; 71975377Smckusick if (minifree < 0) 72075377Smckusick minifree = 0; 72175377Smckusick minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4; 72275377Smckusick if (minbfree < 0) 72375377Smckusick minbfree = 0; 72475377Smckusick cgsize = fs->fs_fsize * fs->fs_fpg; 72575377Smckusick dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; 72675377Smckusick curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; 72775377Smckusick if (dirsize < curdirsize) 72875377Smckusick dirsize = curdirsize; 72975377Smckusick maxcontigdirs = min(cgsize / dirsize, 255); 73075377Smckusick if (fs->fs_avgfpdir > 0) 73175377Smckusick maxcontigdirs = min(maxcontigdirs, 73275377Smckusick fs->fs_ipg / fs->fs_avgfpdir); 73375377Smckusick if (maxcontigdirs == 0) 73475377Smckusick maxcontigdirs = 1; 73575377Smckusick 73675377Smckusick /* 73775377Smckusick * Limit number of dirs in one cg and reserve space for 73875377Smckusick * regular files, but only if we have no deficit in 73975377Smckusick * inodes or space. 74075377Smckusick */ 74175377Smckusick prefcg = ino_to_cg(fs, pip->i_number); 74275377Smckusick for (cg = prefcg; cg < fs->fs_ncg; cg++) 74375377Smckusick if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 74475377Smckusick fs->fs_cs(fs, cg).cs_nifree >= minifree && 74575377Smckusick fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 74675377Smckusick if (fs->fs_contigdirs[cg] < maxcontigdirs) 74775377Smckusick return ((ino_t)(fs->fs_ipg * cg)); 7481541Srgrimes } 74975377Smckusick for (cg = 0; cg < prefcg; cg++) 75075377Smckusick if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 75175377Smckusick fs->fs_cs(fs, cg).cs_nifree >= minifree && 75275377Smckusick fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 75375377Smckusick if (fs->fs_contigdirs[cg] < maxcontigdirs) 75475377Smckusick return ((ino_t)(fs->fs_ipg * cg)); 75575377Smckusick } 75675377Smckusick /* 75775377Smckusick * This is a backstop when we have deficit in space. 75875377Smckusick */ 75975377Smckusick for (cg = prefcg; cg < fs->fs_ncg; cg++) 76075377Smckusick if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 76175377Smckusick return ((ino_t)(fs->fs_ipg * cg)); 76275377Smckusick for (cg = 0; cg < prefcg; cg++) 76375377Smckusick if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 76475377Smckusick break; 76575377Smckusick return ((ino_t)(fs->fs_ipg * cg)); 7661541Srgrimes} 7671541Srgrimes 7681541Srgrimes/* 7691541Srgrimes * Select the desired position for the next block in a file. The file is 7701541Srgrimes * logically divided into sections. The first section is composed of the 7711541Srgrimes * direct blocks. Each additional section contains fs_maxbpg blocks. 7728876Srgrimes * 7731541Srgrimes * If no blocks have been allocated in the first section, the policy is to 7741541Srgrimes * request a block in the same cylinder group as the inode that describes 7751541Srgrimes * the file. If no blocks have been allocated in any other section, the 7761541Srgrimes * policy is to place the section in a cylinder group with a greater than 7771541Srgrimes * average number of free blocks. An appropriate cylinder group is found 7781541Srgrimes * by using a rotor that sweeps the cylinder groups. When a new group of 7791541Srgrimes * blocks is needed, the sweep begins in the cylinder group following the 7801541Srgrimes * cylinder group from which the previous allocation was made. The sweep 7811541Srgrimes * continues until a cylinder group with greater than the average number 7821541Srgrimes * of free blocks is found. If the allocation is for the first block in an 7831541Srgrimes * indirect block, the information on the previous allocation is unavailable; 7841541Srgrimes * here a best guess is made based upon the logical block number being 7851541Srgrimes * allocated. 7868876Srgrimes * 7871541Srgrimes * If a section is already partially allocated, the policy is to 7881541Srgrimes * contiguously allocate fs_maxcontig blocks. The end of one of these 7891541Srgrimes * contiguous blocks and the beginning of the next is physically separated 7901541Srgrimes * so that the disk head will be in transit between them for at least 7911541Srgrimes * fs_rotdelay milliseconds. This is to allow time for the processor to 7921541Srgrimes * schedule another I/O transfer. 7931541Srgrimes */ 79422521Sdysonufs_daddr_t 7951541Srgrimesffs_blkpref(ip, lbn, indx, bap) 7961541Srgrimes struct inode *ip; 79722521Sdyson ufs_daddr_t lbn; 7981541Srgrimes int indx; 79922521Sdyson ufs_daddr_t *bap; 8001541Srgrimes{ 80196506Sphk struct fs *fs; 80296506Sphk int cg; 8031541Srgrimes int avgbfree, startcg; 80422521Sdyson ufs_daddr_t nextblk; 8051541Srgrimes 8061541Srgrimes fs = ip->i_fs; 8071541Srgrimes if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 80853996Smckusick if (lbn < NDADDR + NINDIR(fs)) { 8091541Srgrimes cg = ino_to_cg(fs, ip->i_number); 8101541Srgrimes return (fs->fs_fpg * cg + fs->fs_frag); 8111541Srgrimes } 8121541Srgrimes /* 8131541Srgrimes * Find a cylinder with greater than average number of 8141541Srgrimes * unused data blocks. 8151541Srgrimes */ 8161541Srgrimes if (indx == 0 || bap[indx - 1] == 0) 8171541Srgrimes startcg = 8181541Srgrimes ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 8191541Srgrimes else 8201541Srgrimes startcg = dtog(fs, bap[indx - 1]) + 1; 8211541Srgrimes startcg %= fs->fs_ncg; 8221541Srgrimes avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 8231541Srgrimes for (cg = startcg; cg < fs->fs_ncg; cg++) 8241541Srgrimes if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 8251541Srgrimes fs->fs_cgrotor = cg; 8261541Srgrimes return (fs->fs_fpg * cg + fs->fs_frag); 8271541Srgrimes } 8281541Srgrimes for (cg = 0; cg <= startcg; cg++) 8291541Srgrimes if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 8301541Srgrimes fs->fs_cgrotor = cg; 8311541Srgrimes return (fs->fs_fpg * cg + fs->fs_frag); 8321541Srgrimes } 83317108Sbde return (0); 8341541Srgrimes } 8351541Srgrimes /* 8361541Srgrimes * One or more previous blocks have been laid out. If less 8371541Srgrimes * than fs_maxcontig previous blocks are contiguous, the 8381541Srgrimes * next block is requested contiguously, otherwise it is 8391541Srgrimes * requested rotationally delayed by fs_rotdelay milliseconds. 8401541Srgrimes */ 8411541Srgrimes nextblk = bap[indx - 1] + fs->fs_frag; 84210632Sdg if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || 84310632Sdg bap[indx - fs->fs_maxcontig] + 8441541Srgrimes blkstofrags(fs, fs->fs_maxcontig) != nextblk) 8451541Srgrimes return (nextblk); 84610632Sdg /* 84710632Sdg * Here we convert ms of delay to frags as: 84810632Sdg * (frags) = (ms) * (rev/sec) * (sect/rev) / 84910632Sdg * ((sect/frag) * (ms/sec)) 85010632Sdg * then round up to the next block. 85110632Sdg */ 85210632Sdg nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / 85310632Sdg (NSPF(fs) * 1000), fs->fs_frag); 8541541Srgrimes return (nextblk); 8551541Srgrimes} 8561541Srgrimes 8571541Srgrimes/* 8581541Srgrimes * Implement the cylinder overflow algorithm. 8591541Srgrimes * 8601541Srgrimes * The policy implemented by this algorithm is: 8611541Srgrimes * 1) allocate the block in its requested cylinder group. 8621541Srgrimes * 2) quadradically rehash on the cylinder group number. 8631541Srgrimes * 3) brute force search for a free block. 8641541Srgrimes */ 8651541Srgrimes/*VARARGS5*/ 8661541Srgrimesstatic u_long 8671541Srgrimesffs_hashalloc(ip, cg, pref, size, allocator) 8681541Srgrimes struct inode *ip; 8691541Srgrimes int cg; 8701541Srgrimes long pref; 8711541Srgrimes int size; /* size for data blocks, mode for inodes */ 87212590Sbde allocfcn_t *allocator; 8731541Srgrimes{ 87496506Sphk struct fs *fs; 87512590Sbde long result; /* XXX why not same type as we return? */ 8761541Srgrimes int i, icg = cg; 8771541Srgrimes 87862976Smckusick#ifdef DIAGNOSTIC 87962976Smckusick if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED) 88062976Smckusick panic("ffs_hashalloc: allocation on suspended filesystem"); 88162976Smckusick#endif 8821541Srgrimes fs = ip->i_fs; 8831541Srgrimes /* 8841541Srgrimes * 1: preferred cylinder group 8851541Srgrimes */ 8861541Srgrimes result = (*allocator)(ip, cg, pref, size); 8871541Srgrimes if (result) 8881541Srgrimes return (result); 8891541Srgrimes /* 8901541Srgrimes * 2: quadratic rehash 8911541Srgrimes */ 8921541Srgrimes for (i = 1; i < fs->fs_ncg; i *= 2) { 8931541Srgrimes cg += i; 8941541Srgrimes if (cg >= fs->fs_ncg) 8951541Srgrimes cg -= fs->fs_ncg; 8961541Srgrimes result = (*allocator)(ip, cg, 0, size); 8971541Srgrimes if (result) 8981541Srgrimes return (result); 8991541Srgrimes } 9001541Srgrimes /* 9011541Srgrimes * 3: brute force search 9021541Srgrimes * Note that we start at i == 2, since 0 was checked initially, 9031541Srgrimes * and 1 is always checked in the quadratic rehash. 9041541Srgrimes */ 9051541Srgrimes cg = (icg + 2) % fs->fs_ncg; 9061541Srgrimes for (i = 2; i < fs->fs_ncg; i++) { 9071541Srgrimes result = (*allocator)(ip, cg, 0, size); 9081541Srgrimes if (result) 9091541Srgrimes return (result); 9101541Srgrimes cg++; 9111541Srgrimes if (cg == fs->fs_ncg) 9121541Srgrimes cg = 0; 9131541Srgrimes } 91412590Sbde return (0); 9151541Srgrimes} 9161541Srgrimes 9171541Srgrimes/* 9181541Srgrimes * Determine whether a fragment can be extended. 9191541Srgrimes * 9208876Srgrimes * Check to see if the necessary fragments are available, and 9211541Srgrimes * if they are, allocate them. 9221541Srgrimes */ 92322521Sdysonstatic ufs_daddr_t 9241541Srgrimesffs_fragextend(ip, cg, bprev, osize, nsize) 9251541Srgrimes struct inode *ip; 9261541Srgrimes int cg; 9271541Srgrimes long bprev; 9281541Srgrimes int osize, nsize; 9291541Srgrimes{ 93096506Sphk struct fs *fs; 93196506Sphk struct cg *cgp; 9321541Srgrimes struct buf *bp; 9331541Srgrimes long bno; 9341541Srgrimes int frags, bbase; 9351541Srgrimes int i, error; 93658087Smckusick u_int8_t *blksfree; 9371541Srgrimes 9381541Srgrimes fs = ip->i_fs; 9391541Srgrimes if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 94017108Sbde return (0); 9411541Srgrimes frags = numfrags(fs, nsize); 9421541Srgrimes bbase = fragnum(fs, bprev); 9431541Srgrimes if (bbase > fragnum(fs, (bprev + frags - 1))) { 9441541Srgrimes /* cannot extend across a block boundary */ 94517108Sbde return (0); 9461541Srgrimes } 9471541Srgrimes error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 9481541Srgrimes (int)fs->fs_cgsize, NOCRED, &bp); 9491541Srgrimes if (error) { 9501541Srgrimes brelse(bp); 95117108Sbde return (0); 9521541Srgrimes } 9531541Srgrimes cgp = (struct cg *)bp->b_data; 9541541Srgrimes if (!cg_chkmagic(cgp)) { 9551541Srgrimes brelse(bp); 95617108Sbde return (0); 9571541Srgrimes } 95855697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 95934961Sphk cgp->cg_time = time_second; 9601541Srgrimes bno = dtogd(fs, bprev); 96158087Smckusick blksfree = cg_blksfree(cgp); 9621541Srgrimes for (i = numfrags(fs, osize); i < frags; i++) 96358087Smckusick if (isclr(blksfree, bno + i)) { 9641541Srgrimes brelse(bp); 96517108Sbde return (0); 9661541Srgrimes } 9671541Srgrimes /* 9681541Srgrimes * the current fragment can be extended 9691541Srgrimes * deduct the count on fragment being extended into 9701541Srgrimes * increase the count on the remaining fragment (if any) 9711541Srgrimes * allocate the extended piece 9721541Srgrimes */ 9731541Srgrimes for (i = frags; i < fs->fs_frag - bbase; i++) 97458087Smckusick if (isclr(blksfree, bno + i)) 9751541Srgrimes break; 9761541Srgrimes cgp->cg_frsum[i - numfrags(fs, osize)]--; 9771541Srgrimes if (i != frags) 9781541Srgrimes cgp->cg_frsum[i - frags]++; 9791541Srgrimes for (i = numfrags(fs, osize); i < frags; i++) { 98058087Smckusick clrbit(blksfree, bno + i); 9811541Srgrimes cgp->cg_cs.cs_nffree--; 9821541Srgrimes fs->fs_cstotal.cs_nffree--; 9831541Srgrimes fs->fs_cs(fs, cg).cs_nffree--; 9841541Srgrimes } 9851541Srgrimes fs->fs_fmod = 1; 98634266Sjulian if (DOINGSOFTDEP(ITOV(ip))) 98734266Sjulian softdep_setup_blkmapdep(bp, fs, bprev); 98887827Smckusick if (fs->fs_active != 0) 98988138Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 9901541Srgrimes bdwrite(bp); 9911541Srgrimes return (bprev); 9921541Srgrimes} 9931541Srgrimes 9941541Srgrimes/* 9951541Srgrimes * Determine whether a block can be allocated. 9961541Srgrimes * 9971541Srgrimes * Check to see if a block of the appropriate size is available, 9981541Srgrimes * and if it is, allocate it. 9991541Srgrimes */ 100022521Sdysonstatic ufs_daddr_t 10011541Srgrimesffs_alloccg(ip, cg, bpref, size) 10021541Srgrimes struct inode *ip; 10031541Srgrimes int cg; 100422521Sdyson ufs_daddr_t bpref; 10051541Srgrimes int size; 10061541Srgrimes{ 100796506Sphk struct fs *fs; 100896506Sphk struct cg *cgp; 10091541Srgrimes struct buf *bp; 101096506Sphk int i; 101134266Sjulian ufs_daddr_t bno, blkno; 101234266Sjulian int allocsiz, error, frags; 101358087Smckusick u_int8_t *blksfree; 10141541Srgrimes 10151541Srgrimes fs = ip->i_fs; 10161541Srgrimes if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 101717108Sbde return (0); 10181541Srgrimes error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 10191541Srgrimes (int)fs->fs_cgsize, NOCRED, &bp); 10201541Srgrimes if (error) { 10211541Srgrimes brelse(bp); 102217108Sbde return (0); 10231541Srgrimes } 10241541Srgrimes cgp = (struct cg *)bp->b_data; 10251541Srgrimes if (!cg_chkmagic(cgp) || 10261541Srgrimes (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 10271541Srgrimes brelse(bp); 102817108Sbde return (0); 10291541Srgrimes } 103055697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 103134961Sphk cgp->cg_time = time_second; 10321541Srgrimes if (size == fs->fs_bsize) { 103334266Sjulian bno = ffs_alloccgblk(ip, bp, bpref); 103487827Smckusick if (fs->fs_active != 0) 103588138Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 10361541Srgrimes bdwrite(bp); 10371541Srgrimes return (bno); 10381541Srgrimes } 10391541Srgrimes /* 10401541Srgrimes * check to see if any fragments are already available 10411541Srgrimes * allocsiz is the size which will be allocated, hacking 10421541Srgrimes * it down to a smaller size if necessary 10431541Srgrimes */ 104458087Smckusick blksfree = cg_blksfree(cgp); 10451541Srgrimes frags = numfrags(fs, size); 10461541Srgrimes for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 10471541Srgrimes if (cgp->cg_frsum[allocsiz] != 0) 10481541Srgrimes break; 10491541Srgrimes if (allocsiz == fs->fs_frag) { 10501541Srgrimes /* 10518876Srgrimes * no fragments were available, so a block will be 10521541Srgrimes * allocated, and hacked up 10531541Srgrimes */ 10541541Srgrimes if (cgp->cg_cs.cs_nbfree == 0) { 10551541Srgrimes brelse(bp); 105617108Sbde return (0); 10571541Srgrimes } 105834266Sjulian bno = ffs_alloccgblk(ip, bp, bpref); 10591541Srgrimes bpref = dtogd(fs, bno); 10601541Srgrimes for (i = frags; i < fs->fs_frag; i++) 106158087Smckusick setbit(blksfree, bpref + i); 10621541Srgrimes i = fs->fs_frag - frags; 10631541Srgrimes cgp->cg_cs.cs_nffree += i; 10641541Srgrimes fs->fs_cstotal.cs_nffree += i; 10651541Srgrimes fs->fs_cs(fs, cg).cs_nffree += i; 10661541Srgrimes fs->fs_fmod = 1; 10671541Srgrimes cgp->cg_frsum[i]++; 106887827Smckusick if (fs->fs_active != 0) 106988138Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 10701541Srgrimes bdwrite(bp); 10711541Srgrimes return (bno); 10721541Srgrimes } 10731541Srgrimes bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 10741541Srgrimes if (bno < 0) { 10751541Srgrimes brelse(bp); 107617108Sbde return (0); 10771541Srgrimes } 10781541Srgrimes for (i = 0; i < frags; i++) 107958087Smckusick clrbit(blksfree, bno + i); 10801541Srgrimes cgp->cg_cs.cs_nffree -= frags; 10811541Srgrimes fs->fs_cstotal.cs_nffree -= frags; 10821541Srgrimes fs->fs_cs(fs, cg).cs_nffree -= frags; 10831541Srgrimes fs->fs_fmod = 1; 10841541Srgrimes cgp->cg_frsum[allocsiz]--; 10851541Srgrimes if (frags != allocsiz) 10861541Srgrimes cgp->cg_frsum[allocsiz - frags]++; 108734266Sjulian blkno = cg * fs->fs_fpg + bno; 108834266Sjulian if (DOINGSOFTDEP(ITOV(ip))) 108934266Sjulian softdep_setup_blkmapdep(bp, fs, blkno); 109087827Smckusick if (fs->fs_active != 0) 109188138Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 10921541Srgrimes bdwrite(bp); 109334266Sjulian return ((u_long)blkno); 10941541Srgrimes} 10951541Srgrimes 10961541Srgrimes/* 10971541Srgrimes * Allocate a block in a cylinder group. 10981541Srgrimes * 10991541Srgrimes * This algorithm implements the following policy: 11001541Srgrimes * 1) allocate the requested block. 11011541Srgrimes * 2) allocate a rotationally optimal block in the same cylinder. 11021541Srgrimes * 3) allocate the next available block on the block rotor for the 11031541Srgrimes * specified cylinder group. 11041541Srgrimes * Note that this routine only allocates fs_bsize blocks; these 11051541Srgrimes * blocks may be fragmented by the routine that allocates them. 11061541Srgrimes */ 110722521Sdysonstatic ufs_daddr_t 110834266Sjulianffs_alloccgblk(ip, bp, bpref) 110934266Sjulian struct inode *ip; 111034266Sjulian struct buf *bp; 111122521Sdyson ufs_daddr_t bpref; 11121541Srgrimes{ 111334266Sjulian struct fs *fs; 111434266Sjulian struct cg *cgp; 111522521Sdyson ufs_daddr_t bno, blkno; 11161541Srgrimes int cylno, pos, delta; 11171541Srgrimes short *cylbp; 111896506Sphk int i; 111958087Smckusick u_int8_t *blksfree; 11201541Srgrimes 112134266Sjulian fs = ip->i_fs; 112234266Sjulian cgp = (struct cg *)bp->b_data; 112358087Smckusick blksfree = cg_blksfree(cgp); 11241541Srgrimes if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { 11251541Srgrimes bpref = cgp->cg_rotor; 11261541Srgrimes goto norot; 11271541Srgrimes } 11281541Srgrimes bpref = blknum(fs, bpref); 11291541Srgrimes bpref = dtogd(fs, bpref); 11301541Srgrimes /* 11311541Srgrimes * if the requested block is available, use it 11321541Srgrimes */ 113358087Smckusick if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) { 11341541Srgrimes bno = bpref; 11351541Srgrimes goto gotit; 11361541Srgrimes } 11376769Sse if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { 11381541Srgrimes /* 11391541Srgrimes * Block layout information is not available. 11401541Srgrimes * Leaving bpref unchanged means we take the 11418876Srgrimes * next available free block following the one 11421541Srgrimes * we just allocated. Hopefully this will at 11431541Srgrimes * least hit a track cache on drives of unknown 11441541Srgrimes * geometry (e.g. SCSI). 11451541Srgrimes */ 11461541Srgrimes goto norot; 11471541Srgrimes } 11481541Srgrimes /* 11496769Sse * check for a block available on the same cylinder 11506769Sse */ 11516769Sse cylno = cbtocylno(fs, bpref); 11526769Sse if (cg_blktot(cgp)[cylno] == 0) 11536769Sse goto norot; 11546769Sse /* 11558876Srgrimes * check the summary information to see if a block is 11561541Srgrimes * available in the requested cylinder starting at the 11571541Srgrimes * requested rotational position and proceeding around. 11581541Srgrimes */ 11591541Srgrimes cylbp = cg_blks(fs, cgp, cylno); 11601541Srgrimes pos = cbtorpos(fs, bpref); 11611541Srgrimes for (i = pos; i < fs->fs_nrpos; i++) 11621541Srgrimes if (cylbp[i] > 0) 11631541Srgrimes break; 11641541Srgrimes if (i == fs->fs_nrpos) 11651541Srgrimes for (i = 0; i < pos; i++) 11661541Srgrimes if (cylbp[i] > 0) 11671541Srgrimes break; 11681541Srgrimes if (cylbp[i] > 0) { 11691541Srgrimes /* 11701541Srgrimes * found a rotational position, now find the actual 11711541Srgrimes * block. A panic if none is actually there. 11721541Srgrimes */ 11731541Srgrimes pos = cylno % fs->fs_cpc; 11741541Srgrimes bno = (cylno - pos) * fs->fs_spc / NSPB(fs); 11751541Srgrimes if (fs_postbl(fs, pos)[i] == -1) { 11761541Srgrimes printf("pos = %d, i = %d, fs = %s\n", 11771541Srgrimes pos, i, fs->fs_fsmnt); 11781541Srgrimes panic("ffs_alloccgblk: cyl groups corrupted"); 11791541Srgrimes } 11801541Srgrimes for (i = fs_postbl(fs, pos)[i];; ) { 118158087Smckusick if (ffs_isblock(fs, blksfree, bno + i)) { 11821541Srgrimes bno = blkstofrags(fs, (bno + i)); 11831541Srgrimes goto gotit; 11841541Srgrimes } 11851541Srgrimes delta = fs_rotbl(fs)[i]; 11861541Srgrimes if (delta <= 0 || 11871541Srgrimes delta + i > fragstoblks(fs, fs->fs_fpg)) 11881541Srgrimes break; 11891541Srgrimes i += delta; 11901541Srgrimes } 11911541Srgrimes printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); 11921541Srgrimes panic("ffs_alloccgblk: can't find blk in cyl"); 11931541Srgrimes } 11941541Srgrimesnorot: 11951541Srgrimes /* 11961541Srgrimes * no blocks in the requested cylinder, so take next 11971541Srgrimes * available one in this cylinder group. 11981541Srgrimes */ 11991541Srgrimes bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 12001541Srgrimes if (bno < 0) 120117108Sbde return (0); 12021541Srgrimes cgp->cg_rotor = bno; 12031541Srgrimesgotit: 12041541Srgrimes blkno = fragstoblks(fs, bno); 120558087Smckusick ffs_clrblock(fs, blksfree, (long)blkno); 12061541Srgrimes ffs_clusteracct(fs, cgp, blkno, -1); 12071541Srgrimes cgp->cg_cs.cs_nbfree--; 12081541Srgrimes fs->fs_cstotal.cs_nbfree--; 12091541Srgrimes fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 12101541Srgrimes cylno = cbtocylno(fs, bno); 12111541Srgrimes cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; 12121541Srgrimes cg_blktot(cgp)[cylno]--; 12131541Srgrimes fs->fs_fmod = 1; 121434266Sjulian blkno = cgp->cg_cgx * fs->fs_fpg + bno; 121534266Sjulian if (DOINGSOFTDEP(ITOV(ip))) 121634266Sjulian softdep_setup_blkmapdep(bp, fs, blkno); 121734266Sjulian return (blkno); 12181541Srgrimes} 12191541Srgrimes 12201541Srgrimes/* 12211541Srgrimes * Determine whether a cluster can be allocated. 12221541Srgrimes * 12231541Srgrimes * We do not currently check for optimal rotational layout if there 12241541Srgrimes * are multiple choices in the same cylinder group. Instead we just 12251541Srgrimes * take the first one that we find following bpref. 12261541Srgrimes */ 122722521Sdysonstatic ufs_daddr_t 12281541Srgrimesffs_clusteralloc(ip, cg, bpref, len) 12291541Srgrimes struct inode *ip; 12301541Srgrimes int cg; 123122521Sdyson ufs_daddr_t bpref; 12321541Srgrimes int len; 12331541Srgrimes{ 123496506Sphk struct fs *fs; 123596506Sphk struct cg *cgp; 12361541Srgrimes struct buf *bp; 123722521Sdyson int i, got, run, bno, bit, map; 12381541Srgrimes u_char *mapp; 123922521Sdyson int32_t *lp; 124058087Smckusick u_int8_t *blksfree; 12411541Srgrimes 12421541Srgrimes fs = ip->i_fs; 124322521Sdyson if (fs->fs_maxcluster[cg] < len) 124454952Seivind return (0); 12451541Srgrimes if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, 12461541Srgrimes NOCRED, &bp)) 12471541Srgrimes goto fail; 12481541Srgrimes cgp = (struct cg *)bp->b_data; 12491541Srgrimes if (!cg_chkmagic(cgp)) 12501541Srgrimes goto fail; 125155697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 12521541Srgrimes /* 12531541Srgrimes * Check to see if a cluster of the needed size (or bigger) is 12541541Srgrimes * available in this cylinder group. 12551541Srgrimes */ 125622521Sdyson lp = &cg_clustersum(cgp)[len]; 12571541Srgrimes for (i = len; i <= fs->fs_contigsumsize; i++) 125822521Sdyson if (*lp++ > 0) 12591541Srgrimes break; 126022521Sdyson if (i > fs->fs_contigsumsize) { 126122521Sdyson /* 126222521Sdyson * This is the first time looking for a cluster in this 126322521Sdyson * cylinder group. Update the cluster summary information 126422521Sdyson * to reflect the true maximum sized cluster so that 126522521Sdyson * future cluster allocation requests can avoid reading 126622521Sdyson * the cylinder group map only to find no clusters. 126722521Sdyson */ 126822521Sdyson lp = &cg_clustersum(cgp)[len - 1]; 126922521Sdyson for (i = len - 1; i > 0; i--) 127022521Sdyson if (*lp-- > 0) 127122521Sdyson break; 127222521Sdyson fs->fs_maxcluster[cg] = i; 12731541Srgrimes goto fail; 127422521Sdyson } 12751541Srgrimes /* 12761541Srgrimes * Search the cluster map to find a big enough cluster. 12771541Srgrimes * We take the first one that we find, even if it is larger 12781541Srgrimes * than we need as we prefer to get one close to the previous 12791541Srgrimes * block allocation. We do not search before the current 12801541Srgrimes * preference point as we do not want to allocate a block 12811541Srgrimes * that is allocated before the previous one (as we will 12821541Srgrimes * then have to wait for another pass of the elevator 12831541Srgrimes * algorithm before it will be read). We prefer to fail and 12841541Srgrimes * be recalled to try an allocation in the next cylinder group. 12851541Srgrimes */ 12861541Srgrimes if (dtog(fs, bpref) != cg) 12871541Srgrimes bpref = 0; 12881541Srgrimes else 12891541Srgrimes bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 12901541Srgrimes mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 12911541Srgrimes map = *mapp++; 12921541Srgrimes bit = 1 << (bpref % NBBY); 129322521Sdyson for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 12941541Srgrimes if ((map & bit) == 0) { 12951541Srgrimes run = 0; 12961541Srgrimes } else { 12971541Srgrimes run++; 12981541Srgrimes if (run == len) 12991541Srgrimes break; 13001541Srgrimes } 130122521Sdyson if ((got & (NBBY - 1)) != (NBBY - 1)) { 13021541Srgrimes bit <<= 1; 13031541Srgrimes } else { 13041541Srgrimes map = *mapp++; 13051541Srgrimes bit = 1; 13061541Srgrimes } 13071541Srgrimes } 130827890Sphk if (got >= cgp->cg_nclusterblks) 13091541Srgrimes goto fail; 13101541Srgrimes /* 13111541Srgrimes * Allocate the cluster that we have found. 13121541Srgrimes */ 131358087Smckusick blksfree = cg_blksfree(cgp); 131422521Sdyson for (i = 1; i <= len; i++) 131558087Smckusick if (!ffs_isblock(fs, blksfree, got - run + i)) 131622521Sdyson panic("ffs_clusteralloc: map mismatch"); 131722521Sdyson bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); 131822521Sdyson if (dtog(fs, bno) != cg) 131922521Sdyson panic("ffs_clusteralloc: allocated out of group"); 13201541Srgrimes len = blkstofrags(fs, len); 13211541Srgrimes for (i = 0; i < len; i += fs->fs_frag) 132234266Sjulian if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) 13231541Srgrimes panic("ffs_clusteralloc: lost block"); 132487827Smckusick if (fs->fs_active != 0) 132588138Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 13269980Sdg bdwrite(bp); 13271541Srgrimes return (bno); 13281541Srgrimes 13291541Srgrimesfail: 13301541Srgrimes brelse(bp); 13311541Srgrimes return (0); 13321541Srgrimes} 13331541Srgrimes 13341541Srgrimes/* 13351541Srgrimes * Determine whether an inode can be allocated. 13361541Srgrimes * 13371541Srgrimes * Check to see if an inode is available, and if it is, 13381541Srgrimes * allocate it using the following policy: 13391541Srgrimes * 1) allocate the requested inode. 13401541Srgrimes * 2) allocate the next available inode after the requested 13411541Srgrimes * inode in the specified cylinder group. 13421541Srgrimes */ 13431541Srgrimesstatic ino_t 13441541Srgrimesffs_nodealloccg(ip, cg, ipref, mode) 13451541Srgrimes struct inode *ip; 13461541Srgrimes int cg; 134722521Sdyson ufs_daddr_t ipref; 13481541Srgrimes int mode; 13491541Srgrimes{ 135096506Sphk struct fs *fs; 135196506Sphk struct cg *cgp; 13521541Srgrimes struct buf *bp; 135358087Smckusick u_int8_t *inosused; 13541541Srgrimes int error, start, len, loc, map, i; 13551541Srgrimes 13561541Srgrimes fs = ip->i_fs; 13571541Srgrimes if (fs->fs_cs(fs, cg).cs_nifree == 0) 135817108Sbde return (0); 13591541Srgrimes error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 13601541Srgrimes (int)fs->fs_cgsize, NOCRED, &bp); 13611541Srgrimes if (error) { 13621541Srgrimes brelse(bp); 136317108Sbde return (0); 13641541Srgrimes } 13651541Srgrimes cgp = (struct cg *)bp->b_data; 13661541Srgrimes if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 13671541Srgrimes brelse(bp); 136817108Sbde return (0); 13691541Srgrimes } 137055697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 137134961Sphk cgp->cg_time = time_second; 137258087Smckusick inosused = cg_inosused(cgp); 13731541Srgrimes if (ipref) { 13741541Srgrimes ipref %= fs->fs_ipg; 137558087Smckusick if (isclr(inosused, ipref)) 13761541Srgrimes goto gotit; 13771541Srgrimes } 13781541Srgrimes start = cgp->cg_irotor / NBBY; 13791541Srgrimes len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 138058087Smckusick loc = skpc(0xff, len, &inosused[start]); 13811541Srgrimes if (loc == 0) { 13821541Srgrimes len = start + 1; 13831541Srgrimes start = 0; 138458087Smckusick loc = skpc(0xff, len, &inosused[0]); 13851541Srgrimes if (loc == 0) { 13866357Sphk printf("cg = %d, irotor = %ld, fs = %s\n", 138737555Sbde cg, (long)cgp->cg_irotor, fs->fs_fsmnt); 13881541Srgrimes panic("ffs_nodealloccg: map corrupted"); 13891541Srgrimes /* NOTREACHED */ 13901541Srgrimes } 13911541Srgrimes } 13921541Srgrimes i = start + len - loc; 139358087Smckusick map = inosused[i]; 13941541Srgrimes ipref = i * NBBY; 13951541Srgrimes for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 13961541Srgrimes if ((map & i) == 0) { 13971541Srgrimes cgp->cg_irotor = ipref; 13981541Srgrimes goto gotit; 13991541Srgrimes } 14001541Srgrimes } 14011541Srgrimes printf("fs = %s\n", fs->fs_fsmnt); 14021541Srgrimes panic("ffs_nodealloccg: block not in map"); 14031541Srgrimes /* NOTREACHED */ 14041541Srgrimesgotit: 140534266Sjulian if (DOINGSOFTDEP(ITOV(ip))) 140634266Sjulian softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); 140758087Smckusick setbit(inosused, ipref); 14081541Srgrimes cgp->cg_cs.cs_nifree--; 14091541Srgrimes fs->fs_cstotal.cs_nifree--; 14101541Srgrimes fs->fs_cs(fs, cg).cs_nifree--; 14111541Srgrimes fs->fs_fmod = 1; 14121541Srgrimes if ((mode & IFMT) == IFDIR) { 14131541Srgrimes cgp->cg_cs.cs_ndir++; 14141541Srgrimes fs->fs_cstotal.cs_ndir++; 14151541Srgrimes fs->fs_cs(fs, cg).cs_ndir++; 14161541Srgrimes } 141790366Smckusick if (fs->fs_active != 0) 141890366Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 14191541Srgrimes bdwrite(bp); 14201541Srgrimes return (cg * fs->fs_ipg + ipref); 14211541Srgrimes} 14221541Srgrimes 14231541Srgrimes/* 14241541Srgrimes * Free a block or fragment. 14251541Srgrimes * 14261541Srgrimes * The specified block or fragment is placed back in the 14278876Srgrimes * free map. If a fragment is deallocated, a possible 14281541Srgrimes * block reassembly is checked. 14291541Srgrimes */ 14301549Srgrimesvoid 143190098Smckusickffs_blkfree(fs, devvp, bno, size, inum) 143290098Smckusick struct fs *fs; 143390098Smckusick struct vnode *devvp; 143422521Sdyson ufs_daddr_t bno; 14351541Srgrimes long size; 143690098Smckusick ino_t inum; 14371541Srgrimes{ 143890098Smckusick struct cg *cgp; 14391541Srgrimes struct buf *bp; 144074545Smckusick ufs_daddr_t fragno, cgbno; 14411541Srgrimes int i, error, cg, blk, frags, bbase; 144258087Smckusick u_int8_t *blksfree; 144390098Smckusick dev_t dev; 14441541Srgrimes 144590098Smckusick cg = dtog(fs, bno); 144690098Smckusick if (devvp->v_type != VCHR) { 144790098Smckusick /* devvp is a snapshot */ 144890098Smckusick dev = VTOI(devvp)->i_devvp->v_rdev; 144990098Smckusick cgbno = fragstoblks(fs, cgtod(fs, cg)); 145090098Smckusick } else { 145190098Smckusick /* devvp is a normal disk device */ 145290098Smckusick dev = devvp->v_rdev; 145390098Smckusick cgbno = fsbtodb(fs, cgtod(fs, cg)); 145490098Smckusick if ((devvp->v_flag & VCOPYONWRITE) && 145590098Smckusick ffs_snapblkfree(fs, devvp, bno, size, inum)) 145690098Smckusick return; 145790098Smckusick VOP_FREEBLKS(devvp, fsbtodb(fs, bno), size); 145890098Smckusick } 145962976Smckusick#ifdef DIAGNOSTIC 146090098Smckusick if (dev->si_mountpoint && 146190098Smckusick (dev->si_mountpoint->mnt_kern_flag & MNTK_SUSPENDED)) 146262976Smckusick panic("ffs_blkfree: deallocation on suspended filesystem"); 146334266Sjulian if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 146434266Sjulian fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 146550253Sbde printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n", 146690098Smckusick devtoname(dev), (long)bno, (long)fs->fs_bsize, 146790098Smckusick size, fs->fs_fsmnt); 146823560Smpp panic("ffs_blkfree: bad size"); 14691541Srgrimes } 147062976Smckusick#endif 14711541Srgrimes if ((u_int)bno >= fs->fs_size) { 147290098Smckusick printf("bad block %ld, ino %lu\n", (long)bno, (u_long)inum); 147390098Smckusick ffs_fserr(fs, inum, "bad block"); 14741541Srgrimes return; 14751541Srgrimes } 147690098Smckusick if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 14771541Srgrimes brelse(bp); 14781541Srgrimes return; 14791541Srgrimes } 14801541Srgrimes cgp = (struct cg *)bp->b_data; 14811541Srgrimes if (!cg_chkmagic(cgp)) { 14821541Srgrimes brelse(bp); 14831541Srgrimes return; 14841541Srgrimes } 148555697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 148634961Sphk cgp->cg_time = time_second; 148774545Smckusick cgbno = dtogd(fs, bno); 148858087Smckusick blksfree = cg_blksfree(cgp); 14891541Srgrimes if (size == fs->fs_bsize) { 149074545Smckusick fragno = fragstoblks(fs, cgbno); 149174545Smckusick if (!ffs_isfreeblock(fs, blksfree, fragno)) { 149290098Smckusick if (devvp->v_type != VCHR) { 149390098Smckusick /* devvp is a snapshot */ 149490098Smckusick brelse(bp); 149590098Smckusick return; 149690098Smckusick } 149750253Sbde printf("dev = %s, block = %ld, fs = %s\n", 149890098Smckusick devtoname(dev), (long)bno, fs->fs_fsmnt); 149923560Smpp panic("ffs_blkfree: freeing free block"); 15001541Srgrimes } 150174545Smckusick ffs_setblock(fs, blksfree, fragno); 150274545Smckusick ffs_clusteracct(fs, cgp, fragno, 1); 15031541Srgrimes cgp->cg_cs.cs_nbfree++; 15041541Srgrimes fs->fs_cstotal.cs_nbfree++; 15051541Srgrimes fs->fs_cs(fs, cg).cs_nbfree++; 150674545Smckusick i = cbtocylno(fs, cgbno); 150774545Smckusick cg_blks(fs, cgp, i)[cbtorpos(fs, cgbno)]++; 15081541Srgrimes cg_blktot(cgp)[i]++; 15091541Srgrimes } else { 151074545Smckusick bbase = cgbno - fragnum(fs, cgbno); 15111541Srgrimes /* 15121541Srgrimes * decrement the counts associated with the old frags 15131541Srgrimes */ 151458087Smckusick blk = blkmap(fs, blksfree, bbase); 15151541Srgrimes ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 15161541Srgrimes /* 15171541Srgrimes * deallocate the fragment 15181541Srgrimes */ 15191541Srgrimes frags = numfrags(fs, size); 15201541Srgrimes for (i = 0; i < frags; i++) { 152174545Smckusick if (isset(blksfree, cgbno + i)) { 152250253Sbde printf("dev = %s, block = %ld, fs = %s\n", 152390098Smckusick devtoname(dev), (long)(bno + i), 152437555Sbde fs->fs_fsmnt); 152523560Smpp panic("ffs_blkfree: freeing free frag"); 15261541Srgrimes } 152774545Smckusick setbit(blksfree, cgbno + i); 15281541Srgrimes } 15291541Srgrimes cgp->cg_cs.cs_nffree += i; 15301541Srgrimes fs->fs_cstotal.cs_nffree += i; 15311541Srgrimes fs->fs_cs(fs, cg).cs_nffree += i; 15321541Srgrimes /* 15331541Srgrimes * add back in counts associated with the new frags 15341541Srgrimes */ 153558087Smckusick blk = blkmap(fs, blksfree, bbase); 15361541Srgrimes ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 15371541Srgrimes /* 15381541Srgrimes * if a complete block has been reassembled, account for it 15391541Srgrimes */ 154074545Smckusick fragno = fragstoblks(fs, bbase); 154174545Smckusick if (ffs_isblock(fs, blksfree, fragno)) { 15421541Srgrimes cgp->cg_cs.cs_nffree -= fs->fs_frag; 15431541Srgrimes fs->fs_cstotal.cs_nffree -= fs->fs_frag; 15441541Srgrimes fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 154574545Smckusick ffs_clusteracct(fs, cgp, fragno, 1); 15461541Srgrimes cgp->cg_cs.cs_nbfree++; 15471541Srgrimes fs->fs_cstotal.cs_nbfree++; 15481541Srgrimes fs->fs_cs(fs, cg).cs_nbfree++; 15491541Srgrimes i = cbtocylno(fs, bbase); 15501541Srgrimes cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; 15511541Srgrimes cg_blktot(cgp)[i]++; 15521541Srgrimes } 15531541Srgrimes } 15541541Srgrimes fs->fs_fmod = 1; 155587827Smckusick if (fs->fs_active != 0) 155688138Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 15571541Srgrimes bdwrite(bp); 15581541Srgrimes} 15591541Srgrimes 156022521Sdyson#ifdef DIAGNOSTIC 15611541Srgrimes/* 156222521Sdyson * Verify allocation of a block or fragment. Returns true if block or 156322521Sdyson * fragment is allocated, false if it is free. 156422521Sdyson */ 156531352Sbdestatic int 156622521Sdysonffs_checkblk(ip, bno, size) 156722521Sdyson struct inode *ip; 156822521Sdyson ufs_daddr_t bno; 156922521Sdyson long size; 157022521Sdyson{ 157122521Sdyson struct fs *fs; 157222521Sdyson struct cg *cgp; 157322521Sdyson struct buf *bp; 157422521Sdyson int i, error, frags, free; 157558087Smckusick u_int8_t *blksfree; 157622521Sdyson 157722521Sdyson fs = ip->i_fs; 157822521Sdyson if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 157937555Sbde printf("bsize = %ld, size = %ld, fs = %s\n", 158037555Sbde (long)fs->fs_bsize, size, fs->fs_fsmnt); 158122544Smpp panic("ffs_checkblk: bad size"); 158222521Sdyson } 158322521Sdyson if ((u_int)bno >= fs->fs_size) 158422544Smpp panic("ffs_checkblk: bad block %d", bno); 158522521Sdyson error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 158622521Sdyson (int)fs->fs_cgsize, NOCRED, &bp); 158722544Smpp if (error) 158822544Smpp panic("ffs_checkblk: cg bread failed"); 158922521Sdyson cgp = (struct cg *)bp->b_data; 159022544Smpp if (!cg_chkmagic(cgp)) 159122544Smpp panic("ffs_checkblk: cg magic mismatch"); 159255697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 159358087Smckusick blksfree = cg_blksfree(cgp); 159422521Sdyson bno = dtogd(fs, bno); 159522521Sdyson if (size == fs->fs_bsize) { 159658087Smckusick free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno)); 159722521Sdyson } else { 159822521Sdyson frags = numfrags(fs, size); 159922521Sdyson for (free = 0, i = 0; i < frags; i++) 160058087Smckusick if (isset(blksfree, bno + i)) 160122521Sdyson free++; 160222521Sdyson if (free != 0 && free != frags) 160322544Smpp panic("ffs_checkblk: partially free fragment"); 160422521Sdyson } 160522521Sdyson brelse(bp); 160622521Sdyson return (!free); 160722521Sdyson} 160822521Sdyson#endif /* DIAGNOSTIC */ 160922521Sdyson 161022521Sdyson/* 16111541Srgrimes * Free an inode. 16121541Srgrimes */ 16131541Srgrimesint 161474548Smckusickffs_vfree(pvp, ino, mode) 161530474Sphk struct vnode *pvp; 161630474Sphk ino_t ino; 161730474Sphk int mode; 16181541Srgrimes{ 161934266Sjulian if (DOINGSOFTDEP(pvp)) { 162034266Sjulian softdep_freefile(pvp, ino, mode); 162134266Sjulian return (0); 162234266Sjulian } 162390098Smckusick return (ffs_freefile(VTOI(pvp)->i_fs, VTOI(pvp)->i_devvp, ino, mode)); 162434266Sjulian} 162534266Sjulian 162634266Sjulian/* 162734266Sjulian * Do the actual free operation. 162834266Sjulian * The specified inode is placed back in the free map. 162934266Sjulian */ 163074548Smckusickint 163190098Smckusickffs_freefile(fs, devvp, ino, mode) 163290098Smckusick struct fs *fs; 163390098Smckusick struct vnode *devvp; 163434266Sjulian ino_t ino; 163534266Sjulian int mode; 163634266Sjulian{ 163790098Smckusick struct cg *cgp; 16381541Srgrimes struct buf *bp; 163990098Smckusick int error, cgbno, cg; 164058087Smckusick u_int8_t *inosused; 164190098Smckusick dev_t dev; 16421541Srgrimes 164390098Smckusick cg = ino_to_cg(fs, ino); 164490098Smckusick if (devvp->v_type != VCHR) { 164590098Smckusick /* devvp is a snapshot */ 164690098Smckusick dev = VTOI(devvp)->i_devvp->v_rdev; 164790098Smckusick cgbno = fragstoblks(fs, cgtod(fs, cg)); 164890098Smckusick } else { 164990098Smckusick /* devvp is a normal disk device */ 165090098Smckusick dev = devvp->v_rdev; 165190098Smckusick cgbno = fsbtodb(fs, cgtod(fs, cg)); 165290098Smckusick } 16531541Srgrimes if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 165490098Smckusick panic("ffs_vfree: range: dev = %s, ino = %d, fs = %s", 165590098Smckusick devtoname(dev), ino, fs->fs_fsmnt); 165690098Smckusick if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 16571541Srgrimes brelse(bp); 165834266Sjulian return (error); 16591541Srgrimes } 16601541Srgrimes cgp = (struct cg *)bp->b_data; 16611541Srgrimes if (!cg_chkmagic(cgp)) { 16621541Srgrimes brelse(bp); 16631541Srgrimes return (0); 16641541Srgrimes } 166555697Smckusick bp->b_xflags |= BX_BKGRDWRITE; 166634961Sphk cgp->cg_time = time_second; 166758087Smckusick inosused = cg_inosused(cgp); 16681541Srgrimes ino %= fs->fs_ipg; 166958087Smckusick if (isclr(inosused, ino)) { 167090098Smckusick printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev), 167174548Smckusick (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt); 16721541Srgrimes if (fs->fs_ronly == 0) 167323560Smpp panic("ffs_vfree: freeing free inode"); 16741541Srgrimes } 167558087Smckusick clrbit(inosused, ino); 16761541Srgrimes if (ino < cgp->cg_irotor) 16771541Srgrimes cgp->cg_irotor = ino; 16781541Srgrimes cgp->cg_cs.cs_nifree++; 16791541Srgrimes fs->fs_cstotal.cs_nifree++; 16801541Srgrimes fs->fs_cs(fs, cg).cs_nifree++; 168130474Sphk if ((mode & IFMT) == IFDIR) { 16821541Srgrimes cgp->cg_cs.cs_ndir--; 16831541Srgrimes fs->fs_cstotal.cs_ndir--; 16841541Srgrimes fs->fs_cs(fs, cg).cs_ndir--; 16851541Srgrimes } 16861541Srgrimes fs->fs_fmod = 1; 168790366Smckusick if (fs->fs_active != 0) 168890366Smckusick atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 16891541Srgrimes bdwrite(bp); 16901541Srgrimes return (0); 16911541Srgrimes} 16921541Srgrimes 16931541Srgrimes/* 16941541Srgrimes * Find a block of the specified size in the specified cylinder group. 16951541Srgrimes * 16961541Srgrimes * It is a panic if a request is made to find a block if none are 16971541Srgrimes * available. 16981541Srgrimes */ 169922521Sdysonstatic ufs_daddr_t 17001541Srgrimesffs_mapsearch(fs, cgp, bpref, allocsiz) 170196506Sphk struct fs *fs; 170296506Sphk struct cg *cgp; 170322521Sdyson ufs_daddr_t bpref; 17041541Srgrimes int allocsiz; 17051541Srgrimes{ 170622521Sdyson ufs_daddr_t bno; 17071541Srgrimes int start, len, loc, i; 17081541Srgrimes int blk, field, subfield, pos; 170958087Smckusick u_int8_t *blksfree; 17101541Srgrimes 17111541Srgrimes /* 17121541Srgrimes * find the fragment by searching through the free block 17131541Srgrimes * map for an appropriate bit pattern 17141541Srgrimes */ 17151541Srgrimes if (bpref) 17161541Srgrimes start = dtogd(fs, bpref) / NBBY; 17171541Srgrimes else 17181541Srgrimes start = cgp->cg_frotor / NBBY; 171958087Smckusick blksfree = cg_blksfree(cgp); 17201541Srgrimes len = howmany(fs->fs_fpg, NBBY) - start; 172158087Smckusick loc = scanc((u_int)len, (u_char *)&blksfree[start], 17221541Srgrimes (u_char *)fragtbl[fs->fs_frag], 17231541Srgrimes (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 17241541Srgrimes if (loc == 0) { 17251541Srgrimes len = start + 1; 17261541Srgrimes start = 0; 172758087Smckusick loc = scanc((u_int)len, (u_char *)&blksfree[0], 17281541Srgrimes (u_char *)fragtbl[fs->fs_frag], 17291541Srgrimes (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 17301541Srgrimes if (loc == 0) { 17311541Srgrimes printf("start = %d, len = %d, fs = %s\n", 17321541Srgrimes start, len, fs->fs_fsmnt); 17331541Srgrimes panic("ffs_alloccg: map corrupted"); 17341541Srgrimes /* NOTREACHED */ 17351541Srgrimes } 17361541Srgrimes } 17371541Srgrimes bno = (start + len - loc) * NBBY; 17381541Srgrimes cgp->cg_frotor = bno; 17391541Srgrimes /* 17401541Srgrimes * found the byte in the map 17411541Srgrimes * sift through the bits to find the selected frag 17421541Srgrimes */ 17431541Srgrimes for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 174458087Smckusick blk = blkmap(fs, blksfree, bno); 17451541Srgrimes blk <<= 1; 17461541Srgrimes field = around[allocsiz]; 17471541Srgrimes subfield = inside[allocsiz]; 17481541Srgrimes for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 17491541Srgrimes if ((blk & field) == subfield) 17501541Srgrimes return (bno + pos); 17511541Srgrimes field <<= 1; 17521541Srgrimes subfield <<= 1; 17531541Srgrimes } 17541541Srgrimes } 17553487Sphk printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 17561541Srgrimes panic("ffs_alloccg: block not in map"); 17571541Srgrimes return (-1); 17581541Srgrimes} 17591541Srgrimes 17601541Srgrimes/* 17611541Srgrimes * Update the cluster map because of an allocation or free. 17621541Srgrimes * 17631541Srgrimes * Cnt == 1 means free; cnt == -1 means allocating. 17641541Srgrimes */ 176576269Smckusickvoid 17661541Srgrimesffs_clusteracct(fs, cgp, blkno, cnt) 17671541Srgrimes struct fs *fs; 17681541Srgrimes struct cg *cgp; 176922521Sdyson ufs_daddr_t blkno; 17701541Srgrimes int cnt; 17711541Srgrimes{ 177222521Sdyson int32_t *sump; 177322521Sdyson int32_t *lp; 17741541Srgrimes u_char *freemapp, *mapp; 17751541Srgrimes int i, start, end, forw, back, map, bit; 17761541Srgrimes 17771541Srgrimes if (fs->fs_contigsumsize <= 0) 17781541Srgrimes return; 17791541Srgrimes freemapp = cg_clustersfree(cgp); 17801541Srgrimes sump = cg_clustersum(cgp); 17811541Srgrimes /* 17821541Srgrimes * Allocate or clear the actual block. 17831541Srgrimes */ 17841541Srgrimes if (cnt > 0) 17851541Srgrimes setbit(freemapp, blkno); 17861541Srgrimes else 17871541Srgrimes clrbit(freemapp, blkno); 17881541Srgrimes /* 17891541Srgrimes * Find the size of the cluster going forward. 17901541Srgrimes */ 17911541Srgrimes start = blkno + 1; 17921541Srgrimes end = start + fs->fs_contigsumsize; 17931541Srgrimes if (end >= cgp->cg_nclusterblks) 17941541Srgrimes end = cgp->cg_nclusterblks; 17951541Srgrimes mapp = &freemapp[start / NBBY]; 17961541Srgrimes map = *mapp++; 17971541Srgrimes bit = 1 << (start % NBBY); 17981541Srgrimes for (i = start; i < end; i++) { 17991541Srgrimes if ((map & bit) == 0) 18001541Srgrimes break; 18011541Srgrimes if ((i & (NBBY - 1)) != (NBBY - 1)) { 18021541Srgrimes bit <<= 1; 18031541Srgrimes } else { 18041541Srgrimes map = *mapp++; 18051541Srgrimes bit = 1; 18061541Srgrimes } 18071541Srgrimes } 18081541Srgrimes forw = i - start; 18091541Srgrimes /* 18101541Srgrimes * Find the size of the cluster going backward. 18111541Srgrimes */ 18121541Srgrimes start = blkno - 1; 18131541Srgrimes end = start - fs->fs_contigsumsize; 18141541Srgrimes if (end < 0) 18151541Srgrimes end = -1; 18161541Srgrimes mapp = &freemapp[start / NBBY]; 18171541Srgrimes map = *mapp--; 18181541Srgrimes bit = 1 << (start % NBBY); 18191541Srgrimes for (i = start; i > end; i--) { 18201541Srgrimes if ((map & bit) == 0) 18211541Srgrimes break; 18221541Srgrimes if ((i & (NBBY - 1)) != 0) { 18231541Srgrimes bit >>= 1; 18241541Srgrimes } else { 18251541Srgrimes map = *mapp--; 18261541Srgrimes bit = 1 << (NBBY - 1); 18271541Srgrimes } 18281541Srgrimes } 18291541Srgrimes back = start - i; 18301541Srgrimes /* 18311541Srgrimes * Account for old cluster and the possibly new forward and 18321541Srgrimes * back clusters. 18331541Srgrimes */ 18341541Srgrimes i = back + forw + 1; 18351541Srgrimes if (i > fs->fs_contigsumsize) 18361541Srgrimes i = fs->fs_contigsumsize; 18371541Srgrimes sump[i] += cnt; 18381541Srgrimes if (back > 0) 18391541Srgrimes sump[back] -= cnt; 18401541Srgrimes if (forw > 0) 18411541Srgrimes sump[forw] -= cnt; 184222521Sdyson /* 184322521Sdyson * Update cluster summary information. 184422521Sdyson */ 184522521Sdyson lp = &sump[fs->fs_contigsumsize]; 184622521Sdyson for (i = fs->fs_contigsumsize; i > 0; i--) 184722521Sdyson if (*lp-- > 0) 184822521Sdyson break; 184922521Sdyson fs->fs_maxcluster[cgp->cg_cgx] = i; 18501541Srgrimes} 18511541Srgrimes 18521541Srgrimes/* 185396755Strhodes * Fserr prints the name of a filesystem with an error diagnostic. 18548876Srgrimes * 18551541Srgrimes * The form of the error message is: 18561541Srgrimes * fs: error message 18571541Srgrimes */ 18581541Srgrimesstatic void 185990098Smckusickffs_fserr(fs, inum, cp) 18601541Srgrimes struct fs *fs; 186190098Smckusick ino_t inum; 18621541Srgrimes char *cp; 18631541Srgrimes{ 186418330Speter struct proc *p = curproc; /* XXX */ 18651541Srgrimes 186690098Smckusick log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n", 186790098Smckusick p ? p->p_pid : -1, p ? p->p_comm : "-", 186890098Smckusick p ? p->p_ucred->cr_uid : 0, inum, fs->fs_fsmnt, cp); 18691541Srgrimes} 187074548Smckusick 187174548Smckusick/* 187274548Smckusick * This function provides the capability for the fsck program to 187374548Smckusick * update an active filesystem. Six operations are provided: 187474548Smckusick * 187574548Smckusick * adjrefcnt(inode, amt) - adjusts the reference count on the 187674548Smckusick * specified inode by the specified amount. Under normal 187774548Smckusick * operation the count should always go down. Decrementing 187874548Smckusick * the count to zero will cause the inode to be freed. 187974548Smckusick * adjblkcnt(inode, amt) - adjust the number of blocks used to 188074548Smckusick * by the specifed amount. 188174548Smckusick * freedirs(inode, count) - directory inodes [inode..inode + count - 1] 188274548Smckusick * are marked as free. Inodes should never have to be marked 188374548Smckusick * as in use. 188474548Smckusick * freefiles(inode, count) - file inodes [inode..inode + count - 1] 188574548Smckusick * are marked as free. Inodes should never have to be marked 188674548Smckusick * as in use. 188774548Smckusick * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] 188874548Smckusick * are marked as free. Blocks should never have to be marked 188974548Smckusick * as in use. 189074548Smckusick * setflags(flags, set/clear) - the fs_flags field has the specified 189174548Smckusick * flags set (second parameter +1) or cleared (second parameter -1). 189274548Smckusick */ 189374548Smckusick 189492728Salfredstatic int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); 189574548Smckusick 189674548SmckusickSYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, 189774548Smckusick 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); 189874548Smckusick 189974548SmckusickSYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, 190074548Smckusick sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); 190174548Smckusick 190274548SmckusickSYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, 190374548Smckusick sysctl_ffs_fsck, "Free Range of Directory Inodes"); 190474548Smckusick 190574548SmckusickSYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, 190674548Smckusick sysctl_ffs_fsck, "Free Range of File Inodes"); 190774548Smckusick 190874548SmckusickSYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, 190974548Smckusick sysctl_ffs_fsck, "Free Range of Blocks"); 191074548Smckusick 191174548SmckusickSYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, 191274548Smckusick sysctl_ffs_fsck, "Change Filesystem Flags"); 191374548Smckusick 191474548Smckusick#ifdef DEBUG 191574548Smckusickstatic int fsckcmds = 0; 191674548SmckusickSYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); 191774548Smckusick#endif /* DEBUG */ 191874548Smckusick 191974548Smckusickstatic int 192074548Smckusicksysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) 192174548Smckusick{ 192274548Smckusick struct fsck_cmd cmd; 192374548Smckusick struct ufsmount *ump; 192474548Smckusick struct vnode *vp; 192574548Smckusick struct inode *ip; 192674548Smckusick struct mount *mp; 192774548Smckusick struct fs *fs; 192874548Smckusick ufs_daddr_t blkno; 192974548Smckusick long blkcnt, blksize; 193074548Smckusick struct file *fp; 193174548Smckusick int filetype, error; 193274548Smckusick 193374548Smckusick if (req->newlen > sizeof cmd) 193474548Smckusick return (EBADRPC); 193574548Smckusick if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) 193674548Smckusick return (error); 193774548Smckusick if (cmd.version != FFS_CMD_VERSION) 193874548Smckusick return (ERPCMISMATCH); 193974548Smckusick if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0) 194074548Smckusick return (error); 194175572Smckusick vn_start_write((struct vnode *)fp->f_data, &mp, V_WAIT); 194275572Smckusick if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { 194375572Smckusick vn_finished_write(mp); 194489306Salfred fdrop(fp, curthread); 194574705Smckusick return (EINVAL); 194675572Smckusick } 194775572Smckusick if (mp->mnt_flag & MNT_RDONLY) { 194875572Smckusick vn_finished_write(mp); 194989306Salfred fdrop(fp, curthread); 195074548Smckusick return (EROFS); 195175572Smckusick } 195274548Smckusick ump = VFSTOUFS(mp); 195374548Smckusick fs = ump->um_fs; 195474548Smckusick filetype = IFREG; 195574548Smckusick 195674548Smckusick switch (oidp->oid_number) { 195774548Smckusick 195874548Smckusick case FFS_SET_FLAGS: 195974548Smckusick#ifdef DEBUG 196074548Smckusick if (fsckcmds) 196174548Smckusick printf("%s: %s flags\n", mp->mnt_stat.f_mntonname, 196274548Smckusick cmd.size > 0 ? "set" : "clear"); 196374548Smckusick#endif /* DEBUG */ 196474548Smckusick if (cmd.size > 0) 196574548Smckusick fs->fs_flags |= (long)cmd.value; 196674548Smckusick else 196774548Smckusick fs->fs_flags &= ~(long)cmd.value; 196874548Smckusick break; 196974548Smckusick 197074548Smckusick case FFS_ADJ_REFCNT: 197174548Smckusick#ifdef DEBUG 197274548Smckusick if (fsckcmds) { 197374548Smckusick printf("%s: adjust inode %d count by %ld\n", 197474548Smckusick mp->mnt_stat.f_mntonname, (ino_t)cmd.value, 197574548Smckusick cmd.size); 197674548Smckusick } 197774548Smckusick#endif /* DEBUG */ 197892462Smckusick if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 197975572Smckusick break; 198074548Smckusick ip = VTOI(vp); 198174548Smckusick ip->i_nlink += cmd.size; 198274548Smckusick ip->i_effnlink += cmd.size; 198374548Smckusick ip->i_flag |= IN_CHANGE; 198474548Smckusick if (DOINGSOFTDEP(vp)) 198574548Smckusick softdep_change_linkcnt(ip); 198674548Smckusick vput(vp); 198774548Smckusick break; 198874548Smckusick 198974548Smckusick case FFS_ADJ_BLKCNT: 199074548Smckusick#ifdef DEBUG 199174548Smckusick if (fsckcmds) { 199274548Smckusick printf("%s: adjust inode %d block count by %ld\n", 199374548Smckusick mp->mnt_stat.f_mntonname, (ino_t)cmd.value, 199474548Smckusick cmd.size); 199574548Smckusick } 199674548Smckusick#endif /* DEBUG */ 199792462Smckusick if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 199875572Smckusick break; 199974548Smckusick ip = VTOI(vp); 200074548Smckusick ip->i_blocks += cmd.size; 200174548Smckusick ip->i_flag |= IN_CHANGE; 200274548Smckusick vput(vp); 200374548Smckusick break; 200474548Smckusick 200574548Smckusick case FFS_DIR_FREE: 200674548Smckusick filetype = IFDIR; 200774548Smckusick /* fall through */ 200874548Smckusick 200974548Smckusick case FFS_FILE_FREE: 201074548Smckusick#ifdef DEBUG 201174548Smckusick if (fsckcmds) { 201274548Smckusick if (cmd.size == 1) 201374548Smckusick printf("%s: free %s inode %d\n", 201474548Smckusick mp->mnt_stat.f_mntonname, 201574548Smckusick filetype == IFDIR ? "directory" : "file", 201674548Smckusick (ino_t)cmd.value); 201774548Smckusick else 201874548Smckusick printf("%s: free %s inodes %d-%d\n", 201974548Smckusick mp->mnt_stat.f_mntonname, 202074548Smckusick filetype == IFDIR ? "directory" : "file", 202174747Sasmodai (ino_t)cmd.value, 202278256Speter (ino_t)(cmd.value + cmd.size - 1)); 202374548Smckusick } 202474548Smckusick#endif /* DEBUG */ 202574548Smckusick while (cmd.size > 0) { 202690098Smckusick if ((error = ffs_freefile(fs, ump->um_devvp, cmd.value, 202790098Smckusick filetype))) 202875572Smckusick break; 202974548Smckusick cmd.size -= 1; 203074548Smckusick cmd.value += 1; 203174548Smckusick } 203274548Smckusick break; 203374548Smckusick 203474548Smckusick case FFS_BLK_FREE: 203574548Smckusick#ifdef DEBUG 203674548Smckusick if (fsckcmds) { 203774548Smckusick if (cmd.size == 1) 203874548Smckusick printf("%s: free block %d\n", 203974548Smckusick mp->mnt_stat.f_mntonname, 204074548Smckusick (ufs_daddr_t)cmd.value); 204174548Smckusick else 204274548Smckusick printf("%s: free blocks %d-%ld\n", 204374548Smckusick mp->mnt_stat.f_mntonname, 204474548Smckusick (ufs_daddr_t)cmd.value, 204574548Smckusick (ufs_daddr_t)cmd.value + cmd.size - 1); 204674548Smckusick } 204774548Smckusick#endif /* DEBUG */ 204874548Smckusick blkno = (ufs_daddr_t)cmd.value; 204974548Smckusick blkcnt = cmd.size; 205074548Smckusick blksize = fs->fs_frag - (blkno % fs->fs_frag); 205174548Smckusick while (blkcnt > 0) { 205274548Smckusick if (blksize > blkcnt) 205374548Smckusick blksize = blkcnt; 205490098Smckusick ffs_blkfree(fs, ump->um_devvp, blkno, 205590098Smckusick blksize * fs->fs_fsize, ROOTINO); 205674548Smckusick blkno += blksize; 205774548Smckusick blkcnt -= blksize; 205874548Smckusick blksize = fs->fs_frag; 205974548Smckusick } 206074548Smckusick break; 206174548Smckusick 206274548Smckusick default: 206374548Smckusick#ifdef DEBUG 206474548Smckusick if (fsckcmds) { 206574548Smckusick printf("Invalid request %d from fsck\n", 206674548Smckusick oidp->oid_number); 206774548Smckusick } 206874548Smckusick#endif /* DEBUG */ 206975572Smckusick error = EINVAL; 207075572Smckusick break; 207174548Smckusick 207274548Smckusick } 207389306Salfred fdrop(fp, curthread); 207475572Smckusick vn_finished_write(mp); 207575572Smckusick return (error); 207674548Smckusick} 2077