1139778Simp/*-
212115Sdyson *  modified for Lites 1.1
312115Sdyson *
412115Sdyson *  Aug 1995, Godmar Back (gback@cs.utah.edu)
512115Sdyson *  University of Utah, Department of Computer Science
612115Sdyson */
7139778Simp/*-
812115Sdyson * Copyright (c) 1982, 1986, 1989, 1993
912115Sdyson *	The Regents of the University of California.  All rights reserved.
1012115Sdyson *
1112115Sdyson * Redistribution and use in source and binary forms, with or without
1212115Sdyson * modification, are permitted provided that the following conditions
1312115Sdyson * are met:
1412115Sdyson * 1. Redistributions of source code must retain the above copyright
1512115Sdyson *    notice, this list of conditions and the following disclaimer.
1612115Sdyson * 2. Redistributions in binary form must reproduce the above copyright
1712115Sdyson *    notice, this list of conditions and the following disclaimer in the
1812115Sdyson *    documentation and/or other materials provided with the distribution.
1912115Sdyson * 4. Neither the name of the University nor the names of its contributors
2012115Sdyson *    may be used to endorse or promote products derived from this software
2112115Sdyson *    without specific prior written permission.
2212115Sdyson *
2312115Sdyson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2412115Sdyson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2512115Sdyson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2612115Sdyson * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2712115Sdyson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2812115Sdyson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2912115Sdyson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3012115Sdyson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3112115Sdyson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3212115Sdyson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3312115Sdyson * SUCH DAMAGE.
3412115Sdyson *
3593015Sbde *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
3659259Srwatson * $FreeBSD$
3712115Sdyson */
3812115Sdyson
3912115Sdyson#include <sys/param.h>
4012115Sdyson#include <sys/systm.h>
4150260Sbde#include <sys/conf.h>
4212115Sdyson#include <sys/vnode.h>
4312115Sdyson#include <sys/stat.h>
4412115Sdyson#include <sys/mount.h>
45228539Spfg#include <sys/sysctl.h>
4612115Sdyson#include <sys/syslog.h>
47202283Slulf#include <sys/buf.h>
4812115Sdyson
49251809Spfg#include <fs/ext2fs/fs.h>
50202283Slulf#include <fs/ext2fs/inode.h>
51202283Slulf#include <fs/ext2fs/ext2_mount.h>
52202283Slulf#include <fs/ext2fs/ext2fs.h>
53202283Slulf#include <fs/ext2fs/ext2_extern.h>
5412115Sdyson
55202283Slulfstatic daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
56228539Spfgstatic daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
57202283Slulfstatic u_long	ext2_dirpref(struct inode *);
58202283Slulfstatic void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
59202283Slulfstatic u_long	ext2_hashalloc(struct inode *, int, long, int,
60202283Slulf				daddr_t (*)(struct inode *, int, daddr_t,
61202283Slulf						int));
62202283Slulfstatic daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
63202283Slulfstatic daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
64218175Sjhb
6512115Sdyson/*
66251612Spfg * Allocate a block in the filesystem.
6712115Sdyson *
68202283Slulf * A preference may be optionally specified. If a preference is given
69202283Slulf * the following hierarchy is used to allocate a block:
70202283Slulf *   1) allocate the requested block.
71202283Slulf *   2) allocate a rotationally optimal block in the same cylinder.
72202283Slulf *   3) allocate a block in the same cylinder group.
73202283Slulf *   4) quadradically rehash into other cylinder groups, until an
74202283Slulf *        available block is located.
75202283Slulf * If no block preference is given the following hierarchy is used
76202283Slulf * to allocate a block:
77202283Slulf *   1) allocate a block in the cylinder group that contains the
78202283Slulf *        inode for the file.
79202283Slulf *   2) quadradically rehash into other cylinder groups, until an
80202283Slulf *        available block is located.
8112115Sdyson */
8212115Sdysonint
83254283Spfgext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
84254283Spfg    struct ucred *cred, e4fs_daddr_t *bnp)
8512115Sdyson{
86202283Slulf	struct m_ext2fs *fs;
87202283Slulf	struct ext2mount *ump;
8896877Siedowse	int32_t bno;
89202283Slulf	int cg;
9012115Sdyson	*bnp = 0;
9112115Sdyson	fs = ip->i_e2fs;
92202283Slulf	ump = ip->i_ump;
93202283Slulf	mtx_assert(EXT2_MTX(ump), MA_OWNED);
94251658Spfg#ifdef INVARIANTS
95202283Slulf	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
96143677Sphk		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
97202283Slulf		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
9812115Sdyson		panic("ext2_alloc: bad size");
9912115Sdyson	}
10012115Sdyson	if (cred == NOCRED)
10124492Sbde		panic("ext2_alloc: missing credential");
102251658Spfg#endif /* INVARIANTS */
103202283Slulf	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
10412115Sdyson		goto nospace;
10512115Sdyson	if (cred->cr_uid != 0 &&
106202283Slulf		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
10712115Sdyson		goto nospace;
108202283Slulf	if (bpref >= fs->e2fs->e2fs_bcount)
10912115Sdyson		bpref = 0;
110218175Sjhb	if (bpref == 0)
111228539Spfg		cg = ino_to_cg(fs, ip->i_number);
112228539Spfg	else
113228539Spfg		cg = dtog(fs, bpref);
114228539Spfg	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
115228539Spfg				      ext2_alloccg);
116228539Spfg	if (bno > 0) {
117218175Sjhb		/* set next_alloc fields as done in block_getblk */
118218175Sjhb		ip->i_next_alloc_block = lbn;
119218175Sjhb		ip->i_next_alloc_goal = bno;
120218175Sjhb
121228539Spfg		ip->i_blocks += btodb(fs->e2fs_bsize);
122228539Spfg		ip->i_flag |= IN_CHANGE | IN_UPDATE;
123228539Spfg		*bnp = bno;
124228539Spfg		return (0);
12512115Sdyson        }
12612115Sdysonnospace:
127202283Slulf	EXT2_UNLOCK(ump);
128251612Spfg	ext2_fserr(fs, cred->cr_uid, "filesystem full");
129251612Spfg	uprintf("\n%s: write failed, filesystem is full\n", fs->e2fs_fsmnt);
13012115Sdyson	return (ENOSPC);
13112115Sdyson}
13212115Sdyson
13312115Sdyson/*
13412115Sdyson * Reallocate a sequence of blocks into a contiguous sequence of blocks.
13512115Sdyson *
13612115Sdyson * The vnode and an array of buffer pointers for a range of sequential
13712115Sdyson * logical blocks to be made contiguous is given. The allocator attempts
13812115Sdyson * to find a range of sequential blocks starting as close as possible to
13912115Sdyson * an fs_rotdelay offset from the end of the allocation for the logical
14072640Sasmodai * block immediately preceding the current range. If successful, the
14112115Sdyson * physical block numbers in the buffer pointers and in the inode are
14212115Sdyson * changed to reflect the new allocation. If unsuccessful, the allocation
14312115Sdyson * is left unchanged. The success in doing the reallocation is returned.
14412115Sdyson * Note that the error return is not reflected back to the user. Rather
14512115Sdyson * the previous block allocation will be used.
14612115Sdyson */
14716322Sgpalmer
148227309Sedstatic SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
149218175Sjhb
150245817Spfgstatic int doasyncfree = 0;
151218175SjhbSYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
152218175Sjhb    "Use asychronous writes to update block pointers when freeing blocks");
153218175Sjhb
154245817Spfgstatic int doreallocblks = 0;
155218175SjhbSYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
15616322Sgpalmer
15712115Sdysonint
158246634Spfgext2_reallocblks(struct vop_reallocblks_args *ap)
15912115Sdyson{
160202283Slulf	struct m_ext2fs *fs;
16112115Sdyson	struct inode *ip;
16212115Sdyson	struct vnode *vp;
16312115Sdyson	struct buf *sbp, *ebp;
164245820Spfg	uint32_t *bap, *sbap, *ebap = 0;
165202283Slulf	struct ext2mount *ump;
16612115Sdyson	struct cluster_save *buflist;
16712115Sdyson	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
168252103Spfg	e2fs_lbn_t start_lbn, end_lbn;
169254283Spfg	int soff;
170254283Spfg	e2fs_daddr_t newblk, blkno;
17112115Sdyson	int i, len, start_lvl, end_lvl, pref, ssize;
17212115Sdyson
173228539Spfg	if (doreallocblks == 0)
174228539Spfg		  return (ENOSPC);
175228539Spfg
17612115Sdyson	vp = ap->a_vp;
17712115Sdyson	ip = VTOI(vp);
17812115Sdyson	fs = ip->i_e2fs;
179202283Slulf	ump = ip->i_ump;
180228539Spfg
181228539Spfg	if (fs->e2fs_contigsumsize <= 0)
18212115Sdyson		return (ENOSPC);
183228539Spfg
18412115Sdyson	buflist = ap->a_buflist;
18512115Sdyson	len = buflist->bs_nchildren;
18612115Sdyson	start_lbn = buflist->bs_children[0]->b_lblkno;
18712115Sdyson	end_lbn = start_lbn + len - 1;
188251658Spfg#ifdef INVARIANTS
18912115Sdyson	for (i = 1; i < len; i++)
19012115Sdyson		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
19112115Sdyson			panic("ext2_reallocblks: non-cluster");
19212115Sdyson#endif
19312115Sdyson	/*
194243641Spfg	 * If the cluster crosses the boundary for the first indirect
195243641Spfg	 * block, leave space for the indirect block. Indirect blocks
196243641Spfg	 * are initially laid out in a position after the last direct
197243641Spfg	 * block. Block reallocation would usually destroy locality by
198243641Spfg	 * moving the indirect block out of the way to make room for
199243641Spfg	 * data blocks if we didn't compensate here. We should also do
200243641Spfg	 * this for other indirect block boundaries, but it is only
201243641Spfg	 * important for the first one.
202243641Spfg	 */
203243641Spfg	if (start_lbn < NDADDR && end_lbn >= NDADDR)
204243641Spfg		return (ENOSPC);
205243641Spfg	/*
20612115Sdyson	 * If the latest allocation is in a new cylinder group, assume that
20712115Sdyson	 * the filesystem has decided to move and do not force it back to
20812115Sdyson	 * the previous cylinder group.
20912115Sdyson	 */
21012115Sdyson	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
21112115Sdyson	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
21212115Sdyson		return (ENOSPC);
213202283Slulf	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
214202283Slulf	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
21512115Sdyson		return (ENOSPC);
21612115Sdyson	/*
21712115Sdyson	 * Get the starting offset and block map for the first block.
21812115Sdyson	 */
21912115Sdyson	if (start_lvl == 0) {
22012115Sdyson		sbap = &ip->i_db[0];
22112115Sdyson		soff = start_lbn;
22212115Sdyson	} else {
22312115Sdyson		idp = &start_ap[start_lvl - 1];
224202283Slulf		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
22512115Sdyson			brelse(sbp);
22612115Sdyson			return (ENOSPC);
22712115Sdyson		}
228251677Spfg		sbap = (u_int *)sbp->b_data;
22912115Sdyson		soff = idp->in_off;
23012115Sdyson	}
23112115Sdyson	/*
23212115Sdyson	 * If the block range spans two block maps, get the second map.
23312115Sdyson	 */
23412115Sdyson	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
23512115Sdyson		ssize = len;
23612115Sdyson	} else {
237251658Spfg#ifdef INVARIANTS
23812115Sdyson		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
239246258Spfg			panic("ext2_reallocblks: start == end");
24012115Sdyson#endif
24112115Sdyson		ssize = len - (idp->in_off + 1);
242228539Spfg		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
24312115Sdyson			goto fail;
244251677Spfg		ebap = (u_int *)ebp->b_data;
24512115Sdyson	}
24612115Sdyson	/*
247228539Spfg	 * Find the preferred location for the cluster.
248228539Spfg	 */
249228539Spfg	EXT2_LOCK(ump);
250228539Spfg	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
251228539Spfg	/*
25212115Sdyson	 * Search the block map looking for an allocation of the desired size.
25312115Sdyson	 */
254254283Spfg	if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
255202283Slulf	    len, ext2_clusteralloc)) == 0){
256202283Slulf		EXT2_UNLOCK(ump);
25712115Sdyson		goto fail;
258202283Slulf	}
25912115Sdyson	/*
26012115Sdyson	 * We have found a new contiguous block.
26112115Sdyson	 *
26212115Sdyson	 * First we have to replace the old block pointers with the new
26312115Sdyson	 * block pointers in the inode and indirect blocks associated
26412115Sdyson	 * with the file.
26512115Sdyson	 */
266228539Spfg#ifdef DEBUG
267228539Spfg	printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
268228539Spfg	    (intmax_t)start_lbn, (intmax_t)end_lbn);
269228539Spfg#endif /* DEBUG */
27012115Sdyson	blkno = newblk;
271202283Slulf	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
272228539Spfg		if (i == ssize) {
27312115Sdyson			bap = ebap;
274202283Slulf			soff = -i;
275228539Spfg		}
276251658Spfg#ifdef INVARIANTS
27712115Sdyson		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
27812115Sdyson			panic("ext2_reallocblks: alloc mismatch");
27912115Sdyson#endif
280228539Spfg#ifdef DEBUG
281228539Spfg	printf(" %d,", *bap);
282228539Spfg#endif /* DEBUG */
28312115Sdyson		*bap++ = blkno;
28412115Sdyson	}
28512115Sdyson	/*
28612115Sdyson	 * Next we must write out the modified inode and indirect blocks.
28712115Sdyson	 * For strict correctness, the writes should be synchronous since
28812115Sdyson	 * the old block values may have been written to disk. In practise
28912115Sdyson	 * they are almost never written, but if we are concerned about
29012115Sdyson	 * strict correctness, the `doasyncfree' flag should be set to zero.
29112115Sdyson	 *
29212115Sdyson	 * The test on `doasyncfree' should be changed to test a flag
29312115Sdyson	 * that shows whether the associated buffers and inodes have
29412115Sdyson	 * been written. The flag should be set when the cluster is
29512115Sdyson	 * started and cleared whenever the buffer or inode is flushed.
29612115Sdyson	 * We can then check below to see if it is set, and do the
29712115Sdyson	 * synchronous write only when it has been cleared.
29812115Sdyson	 */
29912115Sdyson	if (sbap != &ip->i_db[0]) {
30012115Sdyson		if (doasyncfree)
30112115Sdyson			bdwrite(sbp);
30212115Sdyson		else
30312115Sdyson			bwrite(sbp);
30412115Sdyson	} else {
30512115Sdyson		ip->i_flag |= IN_CHANGE | IN_UPDATE;
30642374Sbde		if (!doasyncfree)
30796749Siedowse			ext2_update(vp, 1);
30812115Sdyson	}
309202283Slulf	if (ssize < len) {
31012115Sdyson		if (doasyncfree)
31112115Sdyson			bdwrite(ebp);
31212115Sdyson		else
31312115Sdyson			bwrite(ebp);
314202283Slulf	}
31512115Sdyson	/*
31612115Sdyson	 * Last, free the old blocks and assign the new blocks to the buffers.
31712115Sdyson	 */
318228539Spfg#ifdef DEBUG
319228539Spfg	printf("\n\tnew:");
320228539Spfg#endif /* DEBUG */
321202283Slulf	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
32212115Sdyson		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
323202283Slulf		    fs->e2fs_bsize);
32412115Sdyson		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
325228539Spfg#ifdef DEBUG
326228539Spfg		printf(" %d,", blkno);
327228539Spfg#endif /* DEBUG */
32812115Sdyson	}
329228539Spfg#ifdef DEBUG
330228539Spfg	printf("\n");
331228539Spfg#endif /* DEBUG */
33212115Sdyson	return (0);
33312115Sdyson
33412115Sdysonfail:
33512115Sdyson	if (ssize < len)
33612115Sdyson		brelse(ebp);
33712115Sdyson	if (sbap != &ip->i_db[0])
33812115Sdyson		brelse(sbp);
33912115Sdyson	return (ENOSPC);
34012115Sdyson}
34112115Sdyson
34212115Sdyson/*
343251612Spfg * Allocate an inode in the filesystem.
34412115Sdyson *
34512115Sdyson */
34612115Sdysonint
347246634Spfgext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
34812115Sdyson{
349232703Spfg	struct timespec ts;
35096752Siedowse	struct inode *pip;
351202283Slulf	struct m_ext2fs *fs;
35296752Siedowse	struct inode *ip;
353202283Slulf	struct ext2mount *ump;
354202283Slulf	ino_t ino, ipref;
355202283Slulf	int i, error, cg;
35612115Sdyson
35730474Sphk	*vpp = NULL;
35812115Sdyson	pip = VTOI(pvp);
35912115Sdyson	fs = pip->i_e2fs;
360202283Slulf	ump = pip->i_ump;
361202283Slulf
362202283Slulf	EXT2_LOCK(ump);
363202283Slulf	if (fs->e2fs->e2fs_ficount == 0)
36412115Sdyson		goto noinodes;
365202283Slulf	/*
366202283Slulf	 * If it is a directory then obtain a cylinder group based on
367202283Slulf	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
368202283Slulf	 * always the next inode.
369202283Slulf	 */
370228583Spfg	if ((mode & IFMT) == IFDIR) {
371202283Slulf		cg = ext2_dirpref(pip);
372202283Slulf		if (fs->e2fs_contigdirs[cg] < 255)
373202283Slulf			fs->e2fs_contigdirs[cg]++;
374202283Slulf	} else {
375202283Slulf		cg = ino_to_cg(fs, pip->i_number);
376202283Slulf		if (fs->e2fs_contigdirs[cg] > 0)
377202283Slulf			fs->e2fs_contigdirs[cg]--;
378202283Slulf	}
379202283Slulf	ipref = cg * fs->e2fs->e2fs_ipg + 1;
380202283Slulf	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
38112115Sdyson
382202283Slulf	if (ino == 0)
38312115Sdyson		goto noinodes;
38492462Smckusick	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
38512115Sdyson	if (error) {
38696749Siedowse		ext2_vfree(pvp, ino, mode);
38712115Sdyson		return (error);
38812115Sdyson	}
38930474Sphk	ip = VTOI(*vpp);
39012115Sdyson
391232703Spfg	/*
392232703Spfg	 * The question is whether using VGET was such good idea at all:
393232703Spfg	 * Linux doesn't read the old inode in when it is allocating a
394232703Spfg	 * new one. I will set at least i_size and i_blocks to zero.
395232703Spfg	 */
39612115Sdyson	ip->i_size = 0;
39712115Sdyson	ip->i_blocks = 0;
398232703Spfg	ip->i_mode = 0;
39912115Sdyson	ip->i_flags = 0;
40012115Sdyson        /* now we want to make sure that the block pointers are zeroed out */
40139753Sbde        for (i = 0; i < NDADDR; i++)
40212115Sdyson                ip->i_db[i] = 0;
40339753Sbde        for (i = 0; i < NIADDR; i++)
40439753Sbde                ip->i_ib[i] = 0;
40512115Sdyson
40612115Sdyson	/*
40712115Sdyson	 * Set up a new generation number for this inode.
40812115Sdyson	 * XXX check if this makes sense in ext2
40912115Sdyson	 */
41031485Sbde	if (ip->i_gen == 0 || ++ip->i_gen == 0)
41131485Sbde		ip->i_gen = random() / 2 + 1;
412232703Spfg
413232703Spfg	vfs_timestamp(&ts);
414232703Spfg	ip->i_birthtime = ts.tv_sec;
415232703Spfg	ip->i_birthnsec = ts.tv_nsec;
416232703Spfg
41712115Sdyson/*
41812115Sdysonprintf("ext2_valloc: allocated inode %d\n", ino);
41912115Sdyson*/
42012115Sdyson	return (0);
42112115Sdysonnoinodes:
422202283Slulf	EXT2_UNLOCK(ump);
42330474Sphk	ext2_fserr(fs, cred->cr_uid, "out of inodes");
424202283Slulf	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
42512115Sdyson	return (ENOSPC);
42612115Sdyson}
42712115Sdyson
42812115Sdyson/*
429202283Slulf * Find a cylinder to place a directory.
430202283Slulf *
431202283Slulf * The policy implemented by this algorithm is to allocate a
432202283Slulf * directory inode in the same cylinder group as its parent
433202283Slulf * directory, but also to reserve space for its files inodes
434202283Slulf * and data. Restrict the number of directories which may be
435202283Slulf * allocated one after another in the same cylinder group
436202283Slulf * without intervening allocation of files.
437202283Slulf *
438202283Slulf * If we allocate a first level directory then force allocation
439202283Slulf * in another cylinder group.
440202283Slulf *
441202283Slulf */
442202283Slulfstatic u_long
443202283Slulfext2_dirpref(struct inode *pip)
444202283Slulf{
445202283Slulf	struct m_ext2fs *fs;
446202283Slulf        int cg, prefcg, dirsize, cgsize;
447251677Spfg	u_int avgifree, avgbfree, avgndir, curdirsize;
448251677Spfg	u_int minifree, minbfree, maxndir;
449251677Spfg	u_int mincg, minndir;
450251677Spfg	u_int maxcontigdirs;
451202283Slulf
452202283Slulf	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
453202283Slulf	fs = pip->i_e2fs;
454202283Slulf
455202283Slulf 	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
456202283Slulf	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
457202283Slulf	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
458202283Slulf
459202283Slulf	/*
460202283Slulf	 * Force allocation in another cg if creating a first level dir.
461202283Slulf	 */
462202283Slulf	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
463202283Slulf	if (ITOV(pip)->v_vflag & VV_ROOT) {
464202283Slulf		prefcg = arc4random() % fs->e2fs_gcount;
465202283Slulf		mincg = prefcg;
466202283Slulf		minndir = fs->e2fs_ipg;
467202283Slulf		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
468202283Slulf			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
469202283Slulf			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
470202283Slulf			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
471202283Slulf				mincg = cg;
472202283Slulf				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
473202283Slulf			}
474202283Slulf		for (cg = 0; cg < prefcg; cg++)
475202283Slulf			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
476202283Slulf                            fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
477202283Slulf                            fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
478202283Slulf                                mincg = cg;
479202283Slulf                                minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
480202283Slulf                        }
481202283Slulf
482202283Slulf		return (mincg);
483202283Slulf	}
484202283Slulf
485202283Slulf	/*
486202283Slulf	 * Count various limits which used for
487202283Slulf	 * optimal allocation of a directory inode.
488202283Slulf	 */
489202283Slulf	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
490202283Slulf	minifree = avgifree - avgifree / 4;
491202283Slulf	if (minifree < 1)
492202283Slulf		minifree = 1;
493202283Slulf	minbfree = avgbfree - avgbfree / 4;
494202283Slulf	if (minbfree < 1)
495202283Slulf		minbfree = 1;
496202283Slulf	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
497202283Slulf	dirsize = AVGDIRSIZE;
498202283Slulf	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
499202283Slulf	if (dirsize < curdirsize)
500202283Slulf		dirsize = curdirsize;
501202283Slulf	if (dirsize <= 0)
502202283Slulf		maxcontigdirs = 0;		/* dirsize overflowed */
503202283Slulf	else
504202283Slulf		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
505202283Slulf	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
506202283Slulf	if (maxcontigdirs == 0)
507202283Slulf		maxcontigdirs = 1;
508202283Slulf
509202283Slulf	/*
510202283Slulf	 * Limit number of dirs in one cg and reserve space for
511202283Slulf	 * regular files, but only if we have no deficit in
512202283Slulf	 * inodes or space.
513202283Slulf	 */
514202283Slulf	prefcg = ino_to_cg(fs, pip->i_number);
515202283Slulf	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
516202283Slulf		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
517202283Slulf		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
518202283Slulf	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
519202283Slulf			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
520202283Slulf				return (cg);
521202283Slulf		}
522202283Slulf	for (cg = 0; cg < prefcg; cg++)
523202283Slulf		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
524202283Slulf		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
525202283Slulf	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
526202283Slulf			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
527202283Slulf				return (cg);
528202283Slulf		}
529202283Slulf	/*
530202283Slulf	 * This is a backstop when we have deficit in space.
531202283Slulf	 */
532202283Slulf	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
533202283Slulf		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
534202283Slulf			return (cg);
535202283Slulf	for (cg = 0; cg < prefcg; cg++)
536202283Slulf		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
537202283Slulf			break;
538202283Slulf	return (cg);
539202283Slulf}
540202283Slulf
541202283Slulf/*
54212115Sdyson * Select the desired position for the next block in a file.
54312115Sdyson *
54412115Sdyson * we try to mimic what Remy does in inode_getblk/block_getblk
54512115Sdyson *
54612115Sdyson * we note: blocknr == 0 means that we're about to allocate either
54712115Sdyson * a direct block or a pointer block at the first level of indirection
54812115Sdyson * (In other words, stuff that will go in i_db[] or i_ib[])
54912115Sdyson *
55012115Sdyson * blocknr != 0 means that we're allocating a block that is none
55112115Sdyson * of the above. Then, blocknr tells us the number of the block
55212115Sdyson * that will hold the pointer
55312115Sdyson */
554254283Spfge4fs_daddr_t
555254283Spfgext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
556254283Spfg    e2fs_daddr_t blocknr)
55712115Sdyson{
55812115Sdyson	int	tmp;
559202283Slulf	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
56012115Sdyson
56112115Sdyson	/* if the next block is actually what we thought it is,
56212115Sdyson	   then set the goal to what we thought it should be
56312115Sdyson	*/
564228583Spfg	if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
56512115Sdyson		return ip->i_next_alloc_goal;
56612115Sdyson
56712115Sdyson	/* now check whether we were provided with an array that basically
56812115Sdyson	   tells us previous blocks to which we want to stay closeby
56912115Sdyson	*/
570228583Spfg	if (bap)
57112115Sdyson                for (tmp = indx - 1; tmp >= 0; tmp--)
57212115Sdyson			if (bap[tmp])
57312115Sdyson				return bap[tmp];
57412115Sdyson
57512115Sdyson	/* else let's fall back to the blocknr, or, if there is none,
57635256Sdes	   follow the rule that a block should be allocated near its inode
57712115Sdyson	*/
57812115Sdyson	return blocknr ? blocknr :
579254283Spfg			(e2fs_daddr_t)(ip->i_block_group *
58012115Sdyson			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
581202283Slulf			ip->i_e2fs->e2fs->e2fs_first_dblock;
58212115Sdyson}
58312115Sdyson
58412115Sdyson/*
585202283Slulf * Implement the cylinder overflow algorithm.
586202283Slulf *
587202283Slulf * The policy implemented by this algorithm is:
588202283Slulf *   1) allocate the block in its requested cylinder group.
589202283Slulf *   2) quadradically rehash on the cylinder group number.
590202283Slulf *   3) brute force search for a free block.
591202283Slulf */
592202283Slulfstatic u_long
593202283Slulfext2_hashalloc(struct inode *ip, int cg, long pref, int size,
594202283Slulf                daddr_t (*allocator)(struct inode *, int, daddr_t, int))
595202283Slulf{
596202283Slulf	struct m_ext2fs *fs;
597202283Slulf	ino_t result;
598202283Slulf	int i, icg = cg;
599202283Slulf
600202283Slulf	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
601202283Slulf	fs = ip->i_e2fs;
602202283Slulf	/*
603202283Slulf	 * 1: preferred cylinder group
604202283Slulf	 */
605202283Slulf	result = (*allocator)(ip, cg, pref, size);
606202283Slulf	if (result)
607202283Slulf		return (result);
608202283Slulf	/*
609202283Slulf	 * 2: quadratic rehash
610202283Slulf	 */
611202283Slulf	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
612202283Slulf		cg += i;
613202283Slulf		if (cg >= fs->e2fs_gcount)
614202283Slulf			cg -= fs->e2fs_gcount;
615202283Slulf		result = (*allocator)(ip, cg, 0, size);
616202283Slulf		if (result)
617202283Slulf			return (result);
618202283Slulf	}
619202283Slulf	/*
620202283Slulf	 * 3: brute force search
621202283Slulf	 * Note that we start at i == 2, since 0 was checked initially,
622202283Slulf	 * and 1 is always checked in the quadratic rehash.
623202283Slulf	 */
624202283Slulf	cg = (icg + 2) % fs->e2fs_gcount;
625202283Slulf	for (i = 2; i < fs->e2fs_gcount; i++) {
626202283Slulf		result = (*allocator)(ip, cg, 0, size);
627202283Slulf		if (result)
628202283Slulf			return (result);
629202283Slulf		cg++;
630202283Slulf		if (cg == fs->e2fs_gcount)
631202283Slulf			cg = 0;
632202283Slulf	}
633202283Slulf	return (0);
634202283Slulf}
635202283Slulf
636202283Slulf/*
637202283Slulf * Determine whether a block can be allocated.
638202283Slulf *
639202283Slulf * Check to see if a block of the appropriate size is available,
640202283Slulf * and if it is, allocate it.
641202283Slulf */
642202283Slulfstatic daddr_t
643202283Slulfext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
644202283Slulf{
645202283Slulf	struct m_ext2fs *fs;
646202283Slulf	struct buf *bp;
647202283Slulf	struct ext2mount *ump;
648218175Sjhb	daddr_t bno, runstart, runlen;
649218175Sjhb	int bit, loc, end, error, start;
650202283Slulf	char *bbp;
651202283Slulf	/* XXX ondisk32 */
652202283Slulf	fs = ip->i_e2fs;
653202283Slulf	ump = ip->i_ump;
654202283Slulf	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
655202283Slulf		return (0);
656202283Slulf	EXT2_UNLOCK(ump);
657202283Slulf	error = bread(ip->i_devvp, fsbtodb(fs,
658202283Slulf		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
659202283Slulf		(int)fs->e2fs_bsize, NOCRED, &bp);
660202283Slulf	if (error) {
661202283Slulf		brelse(bp);
662202283Slulf		EXT2_LOCK(ump);
663202283Slulf		return (0);
664202283Slulf	}
665218438Sjhb	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
666218438Sjhb		/*
667218438Sjhb		 * Another thread allocated the last block in this
668218438Sjhb		 * group while we were waiting for the buffer.
669218438Sjhb		 */
670218438Sjhb		brelse(bp);
671218438Sjhb		EXT2_LOCK(ump);
672218438Sjhb		return (0);
673218438Sjhb	}
674202283Slulf	bbp = (char *)bp->b_data;
675202283Slulf
676202283Slulf	if (dtog(fs, bpref) != cg)
677202283Slulf		bpref = 0;
678202283Slulf	if (bpref != 0) {
679202283Slulf		bpref = dtogd(fs, bpref);
680202283Slulf		/*
681202283Slulf		 * if the requested block is available, use it
682202283Slulf		 */
683202283Slulf		if (isclr(bbp, bpref)) {
684202283Slulf			bno = bpref;
685202283Slulf			goto gotit;
686202283Slulf		}
687202283Slulf	}
688202283Slulf	/*
689202283Slulf	 * no blocks in the requested cylinder, so take next
690202283Slulf	 * available one in this cylinder group.
691202283Slulf	 * first try to get 8 contigous blocks, then fall back to a single
692202283Slulf	 * block.
693202283Slulf	 */
694202283Slulf	if (bpref)
695202283Slulf		start = dtogd(fs, bpref) / NBBY;
696202283Slulf	else
697202283Slulf		start = 0;
698202283Slulf	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
699218175Sjhbretry:
700218175Sjhb	runlen = 0;
701218175Sjhb	runstart = 0;
702202283Slulf	for (loc = start; loc < end; loc++) {
703218175Sjhb		if (bbp[loc] == (char)0xff) {
704218175Sjhb			runlen = 0;
705218175Sjhb			continue;
706202283Slulf		}
707218175Sjhb
708218175Sjhb		/* Start of a run, find the number of high clear bits. */
709218175Sjhb		if (runlen == 0) {
710218175Sjhb			bit = fls(bbp[loc]);
711218175Sjhb			runlen = NBBY - bit;
712218175Sjhb			runstart = loc * NBBY + bit;
713218175Sjhb		} else if (bbp[loc] == 0) {
714218175Sjhb			/* Continue a run. */
715218175Sjhb			runlen += NBBY;
716218175Sjhb		} else {
717218175Sjhb			/*
718218175Sjhb			 * Finish the current run.  If it isn't long
719218175Sjhb			 * enough, start a new one.
720218175Sjhb			 */
721218175Sjhb			bit = ffs(bbp[loc]) - 1;
722218175Sjhb			runlen += bit;
723218175Sjhb			if (runlen >= 8) {
724218175Sjhb				bno = runstart;
725218175Sjhb				goto gotit;
726218175Sjhb			}
727218175Sjhb
728218175Sjhb			/* Run was too short, start a new one. */
729218175Sjhb			bit = fls(bbp[loc]);
730218175Sjhb			runlen = NBBY - bit;
731218175Sjhb			runstart = loc * NBBY + bit;
732218175Sjhb		}
733218175Sjhb
734218175Sjhb		/* If the current run is long enough, use it. */
735218175Sjhb		if (runlen >= 8) {
736218175Sjhb			bno = runstart;
737202283Slulf			goto gotit;
738202283Slulf		}
739202283Slulf	}
740218175Sjhb	if (start != 0) {
741218175Sjhb		end = start;
742218175Sjhb		start = 0;
743218175Sjhb		goto retry;
744218175Sjhb	}
745202283Slulf
746202283Slulf	bno = ext2_mapsearch(fs, bbp, bpref);
747202283Slulf	if (bno < 0){
748202283Slulf		brelse(bp);
749202283Slulf		EXT2_LOCK(ump);
750202283Slulf		return (0);
751202283Slulf	}
752202283Slulfgotit:
753251658Spfg#ifdef INVARIANTS
754218190Sjhb	if (isset(bbp, bno)) {
755218190Sjhb		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
756218190Sjhb			cg, (intmax_t)bno, fs->e2fs_fsmnt);
757202283Slulf		panic("ext2fs_alloccg: dup alloc");
758202283Slulf	}
759202283Slulf#endif
760218190Sjhb	setbit(bbp, bno);
761202283Slulf	EXT2_LOCK(ump);
762228539Spfg	ext2_clusteracct(fs, bbp, cg, bno, -1);
763202283Slulf	fs->e2fs->e2fs_fbcount--;
764202283Slulf	fs->e2fs_gd[cg].ext2bgd_nbfree--;
765202283Slulf	fs->e2fs_fmod = 1;
766202283Slulf	EXT2_UNLOCK(ump);
767202283Slulf	bdwrite(bp);
768202283Slulf	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
769202283Slulf}
770202283Slulf
771202283Slulf/*
772228539Spfg * Determine whether a cluster can be allocated.
773228539Spfg */
774228539Spfgstatic daddr_t
775228539Spfgext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
776228539Spfg{
777228539Spfg	struct m_ext2fs *fs;
778228539Spfg	struct ext2mount *ump;
779228539Spfg	struct buf *bp;
780228539Spfg	char *bbp;
781228539Spfg	int bit, error, got, i, loc, run;
782228539Spfg	int32_t *lp;
783228539Spfg	daddr_t bno;
784228539Spfg
785228539Spfg	fs = ip->i_e2fs;
786228539Spfg	ump = ip->i_ump;
787228539Spfg
788228539Spfg	if (fs->e2fs_maxcluster[cg] < len)
789228539Spfg		return (0);
790228539Spfg
791228539Spfg	EXT2_UNLOCK(ump);
792228539Spfg	error = bread(ip->i_devvp,
793228539Spfg	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
794228539Spfg	    (int)fs->e2fs_bsize, NOCRED, &bp);
795228539Spfg	if (error)
796228539Spfg		goto fail_lock;
797228539Spfg
798228539Spfg	bbp = (char *)bp->b_data;
799228539Spfg	EXT2_LOCK(ump);
800228539Spfg	/*
801228539Spfg	 * Check to see if a cluster of the needed size (or bigger) is
802228539Spfg	 * available in this cylinder group.
803228539Spfg	 */
804228539Spfg	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
805228539Spfg	for (i = len; i <= fs->e2fs_contigsumsize; i++)
806228539Spfg		if (*lp++ > 0)
807228539Spfg			break;
808228539Spfg	if (i > fs->e2fs_contigsumsize) {
809228539Spfg		/*
810228539Spfg		 * Update the cluster summary information to reflect
811228539Spfg		 * the true maximum-sized cluster so that future cluster
812228539Spfg		 * allocation requests can avoid reading the bitmap only
813228539Spfg		 * to find no cluster.
814228539Spfg		 */
815228539Spfg		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
816228539Spfg			for (i = len - 1; i > 0; i--)
817228539Spfg				if (*lp-- > 0)
818228539Spfg					break;
819228539Spfg		fs->e2fs_maxcluster[cg] = i;
820228539Spfg		goto fail;
821228539Spfg	}
822228539Spfg	EXT2_UNLOCK(ump);
823228539Spfg
824228539Spfg	/* Search the bitmap to find a big enough cluster like in FFS. */
825228539Spfg	if (dtog(fs, bpref) != cg)
826228539Spfg		bpref = 0;
827228539Spfg	if (bpref != 0)
828228539Spfg		bpref = dtogd(fs, bpref);
829228539Spfg	loc = bpref / NBBY;
830228539Spfg	bit = 1 << (bpref % NBBY);
831228539Spfg	for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
832228539Spfg		if ((bbp[loc] & bit) != 0)
833228539Spfg			run = 0;
834228539Spfg		else {
835228539Spfg			run++;
836228539Spfg			if (run == len)
837228539Spfg				break;
838228539Spfg		}
839228539Spfg		if ((got & (NBBY - 1)) != (NBBY - 1))
840228539Spfg			bit <<= 1;
841228539Spfg		else {
842228539Spfg			loc++;
843228539Spfg			bit = 1;
844228539Spfg		}
845228539Spfg	}
846228539Spfg
847228539Spfg	if (got >= fs->e2fs->e2fs_fpg)
848228539Spfg		goto fail_lock;
849228539Spfg
850228539Spfg	/* Allocate the cluster that we found. */
851228539Spfg	for (i = 1; i < len; i++)
852228539Spfg		if (!isclr(bbp, got - run + i))
853228539Spfg			panic("ext2_clusteralloc: map mismatch");
854228539Spfg
855228539Spfg	bno = got - run + 1;
856228539Spfg	if (bno >= fs->e2fs->e2fs_fpg)
857228539Spfg		panic("ext2_clusteralloc: allocated out of group");
858228539Spfg
859228539Spfg	EXT2_LOCK(ump);
860228539Spfg	for (i = 0; i < len; i += fs->e2fs_fpb) {
861228539Spfg		setbit(bbp, bno + i);
862228539Spfg		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
863228539Spfg		fs->e2fs->e2fs_fbcount--;
864228539Spfg		fs->e2fs_gd[cg].ext2bgd_nbfree--;
865228539Spfg	}
866228539Spfg	fs->e2fs_fmod = 1;
867228539Spfg	EXT2_UNLOCK(ump);
868228539Spfg
869228539Spfg	bdwrite(bp);
870228539Spfg	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
871228539Spfg
872228539Spfgfail_lock:
873228539Spfg	EXT2_LOCK(ump);
874228539Spfgfail:
875228539Spfg	brelse(bp);
876228539Spfg	return (0);
877228539Spfg}
878228539Spfg
879228539Spfg/*
880202283Slulf * Determine whether an inode can be allocated.
881202283Slulf *
882202283Slulf * Check to see if an inode is available, and if it is,
883202283Slulf * allocate it using tode in the specified cylinder group.
884202283Slulf */
885202283Slulfstatic daddr_t
886202283Slulfext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
887202283Slulf{
888202283Slulf	struct m_ext2fs *fs;
889202283Slulf	struct buf *bp;
890202283Slulf	struct ext2mount *ump;
891229200Sed	int error, start, len;
892229200Sed	char *ibp, *loc;
893202283Slulf	ipref--; /* to avoid a lot of (ipref -1) */
894202283Slulf	if (ipref == -1)
895202283Slulf		ipref = 0;
896202283Slulf	fs = ip->i_e2fs;
897202283Slulf	ump = ip->i_ump;
898202283Slulf	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
899202283Slulf		return (0);
900202283Slulf	EXT2_UNLOCK(ump);
901202283Slulf	error = bread(ip->i_devvp, fsbtodb(fs,
902202283Slulf		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
903202283Slulf		(int)fs->e2fs_bsize, NOCRED, &bp);
904202283Slulf	if (error) {
905202283Slulf		brelse(bp);
906202283Slulf		EXT2_LOCK(ump);
907202283Slulf		return (0);
908202283Slulf	}
909218438Sjhb	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) {
910218438Sjhb		/*
911218438Sjhb		 * Another thread allocated the last i-node in this
912218438Sjhb		 * group while we were waiting for the buffer.
913218438Sjhb		 */
914218438Sjhb		brelse(bp);
915218438Sjhb		EXT2_LOCK(ump);
916218438Sjhb		return (0);
917218438Sjhb	}
918202283Slulf	ibp = (char *)bp->b_data;
919202283Slulf	if (ipref) {
920202283Slulf		ipref %= fs->e2fs->e2fs_ipg;
921202283Slulf		if (isclr(ibp, ipref))
922202283Slulf			goto gotit;
923202283Slulf	}
924202283Slulf	start = ipref / NBBY;
925202283Slulf	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
926229200Sed	loc = memcchr(&ibp[start], 0xff, len);
927229200Sed	if (loc == NULL) {
928202283Slulf		len = start + 1;
929202283Slulf		start = 0;
930229200Sed		loc = memcchr(&ibp[start], 0xff, len);
931229200Sed		if (loc == NULL) {
932202283Slulf			printf("cg = %d, ipref = %lld, fs = %s\n",
933202283Slulf				cg, (long long)ipref, fs->e2fs_fsmnt);
934202283Slulf			panic("ext2fs_nodealloccg: map corrupted");
935202283Slulf			/* NOTREACHED */
936202283Slulf		}
937202283Slulf	}
938229200Sed	ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
939202283Slulfgotit:
940202283Slulf	setbit(ibp, ipref);
941202283Slulf	EXT2_LOCK(ump);
942202283Slulf	fs->e2fs_gd[cg].ext2bgd_nifree--;
943202283Slulf	fs->e2fs->e2fs_ficount--;
944202283Slulf	fs->e2fs_fmod = 1;
945202283Slulf	if ((mode & IFMT) == IFDIR) {
946202283Slulf		fs->e2fs_gd[cg].ext2bgd_ndirs++;
947202283Slulf		fs->e2fs_total_dir++;
948202283Slulf	}
949202283Slulf	EXT2_UNLOCK(ump);
950202283Slulf	bdwrite(bp);
951202283Slulf	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
952202283Slulf}
953202283Slulf
954202283Slulf/*
95512115Sdyson * Free a block or fragment.
95612115Sdyson *
95712115Sdyson */
95812115Sdysonvoid
959254283Spfgext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size)
96012115Sdyson{
961202283Slulf	struct m_ext2fs *fs;
962202283Slulf	struct buf *bp;
963202283Slulf	struct ext2mount *ump;
964202283Slulf	int cg, error;
965202283Slulf	char *bbp;
96612115Sdyson
96712115Sdyson	fs = ip->i_e2fs;
968202283Slulf	ump = ip->i_ump;
969202283Slulf	cg = dtog(fs, bno);
970202283Slulf	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
971202283Slulf                printf("bad block %lld, ino %llu\n", (long long)bno,
972202283Slulf                    (unsigned long long)ip->i_number);
973202283Slulf                ext2_fserr(fs, ip->i_uid, "bad block");
974202283Slulf                return;
975202283Slulf        }
976202283Slulf        error = bread(ip->i_devvp,
977202283Slulf                fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
978202283Slulf                (int)fs->e2fs_bsize, NOCRED, &bp);
979202283Slulf        if (error) {
980202283Slulf                brelse(bp);
981202283Slulf                return;
982202283Slulf        }
983202283Slulf        bbp = (char *)bp->b_data;
984202283Slulf        bno = dtogd(fs, bno);
985202283Slulf        if (isclr(bbp, bno)) {
986202283Slulf                printf("block = %lld, fs = %s\n",
987202283Slulf                     (long long)bno, fs->e2fs_fsmnt);
988246258Spfg                panic("ext2_blkfree: freeing free block");
989202283Slulf        }
990202283Slulf        clrbit(bbp, bno);
991202283Slulf	EXT2_LOCK(ump);
992228539Spfg	ext2_clusteracct(fs, bbp, cg, bno, 1);
993202283Slulf        fs->e2fs->e2fs_fbcount++;
994202283Slulf        fs->e2fs_gd[cg].ext2bgd_nbfree++;
995202283Slulf        fs->e2fs_fmod = 1;
996202283Slulf	EXT2_UNLOCK(ump);
997202283Slulf        bdwrite(bp);
99812115Sdyson}
99912115Sdyson
100012115Sdyson/*
100112115Sdyson * Free an inode.
100212115Sdyson *
100312115Sdyson */
100412115Sdysonint
1005246634Spfgext2_vfree(struct vnode *pvp, ino_t ino, int mode)
100612115Sdyson{
1007202283Slulf	struct m_ext2fs *fs;
100896752Siedowse	struct inode *pip;
1009202283Slulf	struct buf *bp;
1010202283Slulf	struct ext2mount *ump;
1011202283Slulf	int error, cg;
1012202283Slulf	char * ibp;
101312115Sdyson
101430474Sphk	pip = VTOI(pvp);
101512115Sdyson	fs = pip->i_e2fs;
1016202283Slulf	ump = pip->i_ump;
1017202283Slulf	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
1018241011Smdf		panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
1019241011Smdf		    pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);
102012115Sdyson
1021202283Slulf	cg = ino_to_cg(fs, ino);
1022202283Slulf	error = bread(pip->i_devvp,
1023202283Slulf		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
1024202283Slulf		(int)fs->e2fs_bsize, NOCRED, &bp);
1025202283Slulf	if (error) {
1026202283Slulf		brelse(bp);
1027202283Slulf		return (0);
1028202283Slulf	}
1029202283Slulf	ibp = (char *)bp->b_data;
1030202283Slulf	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
1031202283Slulf	if (isclr(ibp, ino)) {
1032202283Slulf		printf("ino = %llu, fs = %s\n",
1033202283Slulf			 (unsigned long long)ino, fs->e2fs_fsmnt);
1034202283Slulf		if (fs->e2fs_ronly == 0)
1035246258Spfg			panic("ext2_vfree: freeing free inode");
1036202283Slulf	}
1037202283Slulf	clrbit(ibp, ino);
1038202283Slulf	EXT2_LOCK(ump);
1039202283Slulf	fs->e2fs->e2fs_ficount++;
1040202283Slulf	fs->e2fs_gd[cg].ext2bgd_nifree++;
1041202283Slulf	if ((mode & IFMT) == IFDIR) {
1042202283Slulf		fs->e2fs_gd[cg].ext2bgd_ndirs--;
1043202283Slulf		fs->e2fs_total_dir--;
1044202283Slulf	}
1045202283Slulf	fs->e2fs_fmod = 1;
1046202283Slulf	EXT2_UNLOCK(ump);
1047202283Slulf	bdwrite(bp);
1048202283Slulf	return (0);
1049202283Slulf}
1050202283Slulf
1051202283Slulf/*
1052202283Slulf * Find a block in the specified cylinder group.
1053202283Slulf *
1054202283Slulf * It is a panic if a request is made to find a block if none are
1055202283Slulf * available.
105612115Sdyson */
1057202283Slulfstatic daddr_t
1058202283Slulfext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
1059202283Slulf{
1060229200Sed	char *loc;
1061229200Sed	int start, len;
106212115Sdyson
1063202283Slulf	/*
1064202283Slulf	 * find the fragment by searching through the free block
1065202283Slulf	 * map for an appropriate bit pattern
106612115Sdyson	 */
1067202283Slulf	if (bpref)
1068202283Slulf		start = dtogd(fs, bpref) / NBBY;
1069202283Slulf	else
1070202283Slulf		start = 0;
1071202283Slulf	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
1072229200Sed	loc = memcchr(&bbp[start], 0xff, len);
1073229200Sed	if (loc == NULL) {
1074202283Slulf		len = start + 1;
1075202283Slulf		start = 0;
1076229200Sed		loc = memcchr(&bbp[start], 0xff, len);
1077229200Sed		if (loc == NULL) {
1078202283Slulf			printf("start = %d, len = %d, fs = %s\n",
1079202283Slulf				start, len, fs->e2fs_fsmnt);
1080246258Spfg			panic("ext2_mapsearch: map corrupted");
1081202283Slulf			/* NOTREACHED */
1082202283Slulf		}
1083202283Slulf	}
1084229200Sed	return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
108512115Sdyson}
108612115Sdyson
108712115Sdyson/*
1088251612Spfg * Fserr prints the name of a filesystem with an error diagnostic.
108912115Sdyson *
109012115Sdyson * The form of the error message is:
109112115Sdyson *	fs: error message
109212115Sdyson */
109312115Sdysonstatic void
1094246634Spfgext2_fserr(struct m_ext2fs *fs, uid_t uid, char *cp)
109512115Sdyson{
109612115Sdyson
1097202283Slulf	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
109812115Sdyson}
1099202283Slulf
1100202283Slulfint
1101202283Slulfcg_has_sb(int i)
1102202283Slulf{
1103202283Slulf        int a3, a5, a7;
1104202283Slulf
1105202283Slulf        if (i == 0 || i == 1)
1106202283Slulf                return 1;
1107202283Slulf        for (a3 = 3, a5 = 5, a7 = 7;
1108202283Slulf            a3 <= i || a5 <= i || a7 <= i;
1109202283Slulf            a3 *= 3, a5 *= 5, a7 *= 7)
1110202283Slulf                if (i == a3 || i == a5 || i == a7)
1111202283Slulf                        return 1;
1112202283Slulf        return 0;
1113202283Slulf}
1114