ext2_alloc.c revision 218190
1/*-
2 *  modified for Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*-
8 * Copyright (c) 1982, 1986, 1989, 1993
9 *	The Regents of the University of California.  All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
36 * $FreeBSD: head/sys/fs/ext2fs/ext2_alloc.c 218190 2011-02-02 14:59:05Z jhb $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/conf.h>
42#include <sys/vnode.h>
43#include <sys/stat.h>
44#include <sys/mount.h>
45#include <sys/syslog.h>
46#include <sys/buf.h>
47
48#include <fs/ext2fs/inode.h>
49#include <fs/ext2fs/ext2_mount.h>
50#include <fs/ext2fs/ext2fs.h>
51#include <fs/ext2fs/fs.h>
52#include <fs/ext2fs/ext2_extern.h>
53
54static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
55static u_long	ext2_dirpref(struct inode *);
56static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
57static u_long	ext2_hashalloc(struct inode *, int, long, int,
58				daddr_t (*)(struct inode *, int, daddr_t,
59						int));
60static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
61static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
62#ifdef FANCY_REALLOC
63static int	ext2_reallocblks(struct vop_reallocblks_args *);
64#endif
65
66/*
67 * Allocate a block in the file system.
68 *
69 * A preference may be optionally specified. If a preference is given
70 * the following hierarchy is used to allocate a block:
71 *   1) allocate the requested block.
72 *   2) allocate a rotationally optimal block in the same cylinder.
73 *   3) allocate a block in the same cylinder group.
74 *   4) quadradically rehash into other cylinder groups, until an
75 *        available block is located.
76 * If no block preference is given the following hierarchy is used
77 * to allocate a block:
78 *   1) allocate a block in the cylinder group that contains the
79 *        inode for the file.
80 *   2) quadradically rehash into other cylinder groups, until an
81 *        available block is located.
82 */
83int
84ext2_alloc(ip, lbn, bpref, size, cred, bnp)
85	struct inode *ip;
86	int32_t lbn, bpref;
87	int size;
88	struct ucred *cred;
89	int32_t *bnp;
90{
91	struct m_ext2fs *fs;
92	struct ext2mount *ump;
93	int32_t bno;
94	int cg;
95	*bnp = 0;
96	fs = ip->i_e2fs;
97	ump = ip->i_ump;
98	mtx_assert(EXT2_MTX(ump), MA_OWNED);
99#ifdef DIAGNOSTIC
100	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
101		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
102		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
103		panic("ext2_alloc: bad size");
104	}
105	if (cred == NOCRED)
106		panic("ext2_alloc: missing credential");
107#endif /* DIAGNOSTIC */
108	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
109		goto nospace;
110	if (cred->cr_uid != 0 &&
111		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
112		goto nospace;
113	if (bpref >= fs->e2fs->e2fs_bcount)
114		bpref = 0;
115	if (bpref == 0)
116                cg = ino_to_cg(fs, ip->i_number);
117        else
118                cg = dtog(fs, bpref);
119        bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
120                                                 ext2_alloccg);
121        if (bno > 0) {
122		/* set next_alloc fields as done in block_getblk */
123		ip->i_next_alloc_block = lbn;
124		ip->i_next_alloc_goal = bno;
125
126                ip->i_blocks += btodb(fs->e2fs_bsize);
127                ip->i_flag |= IN_CHANGE | IN_UPDATE;
128                *bnp = bno;
129                return (0);
130        }
131nospace:
132	EXT2_UNLOCK(ump);
133	ext2_fserr(fs, cred->cr_uid, "file system full");
134	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
135	return (ENOSPC);
136}
137
138/*
139 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
140 *
141 * The vnode and an array of buffer pointers for a range of sequential
142 * logical blocks to be made contiguous is given. The allocator attempts
143 * to find a range of sequential blocks starting as close as possible to
144 * an fs_rotdelay offset from the end of the allocation for the logical
145 * block immediately preceding the current range. If successful, the
146 * physical block numbers in the buffer pointers and in the inode are
147 * changed to reflect the new allocation. If unsuccessful, the allocation
148 * is left unchanged. The success in doing the reallocation is returned.
149 * Note that the error return is not reflected back to the user. Rather
150 * the previous block allocation will be used.
151 */
152
153#ifdef FANCY_REALLOC
154SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
155
156static int doasyncfree = 1;
157SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
158    "Use asychronous writes to update block pointers when freeing blocks");
159
160static int doreallocblks = 1;
161SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
162#endif
163
164int
165ext2_reallocblks(ap)
166	struct vop_reallocblks_args /* {
167		struct vnode *a_vp;
168		struct cluster_save *a_buflist;
169	} */ *ap;
170{
171#ifndef FANCY_REALLOC
172/* printf("ext2_reallocblks not implemented\n"); */
173return ENOSPC;
174#else
175
176	struct m_ext2fs *fs;
177	struct inode *ip;
178	struct vnode *vp;
179	struct buf *sbp, *ebp;
180	int32_t *bap, *sbap, *ebap = 0;
181	struct ext2mount *ump;
182	struct cluster_save *buflist;
183	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
184	int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
185	int i, len, start_lvl, end_lvl, pref, ssize;
186
187	vp = ap->a_vp;
188	ip = VTOI(vp);
189	fs = ip->i_e2fs;
190	ump = ip->i_ump;
191#ifdef UNKLAR
192	if (fs->fs_contigsumsize <= 0)
193		return (ENOSPC);
194#endif
195	buflist = ap->a_buflist;
196	len = buflist->bs_nchildren;
197	start_lbn = buflist->bs_children[0]->b_lblkno;
198	end_lbn = start_lbn + len - 1;
199#ifdef DIAGNOSTIC
200	for (i = 1; i < len; i++)
201		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
202			panic("ext2_reallocblks: non-cluster");
203#endif
204	/*
205	 * If the latest allocation is in a new cylinder group, assume that
206	 * the filesystem has decided to move and do not force it back to
207	 * the previous cylinder group.
208	 */
209	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
210	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
211		return (ENOSPC);
212	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
213	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
214		return (ENOSPC);
215	/*
216	 * Get the starting offset and block map for the first block.
217	 */
218	if (start_lvl == 0) {
219		sbap = &ip->i_db[0];
220		soff = start_lbn;
221	} else {
222		idp = &start_ap[start_lvl - 1];
223		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
224			brelse(sbp);
225			return (ENOSPC);
226		}
227		sbap = (int32_t *)sbp->b_data;
228		soff = idp->in_off;
229	}
230	/*
231	 * Find the preferred location for the cluster.
232	 */
233	EXT2_LOCK(ump);
234	pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
235	/*
236	 * If the block range spans two block maps, get the second map.
237	 */
238	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
239		ssize = len;
240	} else {
241#ifdef DIAGNOSTIC
242		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
243			panic("ext2_reallocblk: start == end");
244#endif
245		ssize = len - (idp->in_off + 1);
246		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
247			EXT2_UNLOCK(ump);
248			goto fail;
249		}
250		ebap = (int32_t *)ebp->b_data;
251	}
252	/*
253	 * Search the block map looking for an allocation of the desired size.
254	 */
255	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
256	    len, ext2_clusteralloc)) == 0){
257		EXT2_UNLOCK(ump);
258		goto fail;
259	}
260	/*
261	 * We have found a new contiguous block.
262	 *
263	 * First we have to replace the old block pointers with the new
264	 * block pointers in the inode and indirect blocks associated
265	 * with the file.
266	 */
267	blkno = newblk;
268	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
269		if (i == ssize)
270			bap = ebap;
271			soff = -i;
272#ifdef DIAGNOSTIC
273		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
274			panic("ext2_reallocblks: alloc mismatch");
275#endif
276		*bap++ = blkno;
277	}
278	/*
279	 * Next we must write out the modified inode and indirect blocks.
280	 * For strict correctness, the writes should be synchronous since
281	 * the old block values may have been written to disk. In practise
282	 * they are almost never written, but if we are concerned about
283	 * strict correctness, the `doasyncfree' flag should be set to zero.
284	 *
285	 * The test on `doasyncfree' should be changed to test a flag
286	 * that shows whether the associated buffers and inodes have
287	 * been written. The flag should be set when the cluster is
288	 * started and cleared whenever the buffer or inode is flushed.
289	 * We can then check below to see if it is set, and do the
290	 * synchronous write only when it has been cleared.
291	 */
292	if (sbap != &ip->i_db[0]) {
293		if (doasyncfree)
294			bdwrite(sbp);
295		else
296			bwrite(sbp);
297	} else {
298		ip->i_flag |= IN_CHANGE | IN_UPDATE;
299		if (!doasyncfree)
300			ext2_update(vp, 1);
301	}
302	if (ssize < len) {
303		if (doasyncfree)
304			bdwrite(ebp);
305		else
306			bwrite(ebp);
307	}
308	/*
309	 * Last, free the old blocks and assign the new blocks to the buffers.
310	 */
311	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
312		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
313		    fs->e2fs_bsize);
314		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
315	}
316	return (0);
317
318fail:
319	if (ssize < len)
320		brelse(ebp);
321	if (sbap != &ip->i_db[0])
322		brelse(sbp);
323	return (ENOSPC);
324
325#endif /* FANCY_REALLOC */
326}
327
328/*
329 * Allocate an inode in the file system.
330 *
331 */
332int
333ext2_valloc(pvp, mode, cred, vpp)
334	struct vnode *pvp;
335	int mode;
336	struct ucred *cred;
337	struct vnode **vpp;
338{
339	struct inode *pip;
340	struct m_ext2fs *fs;
341	struct inode *ip;
342	struct ext2mount *ump;
343	ino_t ino, ipref;
344	int i, error, cg;
345
346	*vpp = NULL;
347	pip = VTOI(pvp);
348	fs = pip->i_e2fs;
349	ump = pip->i_ump;
350
351	EXT2_LOCK(ump);
352	if (fs->e2fs->e2fs_ficount == 0)
353		goto noinodes;
354	/*
355	 * If it is a directory then obtain a cylinder group based on
356	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
357	 * always the next inode.
358	 */
359	if((mode & IFMT) == IFDIR) {
360		cg = ext2_dirpref(pip);
361		if (fs->e2fs_contigdirs[cg] < 255)
362			fs->e2fs_contigdirs[cg]++;
363	} else {
364		cg = ino_to_cg(fs, pip->i_number);
365		if (fs->e2fs_contigdirs[cg] > 0)
366			fs->e2fs_contigdirs[cg]--;
367	}
368	ipref = cg * fs->e2fs->e2fs_ipg + 1;
369	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
370
371	if (ino == 0)
372		goto noinodes;
373	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
374	if (error) {
375		ext2_vfree(pvp, ino, mode);
376		return (error);
377	}
378	ip = VTOI(*vpp);
379
380	/*
381	  the question is whether using VGET was such good idea at all -
382	  Linux doesn't read the old inode in when it's allocating a
383	  new one. I will set at least i_size & i_blocks the zero.
384	*/
385	ip->i_mode = 0;
386	ip->i_size = 0;
387	ip->i_blocks = 0;
388	ip->i_flags = 0;
389        /* now we want to make sure that the block pointers are zeroed out */
390        for (i = 0; i < NDADDR; i++)
391                ip->i_db[i] = 0;
392        for (i = 0; i < NIADDR; i++)
393                ip->i_ib[i] = 0;
394
395	/*
396	 * Set up a new generation number for this inode.
397	 * XXX check if this makes sense in ext2
398	 */
399	if (ip->i_gen == 0 || ++ip->i_gen == 0)
400		ip->i_gen = random() / 2 + 1;
401/*
402printf("ext2_valloc: allocated inode %d\n", ino);
403*/
404	return (0);
405noinodes:
406	EXT2_UNLOCK(ump);
407	ext2_fserr(fs, cred->cr_uid, "out of inodes");
408	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
409	return (ENOSPC);
410}
411
412/*
413 * Find a cylinder to place a directory.
414 *
415 * The policy implemented by this algorithm is to allocate a
416 * directory inode in the same cylinder group as its parent
417 * directory, but also to reserve space for its files inodes
418 * and data. Restrict the number of directories which may be
419 * allocated one after another in the same cylinder group
420 * without intervening allocation of files.
421 *
422 * If we allocate a first level directory then force allocation
423 * in another cylinder group.
424 *
425 */
426static u_long
427ext2_dirpref(struct inode *pip)
428{
429	struct m_ext2fs *fs;
430        int cg, prefcg, dirsize, cgsize;
431	int avgifree, avgbfree, avgndir, curdirsize;
432	int minifree, minbfree, maxndir;
433	int mincg, minndir;
434	int maxcontigdirs;
435
436	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
437	fs = pip->i_e2fs;
438
439 	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
440	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
441	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
442
443	/*
444	 * Force allocation in another cg if creating a first level dir.
445	 */
446	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
447	if (ITOV(pip)->v_vflag & VV_ROOT) {
448		prefcg = arc4random() % fs->e2fs_gcount;
449		mincg = prefcg;
450		minndir = fs->e2fs_ipg;
451		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
452			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
453			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
454			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
455				mincg = cg;
456				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
457			}
458		for (cg = 0; cg < prefcg; cg++)
459			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
460                            fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
461                            fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
462                                mincg = cg;
463                                minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
464                        }
465
466		return (mincg);
467	}
468
469	/*
470	 * Count various limits which used for
471	 * optimal allocation of a directory inode.
472	 */
473	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
474	minifree = avgifree - avgifree / 4;
475	if (minifree < 1)
476		minifree = 1;
477	minbfree = avgbfree - avgbfree / 4;
478	if (minbfree < 1)
479		minbfree = 1;
480	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
481	dirsize = AVGDIRSIZE;
482	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
483	if (dirsize < curdirsize)
484		dirsize = curdirsize;
485	if (dirsize <= 0)
486		maxcontigdirs = 0;		/* dirsize overflowed */
487	else
488		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
489	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
490	if (maxcontigdirs == 0)
491		maxcontigdirs = 1;
492
493	/*
494	 * Limit number of dirs in one cg and reserve space for
495	 * regular files, but only if we have no deficit in
496	 * inodes or space.
497	 */
498	prefcg = ino_to_cg(fs, pip->i_number);
499	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
500		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
501		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
502	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
503			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
504				return (cg);
505		}
506	for (cg = 0; cg < prefcg; cg++)
507		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
508		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
509	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
510			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
511				return (cg);
512		}
513	/*
514	 * This is a backstop when we have deficit in space.
515	 */
516	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
517		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
518			return (cg);
519	for (cg = 0; cg < prefcg; cg++)
520		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
521			break;
522	return (cg);
523}
524
525/*
526 * Select the desired position for the next block in a file.
527 *
528 * we try to mimic what Remy does in inode_getblk/block_getblk
529 *
530 * we note: blocknr == 0 means that we're about to allocate either
531 * a direct block or a pointer block at the first level of indirection
532 * (In other words, stuff that will go in i_db[] or i_ib[])
533 *
534 * blocknr != 0 means that we're allocating a block that is none
535 * of the above. Then, blocknr tells us the number of the block
536 * that will hold the pointer
537 */
538int32_t
539ext2_blkpref(ip, lbn, indx, bap, blocknr)
540	struct inode *ip;
541	int32_t lbn;
542	int indx;
543	int32_t *bap;
544	int32_t blocknr;
545{
546	int	tmp;
547	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
548
549	/* if the next block is actually what we thought it is,
550	   then set the goal to what we thought it should be
551	*/
552	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
553		return ip->i_next_alloc_goal;
554
555	/* now check whether we were provided with an array that basically
556	   tells us previous blocks to which we want to stay closeby
557	*/
558	if(bap)
559                for (tmp = indx - 1; tmp >= 0; tmp--)
560			if (bap[tmp])
561				return bap[tmp];
562
563	/* else let's fall back to the blocknr, or, if there is none,
564	   follow the rule that a block should be allocated near its inode
565	*/
566	return blocknr ? blocknr :
567			(int32_t)(ip->i_block_group *
568			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
569			ip->i_e2fs->e2fs->e2fs_first_dblock;
570}
571
572/*
573 * Implement the cylinder overflow algorithm.
574 *
575 * The policy implemented by this algorithm is:
576 *   1) allocate the block in its requested cylinder group.
577 *   2) quadradically rehash on the cylinder group number.
578 *   3) brute force search for a free block.
579 */
580static u_long
581ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
582                daddr_t (*allocator)(struct inode *, int, daddr_t, int))
583{
584	struct m_ext2fs *fs;
585	ino_t result;
586	int i, icg = cg;
587
588	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
589	fs = ip->i_e2fs;
590	/*
591	 * 1: preferred cylinder group
592	 */
593	result = (*allocator)(ip, cg, pref, size);
594	if (result)
595		return (result);
596	/*
597	 * 2: quadratic rehash
598	 */
599	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
600		cg += i;
601		if (cg >= fs->e2fs_gcount)
602			cg -= fs->e2fs_gcount;
603		result = (*allocator)(ip, cg, 0, size);
604		if (result)
605			return (result);
606	}
607	/*
608	 * 3: brute force search
609	 * Note that we start at i == 2, since 0 was checked initially,
610	 * and 1 is always checked in the quadratic rehash.
611	 */
612	cg = (icg + 2) % fs->e2fs_gcount;
613	for (i = 2; i < fs->e2fs_gcount; i++) {
614		result = (*allocator)(ip, cg, 0, size);
615		if (result)
616			return (result);
617		cg++;
618		if (cg == fs->e2fs_gcount)
619			cg = 0;
620	}
621	return (0);
622}
623
624/*
625 * Determine whether a block can be allocated.
626 *
627 * Check to see if a block of the appropriate size is available,
628 * and if it is, allocate it.
629 */
630static daddr_t
631ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
632{
633	struct m_ext2fs *fs;
634	struct buf *bp;
635	struct ext2mount *ump;
636	daddr_t bno, runstart, runlen;
637	int bit, loc, end, error, start;
638	char *bbp;
639	/* XXX ondisk32 */
640	fs = ip->i_e2fs;
641	ump = ip->i_ump;
642	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
643		return (0);
644	EXT2_UNLOCK(ump);
645	error = bread(ip->i_devvp, fsbtodb(fs,
646		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
647		(int)fs->e2fs_bsize, NOCRED, &bp);
648	if (error) {
649		brelse(bp);
650		EXT2_LOCK(ump);
651		return (0);
652	}
653	bbp = (char *)bp->b_data;
654
655	if (dtog(fs, bpref) != cg)
656		bpref = 0;
657	if (bpref != 0) {
658		bpref = dtogd(fs, bpref);
659		/*
660		 * if the requested block is available, use it
661		 */
662		if (isclr(bbp, bpref)) {
663			bno = bpref;
664			goto gotit;
665		}
666	}
667	/*
668	 * no blocks in the requested cylinder, so take next
669	 * available one in this cylinder group.
670	 * first try to get 8 contigous blocks, then fall back to a single
671	 * block.
672	 */
673	if (bpref)
674		start = dtogd(fs, bpref) / NBBY;
675	else
676		start = 0;
677	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
678retry:
679	runlen = 0;
680	runstart = 0;
681	for (loc = start; loc < end; loc++) {
682		if (bbp[loc] == (char)0xff) {
683			runlen = 0;
684			continue;
685		}
686
687		/* Start of a run, find the number of high clear bits. */
688		if (runlen == 0) {
689			bit = fls(bbp[loc]);
690			runlen = NBBY - bit;
691			runstart = loc * NBBY + bit;
692		} else if (bbp[loc] == 0) {
693			/* Continue a run. */
694			runlen += NBBY;
695		} else {
696			/*
697			 * Finish the current run.  If it isn't long
698			 * enough, start a new one.
699			 */
700			bit = ffs(bbp[loc]) - 1;
701			runlen += bit;
702			if (runlen >= 8) {
703				bno = runstart;
704				goto gotit;
705			}
706
707			/* Run was too short, start a new one. */
708			bit = fls(bbp[loc]);
709			runlen = NBBY - bit;
710			runstart = loc * NBBY + bit;
711		}
712
713		/* If the current run is long enough, use it. */
714		if (runlen >= 8) {
715			bno = runstart;
716			goto gotit;
717		}
718	}
719	if (start != 0) {
720		end = start;
721		start = 0;
722		goto retry;
723	}
724
725	bno = ext2_mapsearch(fs, bbp, bpref);
726	if (bno < 0){
727		brelse(bp);
728		EXT2_LOCK(ump);
729		return (0);
730	}
731gotit:
732#ifdef DIAGNOSTIC
733	if (isset(bbp, bno)) {
734		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
735			cg, (intmax_t)bno, fs->e2fs_fsmnt);
736		panic("ext2fs_alloccg: dup alloc");
737	}
738#endif
739	setbit(bbp, bno);
740	EXT2_LOCK(ump);
741	fs->e2fs->e2fs_fbcount--;
742	fs->e2fs_gd[cg].ext2bgd_nbfree--;
743	fs->e2fs_fmod = 1;
744	EXT2_UNLOCK(ump);
745	bdwrite(bp);
746	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
747}
748
749/*
750 * Determine whether an inode can be allocated.
751 *
752 * Check to see if an inode is available, and if it is,
753 * allocate it using tode in the specified cylinder group.
754 */
755static daddr_t
756ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
757{
758	struct m_ext2fs *fs;
759	struct buf *bp;
760	struct ext2mount *ump;
761	int error, start, len, loc, map, i;
762	char *ibp;
763	ipref--; /* to avoid a lot of (ipref -1) */
764	if (ipref == -1)
765		ipref = 0;
766	fs = ip->i_e2fs;
767	ump = ip->i_ump;
768	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
769		return (0);
770	EXT2_UNLOCK(ump);
771	error = bread(ip->i_devvp, fsbtodb(fs,
772		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
773		(int)fs->e2fs_bsize, NOCRED, &bp);
774	if (error) {
775		brelse(bp);
776		EXT2_LOCK(ump);
777		return (0);
778	}
779	ibp = (char *)bp->b_data;
780	if (ipref) {
781		ipref %= fs->e2fs->e2fs_ipg;
782		if (isclr(ibp, ipref))
783			goto gotit;
784	}
785	start = ipref / NBBY;
786	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
787	loc = skpc(0xff, len, &ibp[start]);
788	if (loc == 0) {
789		len = start + 1;
790		start = 0;
791		loc = skpc(0xff, len, &ibp[0]);
792		if (loc == 0) {
793			printf("cg = %d, ipref = %lld, fs = %s\n",
794				cg, (long long)ipref, fs->e2fs_fsmnt);
795			panic("ext2fs_nodealloccg: map corrupted");
796			/* NOTREACHED */
797		}
798	}
799	i = start + len - loc;
800	map = ibp[i];
801	ipref = i * NBBY;
802	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
803		if ((map & i) == 0) {
804			goto gotit;
805		}
806	}
807	printf("fs = %s\n", fs->e2fs_fsmnt);
808	panic("ext2fs_nodealloccg: block not in map");
809	/* NOTREACHED */
810gotit:
811	setbit(ibp, ipref);
812	EXT2_LOCK(ump);
813	fs->e2fs_gd[cg].ext2bgd_nifree--;
814	fs->e2fs->e2fs_ficount--;
815	fs->e2fs_fmod = 1;
816	if ((mode & IFMT) == IFDIR) {
817		fs->e2fs_gd[cg].ext2bgd_ndirs++;
818		fs->e2fs_total_dir++;
819	}
820	EXT2_UNLOCK(ump);
821	bdwrite(bp);
822	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
823}
824
825/*
826 * Free a block or fragment.
827 *
828 */
829void
830ext2_blkfree(ip, bno, size)
831	struct inode *ip;
832	int32_t bno;
833	long size;
834{
835	struct m_ext2fs *fs;
836	struct buf *bp;
837	struct ext2mount *ump;
838	int cg, error;
839	char *bbp;
840
841	fs = ip->i_e2fs;
842	ump = ip->i_ump;
843	cg = dtog(fs, bno);
844	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
845                printf("bad block %lld, ino %llu\n", (long long)bno,
846                    (unsigned long long)ip->i_number);
847                ext2_fserr(fs, ip->i_uid, "bad block");
848                return;
849        }
850        error = bread(ip->i_devvp,
851                fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
852                (int)fs->e2fs_bsize, NOCRED, &bp);
853        if (error) {
854                brelse(bp);
855                return;
856        }
857        bbp = (char *)bp->b_data;
858        bno = dtogd(fs, bno);
859        if (isclr(bbp, bno)) {
860                printf("block = %lld, fs = %s\n",
861                     (long long)bno, fs->e2fs_fsmnt);
862                panic("blkfree: freeing free block");
863        }
864        clrbit(bbp, bno);
865	EXT2_LOCK(ump);
866        fs->e2fs->e2fs_fbcount++;
867        fs->e2fs_gd[cg].ext2bgd_nbfree++;
868        fs->e2fs_fmod = 1;
869	EXT2_UNLOCK(ump);
870        bdwrite(bp);
871}
872
873/*
874 * Free an inode.
875 *
876 */
877int
878ext2_vfree(pvp, ino, mode)
879	struct vnode *pvp;
880	ino_t ino;
881	int mode;
882{
883	struct m_ext2fs *fs;
884	struct inode *pip;
885	struct buf *bp;
886	struct ext2mount *ump;
887	int error, cg;
888	char * ibp;
889/*	mode_t save_i_mode; */
890
891	pip = VTOI(pvp);
892	fs = pip->i_e2fs;
893	ump = pip->i_ump;
894	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
895		panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
896		    pip->i_devvp, ino, fs->e2fs_fsmnt);
897
898	cg = ino_to_cg(fs, ino);
899	error = bread(pip->i_devvp,
900		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
901		(int)fs->e2fs_bsize, NOCRED, &bp);
902	if (error) {
903		brelse(bp);
904		return (0);
905	}
906	ibp = (char *)bp->b_data;
907	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
908	if (isclr(ibp, ino)) {
909		printf("ino = %llu, fs = %s\n",
910			 (unsigned long long)ino, fs->e2fs_fsmnt);
911		if (fs->e2fs_ronly == 0)
912			panic("ifree: freeing free inode");
913	}
914	clrbit(ibp, ino);
915	EXT2_LOCK(ump);
916	fs->e2fs->e2fs_ficount++;
917	fs->e2fs_gd[cg].ext2bgd_nifree++;
918	if ((mode & IFMT) == IFDIR) {
919		fs->e2fs_gd[cg].ext2bgd_ndirs--;
920		fs->e2fs_total_dir--;
921	}
922	fs->e2fs_fmod = 1;
923	EXT2_UNLOCK(ump);
924	bdwrite(bp);
925	return (0);
926}
927
928/*
929 * Find a block in the specified cylinder group.
930 *
931 * It is a panic if a request is made to find a block if none are
932 * available.
933 */
934static daddr_t
935ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
936{
937	daddr_t bno;
938	int start, len, loc, i, map;
939
940	/*
941	 * find the fragment by searching through the free block
942	 * map for an appropriate bit pattern
943	 */
944	if (bpref)
945		start = dtogd(fs, bpref) / NBBY;
946	else
947		start = 0;
948	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
949	loc = skpc(0xff, len, &bbp[start]);
950	if (loc == 0) {
951		len = start + 1;
952		start = 0;
953		loc = skpc(0xff, len, &bbp[start]);
954		if (loc == 0) {
955			printf("start = %d, len = %d, fs = %s\n",
956				start, len, fs->e2fs_fsmnt);
957			panic("ext2fs_alloccg: map corrupted");
958			/* NOTREACHED */
959		}
960	}
961	i = start + len - loc;
962	map = bbp[i];
963	bno = i * NBBY;
964	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
965		if ((map & i) == 0)
966			return (bno);
967	}
968	printf("fs = %s\n", fs->e2fs_fsmnt);
969	panic("ext2fs_mapsearch: block not in map");
970	/* NOTREACHED */
971}
972
973/*
974 * Fserr prints the name of a file system with an error diagnostic.
975 *
976 * The form of the error message is:
977 *	fs: error message
978 */
979static void
980ext2_fserr(fs, uid, cp)
981	struct m_ext2fs *fs;
982	uid_t uid;
983	char *cp;
984{
985
986	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
987}
988
989int
990cg_has_sb(int i)
991{
992        int a3, a5, a7;
993
994        if (i == 0 || i == 1)
995                return 1;
996        for (a3 = 3, a5 = 5, a7 = 7;
997            a3 <= i || a5 <= i || a7 <= i;
998            a3 *= 3, a5 *= 5, a7 *= 7)
999                if (i == a3 || i == a5 || i == a7)
1000                        return 1;
1001        return 0;
1002}
1003