ffs_alloc.c revision 24101
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
34 * $Id: ffs_alloc.c,v 1.31 1997/03/09 06:00:40 mpp Exp $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/buf.h>
42#include <sys/proc.h>
43#include <sys/vnode.h>
44#include <sys/mount.h>
45#include <sys/kernel.h>
46#include <sys/sysctl.h>
47#include <sys/syslog.h>
48
49#include <vm/vm.h>
50
51#include <ufs/ufs/quota.h>
52#include <ufs/ufs/inode.h>
53#include <ufs/ufs/ufs_extern.h>
54
55#include <ufs/ffs/fs.h>
56#include <ufs/ffs/ffs_extern.h>
57
58extern u_long nextgennumber;
59
60typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg, ufs_daddr_t bpref,
61				  int size));
62
63static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
64static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t));
65static void	ffs_clusteracct	__P((struct fs *, struct cg *, ufs_daddr_t,
66				     int));
67static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
68	    int));
69static ino_t	ffs_dirpref __P((struct fs *));
70static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
71static void	ffs_fserr __P((struct fs *, u_int, char *));
72static u_long	ffs_hashalloc
73		    __P((struct inode *, int, long, int, allocfcn_t *));
74static ino_t	ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
75static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
76	    int));
77
78/*
79 * Allocate a block in the file system.
80 *
81 * The size of the requested block is given, which must be some
82 * multiple of fs_fsize and <= fs_bsize.
83 * A preference may be optionally specified. If a preference is given
84 * the following hierarchy is used to allocate a block:
85 *   1) allocate the requested block.
86 *   2) allocate a rotationally optimal block in the same cylinder.
87 *   3) allocate a block in the same cylinder group.
88 *   4) quadratically rehash into other cylinder groups, until an
89 *      available block is located.
90 * If no block preference is given the following hierarchy is used
91 * to allocate a block:
92 *   1) allocate a block in the cylinder group that contains the
93 *      inode for the file.
94 *   2) quadratically rehash into other cylinder groups, until an
95 *      available block is located.
96 */
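/*
 * Example (a sketch only; the exact caller may differ): block
 * allocation is normally driven from ffs_balloc(), which derives the
 * preference from ffs_blkpref() and then asks for a full block, e.g.
 *
 *	error = ffs_alloc(ip, lbn,
 *	    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
 *	    (int)fs->fs_bsize, cred, &newb);
 *
 * A bpref of 0 simply falls back to the cylinder group holding the
 * file's inode.
 */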
97int
98ffs_alloc(ip, lbn, bpref, size, cred, bnp)
99	register struct inode *ip;
100	ufs_daddr_t lbn, bpref;
101	int size;
102	struct ucred *cred;
103	ufs_daddr_t *bnp;
104{
105	register struct fs *fs;
106	ufs_daddr_t bno;
107	int cg;
108#ifdef QUOTA
109	int error;
110#endif
111
112	*bnp = 0;
113	fs = ip->i_fs;
114#ifdef DIAGNOSTIC
115	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
116		printf("dev = 0x%lx, bsize = %ld, size = %d, fs = %s\n",
117		    (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
118		panic("ffs_alloc: bad size");
119	}
120	if (cred == NOCRED)
121		panic("ffs_alloc: missing credential");
122#endif /* DIAGNOSTIC */
123	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
124		goto nospace;
125	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
126		goto nospace;
127#ifdef QUOTA
128	error = chkdq(ip, (long)btodb(size), cred, 0);
129	if (error)
130		return (error);
131#endif
132	if (bpref >= fs->fs_size)
133		bpref = 0;
134	if (bpref == 0)
135		cg = ino_to_cg(fs, ip->i_number);
136	else
137		cg = dtog(fs, bpref);
138	bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
139					 ffs_alloccg);
140	if (bno > 0) {
141		ip->i_blocks += btodb(size);
142		ip->i_flag |= IN_CHANGE | IN_UPDATE;
143		*bnp = bno;
144		return (0);
145	}
146#ifdef QUOTA
147	/*
148	 * Restore user's disk quota because allocation failed.
149	 */
150	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
151#endif
152nospace:
153	ffs_fserr(fs, cred->cr_uid, "file system full");
154	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
155	return (ENOSPC);
156}
157
158/*
159 * Reallocate a fragment to a bigger size.
160 *
161 * The number and size of the old block are given, and a preference
162 * and new size are also specified. The allocator attempts to extend
163 * the original block. Failing that, the regular block allocator is
164 * invoked to get an appropriate block.
165 */
166int
167ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
168	register struct inode *ip;
169	ufs_daddr_t lbprev;
170	ufs_daddr_t bpref;
171	int osize, nsize;
172	struct ucred *cred;
173	struct buf **bpp;
174{
175	register struct fs *fs;
176	struct buf *bp;
177	int cg, request, error;
178	ufs_daddr_t bprev, bno;
179
180	*bpp = 0;
181	fs = ip->i_fs;
182#ifdef DIAGNOSTIC
183	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
184	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
185		printf(
186		    "dev = 0x%lx, bsize = %ld, osize = %d, "
187		    "nsize = %d, fs = %s\n",
188		    (u_long)ip->i_dev, fs->fs_bsize, osize,
189		    nsize, fs->fs_fsmnt);
190		panic("ffs_realloccg: bad size");
191	}
192	if (cred == NOCRED)
193		panic("ffs_realloccg: missing credential");
194#endif /* DIAGNOSTIC */
195	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
196		goto nospace;
197	if ((bprev = ip->i_db[lbprev]) == 0) {
198		printf("dev = 0x%lx, bsize = %ld, bprev = %ld, fs = %s\n",
199		    (u_long) ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
200		panic("ffs_realloccg: bad bprev");
201	}
202	/*
203	 * Allocate the extra space in the buffer.
204	 */
205	error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp);
206	if (error) {
207		brelse(bp);
208		return (error);
209	}
210
211	if (bp->b_blkno == bp->b_lblkno) {
212		if (lbprev >= NDADDR)
213			panic("ffs_realloccg: lbprev out of range");
214		bp->b_blkno = fsbtodb(fs, bprev);
215	}
216
217#ifdef QUOTA
218	error = chkdq(ip, (long)btodb(nsize - osize), cred, 0);
219	if (error) {
220		brelse(bp);
221		return (error);
222	}
223#endif
224	/*
225	 * Check for extension in the existing location.
226	 */
227	cg = dtog(fs, bprev);
228	bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize);
229	if (bno) {
230		if (bp->b_blkno != fsbtodb(fs, bno))
231			panic("ffs_realloccg: bad blockno");
232		ip->i_blocks += btodb(nsize - osize);
233		ip->i_flag |= IN_CHANGE | IN_UPDATE;
234		allocbuf(bp, nsize);
235		bp->b_flags |= B_DONE;
236		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
237		*bpp = bp;
238		return (0);
239	}
240	/*
241	 * Allocate a new disk location.
242	 */
243	if (bpref >= fs->fs_size)
244		bpref = 0;
245	switch ((int)fs->fs_optim) {
246	case FS_OPTSPACE:
247		/*
248		 * Allocate an exact sized fragment. Although this makes
249		 * best use of space, we will waste time relocating it if
250		 * the file continues to grow. If the fragmentation is
251		 * less than half of the minimum free reserve, we choose
252		 * to begin optimizing for time.
253		 */
254		request = nsize;
255		if (fs->fs_minfree <= 5 ||
256		    fs->fs_cstotal.cs_nffree >
257		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
258			break;
259		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
260			fs->fs_fsmnt);
261		fs->fs_optim = FS_OPTTIME;
262		break;
263	case FS_OPTTIME:
264		/*
265		 * At this point we have discovered a file that is trying to
266		 * grow a small fragment to a larger fragment. To save time,
267		 * we allocate a full sized block, then free the unused portion.
268		 * If the file continues to grow, the `ffs_fragextend' call
269		 * above will be able to grow it in place without further
270		 * copying. If aberrant programs cause disk fragmentation to
271		 * grow within 2% of the free reserve, we choose to begin
272		 * optimizing for space.
273		 */
274		request = fs->fs_bsize;
275		if (fs->fs_cstotal.cs_nffree <
276		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
277			break;
278		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
279			fs->fs_fsmnt);
280		fs->fs_optim = FS_OPTSPACE;
281		break;
282	default:
283		printf("dev = 0x%lx, optim = %ld, fs = %s\n",
284		    (u_long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
285		panic("ffs_realloccg: bad optim");
286		/* NOTREACHED */
287	}
288	bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
289					 ffs_alloccg);
290	if (bno > 0) {
291		bp->b_blkno = fsbtodb(fs, bno);
292		ffs_blkfree(ip, bprev, (long)osize);
293		if (nsize < request)
294			ffs_blkfree(ip, bno + numfrags(fs, nsize),
295			    (long)(request - nsize));
296		ip->i_blocks += btodb(nsize - osize);
297		ip->i_flag |= IN_CHANGE | IN_UPDATE;
298		allocbuf(bp, nsize);
299		bp->b_flags |= B_DONE;
300		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
301		*bpp = bp;
302		return (0);
303	}
304#ifdef QUOTA
305	/*
306	 * Restore user's disk quota because allocation failed.
307	 */
308	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
309#endif
310	brelse(bp);
311nospace:
312	/*
313	 * no space available
314	 */
315	ffs_fserr(fs, cred->cr_uid, "file system full");
316	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
317	return (ENOSPC);
318}
319
320/*
321 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
322 *
323 * The vnode and an array of buffer pointers for a range of sequential
324 * logical blocks to be made contiguous are given. The allocator attempts
325 * to find a range of sequential blocks starting as close as possible to
326 * an fs_rotdelay offset from the end of the allocation for the logical
327 * block immediately preceding the current range. If successful, the
328 * physical block numbers in the buffer pointers and in the inode are
329 * changed to reflect the new allocation. If unsuccessful, the allocation
330 * is left unchanged. The success in doing the reallocation is returned.
331 * Note that the error return is not reflected back to the user. Rather
332 * the previous block allocation will be used.
333 */
334static int doasyncfree = 1;
335SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
336
337int doreallocblks = 1;
338SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
339
340static int prtrealloc = 0;
341
342int
343ffs_reallocblks(ap)
344	struct vop_reallocblks_args /* {
345		struct vnode *a_vp;
346		struct cluster_save *a_buflist;
347	} */ *ap;
348{
349#if !defined (not_yes)
350	return (ENOSPC);
351#else
352	struct fs *fs;
353	struct inode *ip;
354	struct vnode *vp;
355	struct buf *sbp, *ebp;
356	ufs_daddr_t *bap, *sbap, *ebap = 0;
357	struct cluster_save *buflist;
358	ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno;
359	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
360	int i, len, start_lvl, end_lvl, pref, ssize;
361	struct timeval tv;
362
363	if (doreallocblks == 0)
364		return (ENOSPC);
365	vp = ap->a_vp;
366	ip = VTOI(vp);
367	fs = ip->i_fs;
368	if (fs->fs_contigsumsize <= 0)
369		return (ENOSPC);
370	buflist = ap->a_buflist;
371	len = buflist->bs_nchildren;
372	start_lbn = buflist->bs_children[0]->b_lblkno;
373	end_lbn = start_lbn + len - 1;
374#ifdef DIAGNOSTIC
375	for (i = 0; i < len; i++)
376		if (!ffs_checkblk(ip,
377		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
378			panic("ffs_reallocblks: unallocated block 1");
379	for (i = 1; i < len; i++)
380		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
381			panic("ffs_reallocblks: non-logical cluster");
382	blkno = buflist->bs_children[0]->b_blkno;
383	ssize = fsbtodb(fs, fs->fs_frag);
384	for (i = 1; i < len - 1; i++)
385		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
386			panic("ffs_reallocblks: non-physical cluster %d", i);
387#endif
388	/*
389	 * If the latest allocation is in a new cylinder group, assume that
390	 * the filesystem has decided to move and do not force it back to
391	 * the previous cylinder group.
392	 */
393	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
394	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
395		return (ENOSPC);
396	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
397	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
398		return (ENOSPC);
399	/*
400	 * Get the starting offset and block map for the first block.
401	 */
402	if (start_lvl == 0) {
403		sbap = &ip->i_db[0];
404		soff = start_lbn;
405	} else {
406		idp = &start_ap[start_lvl - 1];
407		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
408			brelse(sbp);
409			return (ENOSPC);
410		}
411		sbap = (ufs_daddr_t *)sbp->b_data;
412		soff = idp->in_off;
413	}
414	/*
415	 * Find the preferred location for the cluster.
416	 */
417	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
418	/*
419	 * If the block range spans two block maps, get the second map.
420	 */
421	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
422		ssize = len;
423	} else {
424#ifdef DIAGNOSTIC
425		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
426			panic("ffs_reallocblk: start == end");
427#endif
428		ssize = len - (idp->in_off + 1);
429		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
430			goto fail;
431		ebap = (ufs_daddr_t *)ebp->b_data;
432	}
433	/*
434	 * Search the block map looking for an allocation of the desired size.
435	 */
436	if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
437	    len, ffs_clusteralloc)) == 0)
438		goto fail;
439	/*
440	 * We have found a new contiguous block.
441	 *
442	 * First we have to replace the old block pointers with the new
443	 * block pointers in the inode and indirect blocks associated
444	 * with the file.
445	 */
446#ifdef DEBUG
447	if (prtrealloc)
448		printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
449		    start_lbn, end_lbn);
450#endif
451	blkno = newblk;
452	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
453		if (i == ssize)
454			bap = ebap;
455#ifdef DIAGNOSTIC
456		if (!ffs_checkblk(ip,
457		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
458			panic("ffs_reallocblks: unallocated block 2");
459		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
460			panic("ffs_reallocblks: alloc mismatch");
461#endif
462#ifdef DEBUG
463		if (prtrealloc)
464			printf(" %d,", *bap);
465#endif
466		*bap++ = blkno;
467	}
468	/*
469	 * Next we must write out the modified inode and indirect blocks.
470	 * For strict correctness, the writes should be synchronous since
471 * the old block values may have been written to disk. In practice
472	 * they are almost never written, but if we are concerned about
473	 * strict correctness, the `doasyncfree' flag should be set to zero.
474	 *
475	 * The test on `doasyncfree' should be changed to test a flag
476	 * that shows whether the associated buffers and inodes have
477	 * been written. The flag should be set when the cluster is
478	 * started and cleared whenever the buffer or inode is flushed.
479	 * We can then check below to see if it is set, and do the
480	 * synchronous write only when it has been cleared.
481	 */
482	if (sbap != &ip->i_db[0]) {
483		if (doasyncfree)
484			bdwrite(sbp);
485		else
486			bwrite(sbp);
487	} else {
488		ip->i_flag |= IN_CHANGE | IN_UPDATE;
489		if (!doasyncfree) {
490			gettime(&tv);
491			VOP_UPDATE(vp, &tv, &tv, 1);
492		}
493	}
494	if (ssize < len)
495		if (doasyncfree)
496			bdwrite(ebp);
497		else
498			bwrite(ebp);
499	/*
500	 * Last, free the old blocks and assign the new blocks to the buffers.
501	 */
502#ifdef DEBUG
503	if (prtrealloc)
504		printf("\n\tnew:");
505#endif
506	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
507		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
508		    fs->fs_bsize);
509		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
510#ifdef DEBUG
511		if (!ffs_checkblk(ip,
512		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
513			panic("ffs_reallocblks: unallocated block 3");
514		if (prtrealloc)
515			printf(" %d,", blkno);
516#endif
517	}
518#ifdef DEBUG
519	if (prtrealloc) {
520		prtrealloc--;
521		printf("\n");
522	}
523#endif
524	return (0);
525
526fail:
527	if (ssize < len)
528		brelse(ebp);
529	if (sbap != &ip->i_db[0])
530		brelse(sbp);
531	return (ENOSPC);
532#endif
533}
534
535/*
536 * Allocate an inode in the file system.
537 *
538 * If allocating a directory, use ffs_dirpref to select the inode.
539 * If allocating in a directory, the following hierarchy is followed:
540 *   1) allocate the preferred inode.
541 *   2) allocate an inode in the same cylinder group.
542 *   3) quadratically rehash into other cylinder groups, until an
543 *      available inode is located.
544 * If no inode preference is given the following hierarchy is used
545 * to allocate an inode:
546 *   1) allocate an inode in cylinder group 0.
547 *   2) quadratically rehash into other cylinder groups, until an
548 *      available inode is located.
549 */
550int
551ffs_valloc(ap)
552	struct vop_valloc_args /* {
553		struct vnode *a_pvp;
554		int a_mode;
555		struct ucred *a_cred;
556		struct vnode **a_vpp;
557	} */ *ap;
558{
559	register struct vnode *pvp = ap->a_pvp;
560	register struct inode *pip;
561	register struct fs *fs;
562	register struct inode *ip;
563	mode_t mode = ap->a_mode;
564	ino_t ino, ipref;
565	int cg, error;
566
567	*ap->a_vpp = NULL;
568	pip = VTOI(pvp);
569	fs = pip->i_fs;
570	if (fs->fs_cstotal.cs_nifree == 0)
571		goto noinodes;
572
573	if ((mode & IFMT) == IFDIR)
574		ipref = ffs_dirpref(fs);
575	else
576		ipref = pip->i_number;
577	if (ipref >= fs->fs_ncg * fs->fs_ipg)
578		ipref = 0;
579	cg = ino_to_cg(fs, ipref);
580	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
581					(allocfcn_t *)ffs_nodealloccg);
582	if (ino == 0)
583		goto noinodes;
584	error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
585	if (error) {
586		VOP_VFREE(pvp, ino, mode);
587		return (error);
588	}
589	ip = VTOI(*ap->a_vpp);
590	if (ip->i_mode) {
591		printf("mode = 0%o, inum = %ld, fs = %s\n",
592		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
593		panic("ffs_valloc: dup alloc");
594	}
595	if (ip->i_blocks) {				/* XXX */
596		printf("free inode %s/%ld had %ld blocks\n",
597		    fs->fs_fsmnt, ino, ip->i_blocks);
598		ip->i_blocks = 0;
599	}
600	ip->i_flags = 0;
601	/*
602	 * Set up a new generation number for this inode.
603	 */
604	if (++nextgennumber < (u_long)time.tv_sec)
605		nextgennumber = time.tv_sec;
606	ip->i_gen = nextgennumber;
607	return (0);
608noinodes:
609	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
610	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
611	return (ENOSPC);
612}
613
614/*
615 * Find a cylinder group in which to place a directory.
616 *
617 * The policy implemented by this algorithm is to select, from among
618 * those cylinder groups with an above-average number of free inodes,
619 * the one with the smallest number of directories.
620 */
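/*
 * Worked example with hypothetical summary counts: given four cylinder
 * groups whose (cs_nifree, cs_ndir) pairs are (100, 10), (80, 2),
 * (20, 5) and (60, 1), the average free-inode count is 65.  Only
 * groups 0 and 1 meet the average, and group 1 has the fewer
 * directories, so the first inode number of group 1 is returned.
 */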
621static ino_t
622ffs_dirpref(fs)
623	register struct fs *fs;
624{
625	int cg, minndir, mincg, avgifree;
626
627	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
628	minndir = fs->fs_ipg;
629	mincg = 0;
630	for (cg = 0; cg < fs->fs_ncg; cg++)
631		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
632		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
633			mincg = cg;
634			minndir = fs->fs_cs(fs, cg).cs_ndir;
635		}
636	return ((ino_t)(fs->fs_ipg * mincg));
637}
638
639/*
640 * Select the desired position for the next block in a file.  The file is
641 * logically divided into sections. The first section is composed of the
642 * direct blocks. Each additional section contains fs_maxbpg blocks.
643 *
644 * If no blocks have been allocated in the first section, the policy is to
645 * request a block in the same cylinder group as the inode that describes
646 * the file. If no blocks have been allocated in any other section, the
647 * policy is to place the section in a cylinder group with a greater than
648 * average number of free blocks.  An appropriate cylinder group is found
649 * by using a rotor that sweeps the cylinder groups. When a new group of
650 * blocks is needed, the sweep begins in the cylinder group following the
651 * cylinder group from which the previous allocation was made. The sweep
652 * continues until a cylinder group with greater than the average number
653 * of free blocks is found. If the allocation is for the first block in an
654 * indirect block, the information on the previous allocation is unavailable;
655 * here a best guess is made based upon the logical block number being
656 * allocated.
657 *
658 * If a section is already partially allocated, the policy is to
659 * contiguously allocate fs_maxcontig blocks.  The end of one of these
660 * contiguous blocks and the beginning of the next is physically separated
661 * so that the disk head will be in transit between them for at least
662 * fs_rotdelay milliseconds.  This is to allow time for the processor to
663 * schedule another I/O transfer.
664 */
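/*
 * Example with hypothetical geometry: the first direct block of a file
 * whose inode lives in cylinder group 3 of a filesystem with
 * fs_fpg = 4096 gets the preference 3 * 4096 + fs_frag, i.e. one
 * block past the base of that cylinder group.
 */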
665ufs_daddr_t
666ffs_blkpref(ip, lbn, indx, bap)
667	struct inode *ip;
668	ufs_daddr_t lbn;
669	int indx;
670	ufs_daddr_t *bap;
671{
672	register struct fs *fs;
673	register int cg;
674	int avgbfree, startcg;
675	ufs_daddr_t nextblk;
676
677	fs = ip->i_fs;
678	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
679		if (lbn < NDADDR) {
680			cg = ino_to_cg(fs, ip->i_number);
681			return (fs->fs_fpg * cg + fs->fs_frag);
682		}
683		/*
684		 * Find a cylinder with greater than average number of
685		 * unused data blocks.
686		 */
687		if (indx == 0 || bap[indx - 1] == 0)
688			startcg =
689			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
690		else
691			startcg = dtog(fs, bap[indx - 1]) + 1;
692		startcg %= fs->fs_ncg;
693		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
694		for (cg = startcg; cg < fs->fs_ncg; cg++)
695			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
696				fs->fs_cgrotor = cg;
697				return (fs->fs_fpg * cg + fs->fs_frag);
698			}
699		for (cg = 0; cg <= startcg; cg++)
700			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
701				fs->fs_cgrotor = cg;
702				return (fs->fs_fpg * cg + fs->fs_frag);
703			}
704		return (0);
705	}
706	/*
707	 * One or more previous blocks have been laid out. If less
708	 * than fs_maxcontig previous blocks are contiguous, the
709	 * next block is requested contiguously, otherwise it is
710	 * requested rotationally delayed by fs_rotdelay milliseconds.
711	 */
712	nextblk = bap[indx - 1] + fs->fs_frag;
713	if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig ||
714	    bap[indx - fs->fs_maxcontig] +
715	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
716		return (nextblk);
717	/*
718	 * Here we convert ms of delay to frags as:
719	 * (frags) = (ms) * (rev/sec) * (sect/rev) /
720	 *	((sect/frag) * (ms/sec))
721	 * then round up to the next block.
722	 */
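	/*
	 * Worked example with purely illustrative parameters: for
	 * fs_rotdelay = 4 ms, fs_rps = 60, fs_nsect = 32 and NSPF(fs) = 2,
	 * the expression below is 4 * 60 * 32 / (2 * 1000) = 3 frags
	 * (integer division), which roundup() raises to 8 when fs_frag
	 * is 8, i.e. one whole block of separation.
	 */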
723	nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
724	    (NSPF(fs) * 1000), fs->fs_frag);
725	return (nextblk);
726}
727
728/*
729 * Implement the cylinder overflow algorithm.
730 *
731 * The policy implemented by this algorithm is:
732 *   1) allocate the block in its requested cylinder group.
733 *   2) quadratically rehash on the cylinder group number.
734 *   3) brute force search for a free block.
735 */
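/*
 * Example with hypothetical numbers: for fs_ncg = 16 and a starting
 * cylinder group of 5, step 2 probes groups 6, 8, 12 and then 4
 * (cg advanced by 1, 2, 4 and 8, wrapping modulo fs_ncg), and step 3
 * sweeps 7, 8, ..., 15, 0, ..., 4.
 */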
736/*VARARGS5*/
737static u_long
738ffs_hashalloc(ip, cg, pref, size, allocator)
739	struct inode *ip;
740	int cg;
741	long pref;
742	int size;	/* size for data blocks, mode for inodes */
743	allocfcn_t *allocator;
744{
745	register struct fs *fs;
746	long result;	/* XXX why not same type as we return? */
747	int i, icg = cg;
748
749	fs = ip->i_fs;
750	/*
751	 * 1: preferred cylinder group
752	 */
753	result = (*allocator)(ip, cg, pref, size);
754	if (result)
755		return (result);
756	/*
757	 * 2: quadratic rehash
758	 */
759	for (i = 1; i < fs->fs_ncg; i *= 2) {
760		cg += i;
761		if (cg >= fs->fs_ncg)
762			cg -= fs->fs_ncg;
763		result = (*allocator)(ip, cg, 0, size);
764		if (result)
765			return (result);
766	}
767	/*
768	 * 3: brute force search
769	 * Note that we start at i == 2, since 0 was checked initially,
770	 * and 1 is always checked in the quadratic rehash.
771	 */
772	cg = (icg + 2) % fs->fs_ncg;
773	for (i = 2; i < fs->fs_ncg; i++) {
774		result = (*allocator)(ip, cg, 0, size);
775		if (result)
776			return (result);
777		cg++;
778		if (cg == fs->fs_ncg)
779			cg = 0;
780	}
781	return (0);
782}
783
784/*
785 * Determine whether a fragment can be extended.
786 *
787 * Check to see if the necessary fragments are available, and
788 * if they are, allocate them.
789 */
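/*
 * Example with hypothetical sizes (fs_fsize = 1024): extending a
 * 2048-byte fragment to 3072 bytes only requires that the single frag
 * immediately following it within the same block be free; a request
 * that would cross a block boundary is refused outright.
 */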
790static ufs_daddr_t
791ffs_fragextend(ip, cg, bprev, osize, nsize)
792	struct inode *ip;
793	int cg;
794	long bprev;
795	int osize, nsize;
796{
797	register struct fs *fs;
798	register struct cg *cgp;
799	struct buf *bp;
800	long bno;
801	int frags, bbase;
802	int i, error;
803
804	fs = ip->i_fs;
805	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
806		return (0);
807	frags = numfrags(fs, nsize);
808	bbase = fragnum(fs, bprev);
809	if (bbase > fragnum(fs, (bprev + frags - 1))) {
810		/* cannot extend across a block boundary */
811		return (0);
812	}
813	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
814		(int)fs->fs_cgsize, NOCRED, &bp);
815	if (error) {
816		brelse(bp);
817		return (0);
818	}
819	cgp = (struct cg *)bp->b_data;
820	if (!cg_chkmagic(cgp)) {
821		brelse(bp);
822		return (0);
823	}
824	cgp->cg_time = time.tv_sec;
825	bno = dtogd(fs, bprev);
826	for (i = numfrags(fs, osize); i < frags; i++)
827		if (isclr(cg_blksfree(cgp), bno + i)) {
828			brelse(bp);
829			return (0);
830		}
831	/*
832	 * The current fragment can be extended:
833	 * deduct the count on the fragment being extended into,
834	 * increase the count on the remaining fragment (if any),
835	 * and allocate the extended piece.
836	 */
837	for (i = frags; i < fs->fs_frag - bbase; i++)
838		if (isclr(cg_blksfree(cgp), bno + i))
839			break;
840	cgp->cg_frsum[i - numfrags(fs, osize)]--;
841	if (i != frags)
842		cgp->cg_frsum[i - frags]++;
843	for (i = numfrags(fs, osize); i < frags; i++) {
844		clrbit(cg_blksfree(cgp), bno + i);
845		cgp->cg_cs.cs_nffree--;
846		fs->fs_cstotal.cs_nffree--;
847		fs->fs_cs(fs, cg).cs_nffree--;
848	}
849	fs->fs_fmod = 1;
850	bdwrite(bp);
851	return (bprev);
852}
853
854/*
855 * Determine whether a block can be allocated.
856 *
857 * Check to see if a block of the appropriate size is available,
858 * and if it is, allocate it.
859 */
860static ufs_daddr_t
861ffs_alloccg(ip, cg, bpref, size)
862	struct inode *ip;
863	int cg;
864	ufs_daddr_t bpref;
865	int size;
866{
867	register struct fs *fs;
868	register struct cg *cgp;
869	struct buf *bp;
870	register int i;
871	int error, bno, frags, allocsiz;
872
873	fs = ip->i_fs;
874	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
875		return (0);
876	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
877		(int)fs->fs_cgsize, NOCRED, &bp);
878	if (error) {
879		brelse(bp);
880		return (0);
881	}
882	cgp = (struct cg *)bp->b_data;
883	if (!cg_chkmagic(cgp) ||
884	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
885		brelse(bp);
886		return (0);
887	}
888	cgp->cg_time = time.tv_sec;
889	if (size == fs->fs_bsize) {
890		bno = ffs_alloccgblk(fs, cgp, bpref);
891		bdwrite(bp);
892		return (bno);
893	}
894	/*
895	 * Check to see if any fragments are already available.
896	 * allocsiz is the size which will be allocated, hacking
897	 * it down to a smaller size if necessary.
898	 */
899	frags = numfrags(fs, size);
900	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
901		if (cgp->cg_frsum[allocsiz] != 0)
902			break;
903	if (allocsiz == fs->fs_frag) {
904		/*
905		 * no fragments were available, so a block will be
906		 * allocated, and hacked up
907		 */
908		if (cgp->cg_cs.cs_nbfree == 0) {
909			brelse(bp);
910			return (0);
911		}
912		bno = ffs_alloccgblk(fs, cgp, bpref);
913		bpref = dtogd(fs, bno);
914		for (i = frags; i < fs->fs_frag; i++)
915			setbit(cg_blksfree(cgp), bpref + i);
916		i = fs->fs_frag - frags;
917		cgp->cg_cs.cs_nffree += i;
918		fs->fs_cstotal.cs_nffree += i;
919		fs->fs_cs(fs, cg).cs_nffree += i;
920		fs->fs_fmod = 1;
921		cgp->cg_frsum[i]++;
922		bdwrite(bp);
923		return (bno);
924	}
925	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
926	if (bno < 0) {
927		brelse(bp);
928		return (0);
929	}
930	for (i = 0; i < frags; i++)
931		clrbit(cg_blksfree(cgp), bno + i);
932	cgp->cg_cs.cs_nffree -= frags;
933	fs->fs_cstotal.cs_nffree -= frags;
934	fs->fs_cs(fs, cg).cs_nffree -= frags;
935	fs->fs_fmod = 1;
936	cgp->cg_frsum[allocsiz]--;
937	if (frags != allocsiz)
938		cgp->cg_frsum[allocsiz - frags]++;
939	bdwrite(bp);
940	return (cg * fs->fs_fpg + bno);
941}
942
943/*
944 * Allocate a block in a cylinder group.
945 *
946 * This algorithm implements the following policy:
947 *   1) allocate the requested block.
948 *   2) allocate a rotationally optimal block in the same cylinder.
949 *   3) allocate the next available block on the block rotor for the
950 *      specified cylinder group.
951 * Note that this routine only allocates fs_bsize blocks; these
952 * blocks may be fragmented by the routine that allocates them.
953 */
954static ufs_daddr_t
955ffs_alloccgblk(fs, cgp, bpref)
956	register struct fs *fs;
957	register struct cg *cgp;
958	ufs_daddr_t bpref;
959{
960	ufs_daddr_t bno, blkno;
961	int cylno, pos, delta;
962	short *cylbp;
963	register int i;
964
965	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
966		bpref = cgp->cg_rotor;
967		goto norot;
968	}
969	bpref = blknum(fs, bpref);
970	bpref = dtogd(fs, bpref);
971	/*
972	 * if the requested block is available, use it
973	 */
974	if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
975		bno = bpref;
976		goto gotit;
977	}
978	if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
979		/*
980		 * Block layout information is not available.
981		 * Leaving bpref unchanged means we take the
982		 * next available free block following the one
983		 * we just allocated. Hopefully this will at
984		 * least hit a track cache on drives of unknown
985		 * geometry (e.g. SCSI).
986		 */
987		goto norot;
988	}
989	/*
990	 * check for a block available on the same cylinder
991	 */
992	cylno = cbtocylno(fs, bpref);
993	if (cg_blktot(cgp)[cylno] == 0)
994		goto norot;
995	/*
996	 * check the summary information to see if a block is
997	 * available in the requested cylinder starting at the
998	 * requested rotational position and proceeding around.
999	 */
1000	cylbp = cg_blks(fs, cgp, cylno);
1001	pos = cbtorpos(fs, bpref);
1002	for (i = pos; i < fs->fs_nrpos; i++)
1003		if (cylbp[i] > 0)
1004			break;
1005	if (i == fs->fs_nrpos)
1006		for (i = 0; i < pos; i++)
1007			if (cylbp[i] > 0)
1008				break;
1009	if (cylbp[i] > 0) {
1010		/*
1011		 * found a rotational position, now find the actual
1012		 * block. It is a panic if none is actually there.
1013		 */
1014		pos = cylno % fs->fs_cpc;
1015		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1016		if (fs_postbl(fs, pos)[i] == -1) {
1017			printf("pos = %d, i = %d, fs = %s\n",
1018			    pos, i, fs->fs_fsmnt);
1019			panic("ffs_alloccgblk: cyl groups corrupted");
1020		}
1021		for (i = fs_postbl(fs, pos)[i];; ) {
1022			if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
1023				bno = blkstofrags(fs, (bno + i));
1024				goto gotit;
1025			}
1026			delta = fs_rotbl(fs)[i];
1027			if (delta <= 0 ||
1028			    delta + i > fragstoblks(fs, fs->fs_fpg))
1029				break;
1030			i += delta;
1031		}
1032		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
1033		panic("ffs_alloccgblk: can't find blk in cyl");
1034	}
1035norot:
1036	/*
1037	 * no blocks in the requested cylinder, so take next
1038	 * available one in this cylinder group.
1039	 */
1040	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1041	if (bno < 0)
1042		return (0);
1043	cgp->cg_rotor = bno;
1044gotit:
1045	blkno = fragstoblks(fs, bno);
1046	ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
1047	ffs_clusteracct(fs, cgp, blkno, -1);
1048	cgp->cg_cs.cs_nbfree--;
1049	fs->fs_cstotal.cs_nbfree--;
1050	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
1051	cylno = cbtocylno(fs, bno);
1052	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
1053	cg_blktot(cgp)[cylno]--;
1054	fs->fs_fmod = 1;
1055	return (cgp->cg_cgx * fs->fs_fpg + bno);
1056}
1057
1058#ifdef notyet
1059/*
1060 * Determine whether a cluster can be allocated.
1061 *
1062 * We do not currently check for optimal rotational layout if there
1063 * are multiple choices in the same cylinder group. Instead we just
1064 * take the first one that we find following bpref.
1065 */
1066static ufs_daddr_t
1067ffs_clusteralloc(ip, cg, bpref, len)
1068	struct inode *ip;
1069	int cg;
1070	ufs_daddr_t bpref;
1071	int len;
1072{
1073	register struct fs *fs;
1074	register struct cg *cgp;
1075	struct buf *bp;
1076	int i, got, run, bno, bit, map;
1077	u_char *mapp;
1078	int32_t *lp;
1079
1080	fs = ip->i_fs;
1081	if (fs->fs_maxcluster[cg] < len)
1082		return (NULL);
1083	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1084	    NOCRED, &bp))
1085		goto fail;
1086	cgp = (struct cg *)bp->b_data;
1087	if (!cg_chkmagic(cgp))
1088		goto fail;
1089	/*
1090	 * Check to see if a cluster of the needed size (or bigger) is
1091	 * available in this cylinder group.
1092	 */
1093	lp = &cg_clustersum(cgp)[len];
1094	for (i = len; i <= fs->fs_contigsumsize; i++)
1095		if (*lp++ > 0)
1096			break;
1097	if (i > fs->fs_contigsumsize) {
1098		/*
1099		 * This is the first time looking for a cluster in this
1100		 * cylinder group. Update the cluster summary information
1101		 * to reflect the true maximum sized cluster so that
1102		 * future cluster allocation requests can avoid reading
1103		 * the cylinder group map only to find no clusters.
1104		 */
1105		lp = &cg_clustersum(cgp)[len - 1];
1106		for (i = len - 1; i > 0; i--)
1107			if (*lp-- > 0)
1108				break;
1109		fs->fs_maxcluster[cg] = i;
1110		goto fail;
1111	}
1112	/*
1113	 * Search the cluster map to find a big enough cluster.
1114	 * We take the first one that we find, even if it is larger
1115	 * than we need as we prefer to get one close to the previous
1116	 * block allocation. We do not search before the current
1117	 * preference point as we do not want to allocate a block
1118	 * that is allocated before the previous one (as we will
1119	 * then have to wait for another pass of the elevator
1120	 * algorithm before it will be read). We prefer to fail and
1121	 * be recalled to try an allocation in the next cylinder group.
1122	 */
1123	if (dtog(fs, bpref) != cg)
1124		bpref = 0;
1125	else
1126		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1127	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1128	map = *mapp++;
1129	bit = 1 << (bpref % NBBY);
1130	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
1131		if ((map & bit) == 0) {
1132			run = 0;
1133		} else {
1134			run++;
1135			if (run == len)
1136				break;
1137		}
1138		if ((got & (NBBY - 1)) != (NBBY - 1)) {
1139			bit <<= 1;
1140		} else {
1141			map = *mapp++;
1142			bit = 1;
1143		}
1144	}
1145	if (got == cgp->cg_nclusterblks)
1146		goto fail;
1147	/*
1148	 * Allocate the cluster that we have found.
1149	 */
1150	for (i = 1; i <= len; i++)
1151		if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
1152			panic("ffs_clusteralloc: map mismatch");
1153	bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1154	if (dtog(fs, bno) != cg)
1155		panic("ffs_clusteralloc: allocated out of group");
1156	len = blkstofrags(fs, len);
1157	for (i = 0; i < len; i += fs->fs_frag)
1158		if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i)
1159			panic("ffs_clusteralloc: lost block");
1160	bdwrite(bp);
1161	return (bno);
1162
1163fail:
1164	brelse(bp);
1165	return (0);
1166}
1167#endif
1168
1169/*
1170 * Determine whether an inode can be allocated.
1171 *
1172 * Check to see if an inode is available, and if it is,
1173 * allocate it using the following policy:
1174 *   1) allocate the requested inode.
1175 *   2) allocate the next available inode after the requested
1176 *      inode in the specified cylinder group.
1177 */
1178static ino_t
1179ffs_nodealloccg(ip, cg, ipref, mode)
1180	struct inode *ip;
1181	int cg;
1182	ufs_daddr_t ipref;
1183	int mode;
1184{
1185	register struct fs *fs;
1186	register struct cg *cgp;
1187	struct buf *bp;
1188	int error, start, len, loc, map, i;
1189
1190	fs = ip->i_fs;
1191	if (fs->fs_cs(fs, cg).cs_nifree == 0)
1192		return (0);
1193	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1194		(int)fs->fs_cgsize, NOCRED, &bp);
1195	if (error) {
1196		brelse(bp);
1197		return (0);
1198	}
1199	cgp = (struct cg *)bp->b_data;
1200	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1201		brelse(bp);
1202		return (0);
1203	}
1204	cgp->cg_time = time.tv_sec;
1205	if (ipref) {
1206		ipref %= fs->fs_ipg;
1207		if (isclr(cg_inosused(cgp), ipref))
1208			goto gotit;
1209	}
1210	start = cgp->cg_irotor / NBBY;
1211	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1212	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
1213	if (loc == 0) {
1214		len = start + 1;
1215		start = 0;
1216		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
1217		if (loc == 0) {
1218			printf("cg = %d, irotor = %ld, fs = %s\n",
1219			    cg, cgp->cg_irotor, fs->fs_fsmnt);
1220			panic("ffs_nodealloccg: map corrupted");
1221			/* NOTREACHED */
1222		}
1223	}
1224	i = start + len - loc;
1225	map = cg_inosused(cgp)[i];
1226	ipref = i * NBBY;
1227	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1228		if ((map & i) == 0) {
1229			cgp->cg_irotor = ipref;
1230			goto gotit;
1231		}
1232	}
1233	printf("fs = %s\n", fs->fs_fsmnt);
1234	panic("ffs_nodealloccg: block not in map");
1235	/* NOTREACHED */
1236gotit:
1237	setbit(cg_inosused(cgp), ipref);
1238	cgp->cg_cs.cs_nifree--;
1239	fs->fs_cstotal.cs_nifree--;
1240	fs->fs_cs(fs, cg).cs_nifree--;
1241	fs->fs_fmod = 1;
1242	if ((mode & IFMT) == IFDIR) {
1243		cgp->cg_cs.cs_ndir++;
1244		fs->fs_cstotal.cs_ndir++;
1245		fs->fs_cs(fs, cg).cs_ndir++;
1246	}
1247	bdwrite(bp);
1248	return (cg * fs->fs_ipg + ipref);
1249}
1250
1251/*
1252 * Free a block or fragment.
1253 *
1254 * The specified block or fragment is placed back in the
1255 * free map. If a fragment is deallocated, a possible
1256 * block reassembly is checked.
1257 */
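/*
 * Example with hypothetical sizes (fs_frag = 8): freeing the one busy
 * 1024-byte frag of an otherwise free 8192-byte block reassembles the
 * whole block, so the per-fragment free counts are rolled back by
 * fs_frag and cs_nbfree is incremented instead.
 */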
1258void
1259ffs_blkfree(ip, bno, size)
1260	register struct inode *ip;
1261	ufs_daddr_t bno;
1262	long size;
1263{
1264	register struct fs *fs;
1265	register struct cg *cgp;
1266	struct buf *bp;
1267	ufs_daddr_t blkno;
1268	int i, error, cg, blk, frags, bbase;
1269
1270	fs = ip->i_fs;
1271	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1272		printf("dev = 0x%lx, bsize = %ld, size = %ld, fs = %s\n",
1273		    (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1274		panic("ffs_blkfree: bad size");
1275	}
1276	cg = dtog(fs, bno);
1277	if ((u_int)bno >= fs->fs_size) {
1278		printf("bad block %ld, ino %ld\n", bno, ip->i_number);
1279		ffs_fserr(fs, ip->i_uid, "bad block");
1280		return;
1281	}
1282	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1283		(int)fs->fs_cgsize, NOCRED, &bp);
1284	if (error) {
1285		brelse(bp);
1286		return;
1287	}
1288	cgp = (struct cg *)bp->b_data;
1289	if (!cg_chkmagic(cgp)) {
1290		brelse(bp);
1291		return;
1292	}
1293	cgp->cg_time = time.tv_sec;
1294	bno = dtogd(fs, bno);
1295	if (size == fs->fs_bsize) {
1296		blkno = fragstoblks(fs, bno);
1297		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1298			printf("dev = 0x%lx, block = %ld, fs = %s\n",
1299			    (u_long) ip->i_dev, bno, fs->fs_fsmnt);
1300			panic("ffs_blkfree: freeing free block");
1301		}
1302		ffs_setblock(fs, cg_blksfree(cgp), blkno);
1303		ffs_clusteracct(fs, cgp, blkno, 1);
1304		cgp->cg_cs.cs_nbfree++;
1305		fs->fs_cstotal.cs_nbfree++;
1306		fs->fs_cs(fs, cg).cs_nbfree++;
1307		i = cbtocylno(fs, bno);
1308		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
1309		cg_blktot(cgp)[i]++;
1310	} else {
1311		bbase = bno - fragnum(fs, bno);
1312		/*
1313		 * decrement the counts associated with the old frags
1314		 */
1315		blk = blkmap(fs, cg_blksfree(cgp), bbase);
1316		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1317		/*
1318		 * deallocate the fragment
1319		 */
1320		frags = numfrags(fs, size);
1321		for (i = 0; i < frags; i++) {
1322			if (isset(cg_blksfree(cgp), bno + i)) {
1323				printf("dev = 0x%lx, block = %ld, fs = %s\n",
1324				    (u_long) ip->i_dev, bno + i, fs->fs_fsmnt);
1325				panic("ffs_blkfree: freeing free frag");
1326			}
1327			setbit(cg_blksfree(cgp), bno + i);
1328		}
1329		cgp->cg_cs.cs_nffree += i;
1330		fs->fs_cstotal.cs_nffree += i;
1331		fs->fs_cs(fs, cg).cs_nffree += i;
1332		/*
1333		 * add back in counts associated with the new frags
1334		 */
1335		blk = blkmap(fs, cg_blksfree(cgp), bbase);
1336		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1337		/*
1338		 * if a complete block has been reassembled, account for it
1339		 */
1340		blkno = fragstoblks(fs, bbase);
1341		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1342			cgp->cg_cs.cs_nffree -= fs->fs_frag;
1343			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1344			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1345			ffs_clusteracct(fs, cgp, blkno, 1);
1346			cgp->cg_cs.cs_nbfree++;
1347			fs->fs_cstotal.cs_nbfree++;
1348			fs->fs_cs(fs, cg).cs_nbfree++;
1349			i = cbtocylno(fs, bbase);
1350			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
1351			cg_blktot(cgp)[i]++;
1352		}
1353	}
1354	fs->fs_fmod = 1;
1355	bdwrite(bp);
1356}
1357
1358#ifdef DIAGNOSTIC
1359/*
1360 * Verify allocation of a block or fragment. Returns true if block or
1361 * fragment is allocated, false if it is free.
1362 */
1363int
1364ffs_checkblk(ip, bno, size)
1365	struct inode *ip;
1366	ufs_daddr_t bno;
1367	long size;
1368{
1369	struct fs *fs;
1370	struct cg *cgp;
1371	struct buf *bp;
1372	int i, error, frags, free;
1373
1374	fs = ip->i_fs;
1375	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1376		printf("bsize = %d, size = %d, fs = %s\n",
1377		    fs->fs_bsize, size, fs->fs_fsmnt);
1378		panic("ffs_checkblk: bad size");
1379	}
1380	if ((u_int)bno >= fs->fs_size)
1381		panic("ffs_checkblk: bad block %d", bno);
1382	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1383		(int)fs->fs_cgsize, NOCRED, &bp);
1384	if (error)
1385		panic("ffs_checkblk: cg bread failed");
1386	cgp = (struct cg *)bp->b_data;
1387	if (!cg_chkmagic(cgp))
1388		panic("ffs_checkblk: cg magic mismatch");
1389	bno = dtogd(fs, bno);
1390	if (size == fs->fs_bsize) {
1391		free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
1392	} else {
1393		frags = numfrags(fs, size);
1394		for (free = 0, i = 0; i < frags; i++)
1395			if (isset(cg_blksfree(cgp), bno + i))
1396				free++;
1397		if (free != 0 && free != frags)
1398			panic("ffs_checkblk: partially free fragment");
1399	}
1400	brelse(bp);
1401	return (!free);
1402}
1403#endif /* DIAGNOSTIC */
1404
1405/*
1406 * Free an inode.
1407 *
1408 * The specified inode is placed back in the free map.
1409 */
1410int
1411ffs_vfree(ap)
1412	struct vop_vfree_args /* {
1413		struct vnode *a_pvp;
1414		ino_t a_ino;
1415		int a_mode;
1416	} */ *ap;
1417{
1418	register struct fs *fs;
1419	register struct cg *cgp;
1420	register struct inode *pip;
1421	ino_t ino = ap->a_ino;
1422	struct buf *bp;
1423	int error, cg;
1424
1425	pip = VTOI(ap->a_pvp);
1426	fs = pip->i_fs;
1427	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1428		panic("ffs_vfree: range: dev = 0x%x, ino = %d, fs = %s",
1429		    pip->i_dev, ino, fs->fs_fsmnt);
1430	cg = ino_to_cg(fs, ino);
1431	error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1432		(int)fs->fs_cgsize, NOCRED, &bp);
1433	if (error) {
1434		brelse(bp);
1435		return (0);
1436	}
1437	cgp = (struct cg *)bp->b_data;
1438	if (!cg_chkmagic(cgp)) {
1439		brelse(bp);
1440		return (0);
1441	}
1442	cgp->cg_time = time.tv_sec;
1443	ino %= fs->fs_ipg;
1444	if (isclr(cg_inosused(cgp), ino)) {
1445		printf("dev = 0x%lx, ino = %ld, fs = %s\n",
1446		    (u_long)pip->i_dev, ino, fs->fs_fsmnt);
1447		if (fs->fs_ronly == 0)
1448			panic("ffs_vfree: freeing free inode");
1449	}
1450	clrbit(cg_inosused(cgp), ino);
1451	if (ino < cgp->cg_irotor)
1452		cgp->cg_irotor = ino;
1453	cgp->cg_cs.cs_nifree++;
1454	fs->fs_cstotal.cs_nifree++;
1455	fs->fs_cs(fs, cg).cs_nifree++;
1456	if ((ap->a_mode & IFMT) == IFDIR) {
1457		cgp->cg_cs.cs_ndir--;
1458		fs->fs_cstotal.cs_ndir--;
1459		fs->fs_cs(fs, cg).cs_ndir--;
1460	}
1461	fs->fs_fmod = 1;
1462	bdwrite(bp);
1463	return (0);
1464}
1465
1466/*
1467 * Find a block of the specified size in the specified cylinder group.
1468 *
1469 * It is a panic if a request is made to find a block when none are
1470 * available.
1471 */
1472static ufs_daddr_t
1473ffs_mapsearch(fs, cgp, bpref, allocsiz)
1474	register struct fs *fs;
1475	register struct cg *cgp;
1476	ufs_daddr_t bpref;
1477	int allocsiz;
1478{
1479	ufs_daddr_t bno;
1480	int start, len, loc, i;
1481	int blk, field, subfield, pos;
1482
1483	/*
1484	 * find the fragment by searching through the free block
1485	 * map for an appropriate bit pattern
1486	 */
1487	if (bpref)
1488		start = dtogd(fs, bpref) / NBBY;
1489	else
1490		start = cgp->cg_frotor / NBBY;
1491	len = howmany(fs->fs_fpg, NBBY) - start;
1492	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
1493		(u_char *)fragtbl[fs->fs_frag],
1494		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1495	if (loc == 0) {
1496		len = start + 1;
1497		start = 0;
1498		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
1499			(u_char *)fragtbl[fs->fs_frag],
1500			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1501		if (loc == 0) {
1502			printf("start = %d, len = %d, fs = %s\n",
1503			    start, len, fs->fs_fsmnt);
1504			panic("ffs_alloccg: map corrupted");
1505			/* NOTREACHED */
1506		}
1507	}
1508	bno = (start + len - loc) * NBBY;
1509	cgp->cg_frotor = bno;
1510	/*
1511	 * found the byte in the map
1512	 * sift through the bits to find the selected frag
1513	 */
1514	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1515		blk = blkmap(fs, cg_blksfree(cgp), bno);
1516		blk <<= 1;
1517		field = around[allocsiz];
1518		subfield = inside[allocsiz];
1519		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1520			if ((blk & field) == subfield)
1521				return (bno + pos);
1522			field <<= 1;
1523			subfield <<= 1;
1524		}
1525	}
1526	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
1527	panic("ffs_alloccg: block not in map");
1528	return (-1);
1529}
1530
1531/*
1532 * Update the cluster map because of an allocation or free.
1533 *
1534 * Cnt == 1 means free; cnt == -1 means allocating.
1535 */
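/*
 * Worked example for a free (cnt == 1) with a hypothetical map: if the
 * freed block has a run of 2 free blocks behind it and 3 free blocks
 * ahead of it, the three pieces join into one cluster of 6, so sump[6]
 * is incremented (subject to the fs_contigsumsize cap) while sump[2]
 * and sump[3] are each decremented.
 */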
1536static void
1537ffs_clusteracct(fs, cgp, blkno, cnt)
1538	struct fs *fs;
1539	struct cg *cgp;
1540	ufs_daddr_t blkno;
1541	int cnt;
1542{
1543	int32_t *sump;
1544	int32_t *lp;
1545	u_char *freemapp, *mapp;
1546	int i, start, end, forw, back, map, bit;
1547
1548	if (fs->fs_contigsumsize <= 0)
1549		return;
1550	freemapp = cg_clustersfree(cgp);
1551	sump = cg_clustersum(cgp);
1552	/*
1553	 * Allocate or clear the actual block.
1554	 */
1555	if (cnt > 0)
1556		setbit(freemapp, blkno);
1557	else
1558		clrbit(freemapp, blkno);
1559	/*
1560	 * Find the size of the cluster going forward.
1561	 */
1562	start = blkno + 1;
1563	end = start + fs->fs_contigsumsize;
1564	if (end >= cgp->cg_nclusterblks)
1565		end = cgp->cg_nclusterblks;
1566	mapp = &freemapp[start / NBBY];
1567	map = *mapp++;
1568	bit = 1 << (start % NBBY);
1569	for (i = start; i < end; i++) {
1570		if ((map & bit) == 0)
1571			break;
1572		if ((i & (NBBY - 1)) != (NBBY - 1)) {
1573			bit <<= 1;
1574		} else {
1575			map = *mapp++;
1576			bit = 1;
1577		}
1578	}
1579	forw = i - start;
1580	/*
1581	 * Find the size of the cluster going backward.
1582	 */
1583	start = blkno - 1;
1584	end = start - fs->fs_contigsumsize;
1585	if (end < 0)
1586		end = -1;
1587	mapp = &freemapp[start / NBBY];
1588	map = *mapp--;
1589	bit = 1 << (start % NBBY);
1590	for (i = start; i > end; i--) {
1591		if ((map & bit) == 0)
1592			break;
1593		if ((i & (NBBY - 1)) != 0) {
1594			bit >>= 1;
1595		} else {
1596			map = *mapp--;
1597			bit = 1 << (NBBY - 1);
1598		}
1599	}
1600	back = start - i;
1601	/*
1602	 * Account for old cluster and the possibly new forward and
1603	 * back clusters.
1604	 */
1605	i = back + forw + 1;
1606	if (i > fs->fs_contigsumsize)
1607		i = fs->fs_contigsumsize;
1608	sump[i] += cnt;
1609	if (back > 0)
1610		sump[back] -= cnt;
1611	if (forw > 0)
1612		sump[forw] -= cnt;
1613	/*
1614	 * Update cluster summary information.
1615	 */
1616	lp = &sump[fs->fs_contigsumsize];
1617	for (i = fs->fs_contigsumsize; i > 0; i--)
1618		if (*lp-- > 0)
1619			break;
1620	fs->fs_maxcluster[cgp->cg_cgx] = i;
1621}
1622
1623/*
1624 * Fserr prints the name of a file system with an error diagnostic.
1625 *
1626 * The form of the error message is:
1627 *	fs: error message
1628 */
1629static void
1630ffs_fserr(fs, uid, cp)
1631	struct fs *fs;
1632	u_int uid;
1633	char *cp;
1634{
1635	struct proc *p = curproc;	/* XXX */
1636
1637	log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1,
1638			p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp);
1639}
1640