ffs_balloc.c revision 98542
/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 * $FreeBSD: head/sys/ufs/ffs/ffs_balloc.c 98542 2002-06-21 06:18:05Z mckusick $
 */
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/bio.h>
52#include <sys/buf.h>
53#include <sys/lock.h>
54#include <sys/mount.h>
55#include <sys/vnode.h>
56
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64/*
65 * Balloc defines the structure of filesystem storage
66 * by allocating the physical blocks on a device given
67 * the inode and the logical block number in a file.
68 * This is the allocation strategy for UFS1. Below is
69 * the allocation strategy for UFS2.
70 */
71int
72ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
73    struct ucred *cred, int flags, struct buf **bpp)
74{
75	struct inode *ip;
76	ufs_lbn_t lbn, lastlbn;
77	struct fs *fs;
78	ufs1_daddr_t nb;
79	struct buf *bp, *nbp;
80	struct indir indirs[NIADDR + 2];
81	int deallocated, osize, nsize, num, i, error;
82	ufs2_daddr_t newb;
83	ufs1_daddr_t *bap, pref;
84	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
85	int unwindidx = -1;
86	struct thread *td = curthread;	/* XXX */
87
88	ip = VTOI(vp);
89	fs = ip->i_fs;
90	lbn = lblkno(fs, startoffset);
91	size = blkoff(fs, startoffset) + size;
92	if (size > fs->fs_bsize)
93		panic("ffs_balloc_ufs1: blk too big");
94	*bpp = NULL;
95	if (lbn < 0)
96		return (EFBIG);
97
98	/*
99	 * If the next write will extend the file into a new block,
100	 * and the file is currently composed of a fragment
101	 * this fragment has to be extended to be a full block.
102	 */
103	lastlbn = lblkno(fs, ip->i_size);
104	if (lastlbn < NDADDR && lastlbn < lbn) {
105		nb = lastlbn;
106		osize = blksize(fs, ip, nb);
107		if (osize < fs->fs_bsize && osize > 0) {
108			error = ffs_realloccg(ip, nb,
109				ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
110				    &ip->i_din1->di_db[0]),
111				osize, (int)fs->fs_bsize, cred, &bp);
112			if (error)
113				return (error);
114			if (DOINGSOFTDEP(vp))
115				softdep_setup_allocdirect(ip, nb,
116				    dbtofsb(fs, bp->b_blkno),
117				    ip->i_din1->di_db[nb],
118				    fs->fs_bsize, osize, bp);
119			ip->i_size = smalllblktosize(fs, nb + 1);
120			ip->i_din1->di_size = ip->i_size;
121			ip->i_din1->di_db[nb] = dbtofsb(fs, bp->b_blkno);
122			ip->i_flag |= IN_CHANGE | IN_UPDATE;
123			if (flags & B_SYNC)
124				bwrite(bp);
125			else
126				bawrite(bp);
127		}
128	}
129	/*
130	 * The first NDADDR blocks are direct blocks
131	 */
132	if (lbn < NDADDR) {
133		if (flags & B_METAONLY)
134			panic("ffs_balloc_ufs1: B_METAONLY for direct block");
135		nb = ip->i_din1->di_db[lbn];
136		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
137			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
138			if (error) {
139				brelse(bp);
140				return (error);
141			}
142			bp->b_blkno = fsbtodb(fs, nb);
143			*bpp = bp;
144			return (0);
145		}
146		if (nb != 0) {
147			/*
148			 * Consider need to reallocate a fragment.
149			 */
150			osize = fragroundup(fs, blkoff(fs, ip->i_size));
151			nsize = fragroundup(fs, size);
152			if (nsize <= osize) {
153				error = bread(vp, lbn, osize, NOCRED, &bp);
154				if (error) {
155					brelse(bp);
156					return (error);
157				}
158				bp->b_blkno = fsbtodb(fs, nb);
159			} else {
160				error = ffs_realloccg(ip, lbn,
161				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
162					&ip->i_din1->di_db[0]),
163				    osize, nsize, cred, &bp);
164				if (error)
165					return (error);
166				if (DOINGSOFTDEP(vp))
167					softdep_setup_allocdirect(ip, lbn,
168					    dbtofsb(fs, bp->b_blkno), nb,
169					    nsize, osize, bp);
170			}
171		} else {
172			if (ip->i_size < smalllblktosize(fs, lbn + 1))
173				nsize = fragroundup(fs, size);
174			else
175				nsize = fs->fs_bsize;
176			error = ffs_alloc(ip, lbn,
177			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
178				&ip->i_din1->di_db[0]),
179			    nsize, cred, &newb);
180			if (error)
181				return (error);
182			bp = getblk(vp, lbn, nsize, 0, 0);
183			bp->b_blkno = fsbtodb(fs, newb);
184			if (flags & B_CLRBUF)
185				vfs_bio_clrbuf(bp);
186			if (DOINGSOFTDEP(vp))
187				softdep_setup_allocdirect(ip, lbn, newb, 0,
188				    nsize, 0, bp);
189		}
190		ip->i_din1->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
191		ip->i_flag |= IN_CHANGE | IN_UPDATE;
192		*bpp = bp;
193		return (0);
194	}
195	/*
196	 * Determine the number of levels of indirection.
197	 */
198	pref = 0;
199	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
200		return(error);
201#ifdef DIAGNOSTIC
202	if (num < 1)
203		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
204#endif
205	/*
206	 * Fetch the first indirect block allocating if necessary.
207	 */
208	--num;
209	nb = ip->i_din1->di_ib[indirs[0].in_off];
210	allocib = NULL;
211	allocblk = allociblk;
212	if (nb == 0) {
213		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
214	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
215		    cred, &newb)) != 0)
216			return (error);
217		nb = newb;
218		*allocblk++ = nb;
219		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
220		bp->b_blkno = fsbtodb(fs, nb);
221		vfs_bio_clrbuf(bp);
222		if (DOINGSOFTDEP(vp)) {
223			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
224			    newb, 0, fs->fs_bsize, 0, bp);
225			bdwrite(bp);
226		} else {
227			/*
228			 * Write synchronously so that indirect blocks
229			 * never point at garbage.
230			 */
231			if (DOINGASYNC(vp))
232				bdwrite(bp);
233			else if ((error = bwrite(bp)) != 0)
234				goto fail;
235		}
236		allocib = &ip->i_din1->di_ib[indirs[0].in_off];
237		*allocib = nb;
238		ip->i_flag |= IN_CHANGE | IN_UPDATE;
239	}
240	/*
241	 * Fetch through the indirect blocks, allocating as necessary.
242	 */
243	for (i = 1;;) {
244		error = bread(vp,
245		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
246		if (error) {
247			brelse(bp);
248			goto fail;
249		}
250		bap = (ufs1_daddr_t *)bp->b_data;
251		nb = bap[indirs[i].in_off];
252		if (i == num)
253			break;
254		i += 1;
255		if (nb != 0) {
256			bqrelse(bp);
257			continue;
258		}
259		if (pref == 0)
260			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
261		if ((error =
262		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
263			brelse(bp);
264			goto fail;
265		}
266		nb = newb;
267		*allocblk++ = nb;
268		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
269		nbp->b_blkno = fsbtodb(fs, nb);
270		vfs_bio_clrbuf(nbp);
271		if (DOINGSOFTDEP(vp)) {
272			softdep_setup_allocindir_meta(nbp, ip, bp,
273			    indirs[i - 1].in_off, nb);
274			bdwrite(nbp);
275		} else {
276			/*
277			 * Write synchronously so that indirect blocks
278			 * never point at garbage.
279			 */
280			if ((error = bwrite(nbp)) != 0) {
281				brelse(bp);
282				goto fail;
283			}
284		}
285		bap[indirs[i - 1].in_off] = nb;
286		if (allocib == NULL && unwindidx < 0)
287			unwindidx = i - 1;
288		/*
289		 * If required, write synchronously, otherwise use
290		 * delayed write.
291		 */
292		if (flags & B_SYNC) {
293			bwrite(bp);
294		} else {
295			if (bp->b_bufsize == fs->fs_bsize)
296				bp->b_flags |= B_CLUSTEROK;
297			bdwrite(bp);
298		}
299	}
300	/*
301	 * If asked only for the indirect block, then return it.
302	 */
303	if (flags & B_METAONLY) {
304		*bpp = bp;
305		return (0);
306	}
307	/*
308	 * Get the data block, allocating if necessary.
309	 */
310	if (nb == 0) {
311		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
312		error = ffs_alloc(ip,
313		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
314		if (error) {
315			brelse(bp);
316			goto fail;
317		}
318		nb = newb;
319		*allocblk++ = nb;
320		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
321		nbp->b_blkno = fsbtodb(fs, nb);
322		if (flags & B_CLRBUF)
323			vfs_bio_clrbuf(nbp);
324		if (DOINGSOFTDEP(vp))
325			softdep_setup_allocindir_page(ip, lbn, bp,
326			    indirs[i].in_off, nb, 0, nbp);
327		bap[indirs[i].in_off] = nb;
328		/*
329		 * If required, write synchronously, otherwise use
330		 * delayed write.
331		 */
332		if (flags & B_SYNC) {
333			bwrite(bp);
334		} else {
335			if (bp->b_bufsize == fs->fs_bsize)
336				bp->b_flags |= B_CLUSTEROK;
337			bdwrite(bp);
338		}
339		*bpp = nbp;
340		return (0);
341	}
342	brelse(bp);
343	if (flags & B_CLRBUF) {
344		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
345		if (error) {
346			brelse(nbp);
347			goto fail;
348		}
349	} else {
350		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
351		nbp->b_blkno = fsbtodb(fs, nb);
352	}
353	*bpp = nbp;
354	return (0);
355fail:
356	/*
357	 * If we have failed part way through block allocation, we
358	 * have to deallocate any indirect blocks that we have allocated.
359	 * We have to fsync the file before we start to get rid of all
360	 * of its dependencies so that we do not leave them dangling.
361	 * We have to sync it at the end so that the soft updates code
362	 * does not find any untracked changes. Although this is really
363	 * slow, running out of disk space is not expected to be a common
364	 * occurence. The error return from fsync is ignored as we already
365	 * have an error to return to the user.
366	 */
367	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
368	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
369		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
370		deallocated += fs->fs_bsize;
371	}
372	if (allocib != NULL) {
373		*allocib = 0;
374	} else if (unwindidx >= 0) {
375		int r;
376
377		r = bread(vp, indirs[unwindidx].in_lbn,
378		    (int)fs->fs_bsize, NOCRED, &bp);
379		if (r) {
380			panic("Could not unwind indirect block, error %d", r);
381			brelse(bp);
382		} else {
383			bap = (ufs1_daddr_t *)bp->b_data;
384			bap[indirs[unwindidx].in_off] = 0;
385			if (flags & B_SYNC) {
386				bwrite(bp);
387			} else {
388				if (bp->b_bufsize == fs->fs_bsize)
389					bp->b_flags |= B_CLUSTEROK;
390				bdwrite(bp);
391			}
392		}
393	}
394	if (deallocated) {
395#ifdef QUOTA
396		/*
397		 * Restore user's disk quota because allocation failed.
398		 */
399		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
400#endif
401		ip->i_din1->di_blocks -= btodb(deallocated);
402		ip->i_flag |= IN_CHANGE | IN_UPDATE;
403	}
404	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
405	return (error);
406}
407
408/*
409 * Balloc defines the structure of file system storage
410 * by allocating the physical blocks on a device given
411 * the inode and the logical block number in a file.
412 * This is the allocation strategy for UFS2. Above is
413 * the allocation strategy for UFS1.
414 */
415int
416ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
417    struct ucred *cred, int flags, struct buf **bpp)
418{
419	struct inode *ip;
420	ufs_lbn_t lbn, lastlbn;
421	struct fs *fs;
422	struct buf *bp, *nbp;
423	struct indir indirs[NIADDR + 2];
424	ufs2_daddr_t nb, newb, *bap, pref;
425	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
426	int deallocated, osize, nsize, num, i, error;
427	int unwindidx = -1;
428	struct thread *td = curthread;	/* XXX */
429
430	ip = VTOI(vp);
431	fs = ip->i_fs;
432	lbn = lblkno(fs, startoffset);
433	size = blkoff(fs, startoffset) + size;
434	if (size > fs->fs_bsize)
435		panic("ffs_balloc_ufs2: blk too big");
436	*bpp = NULL;
437	if (lbn < 0)
438		return (EFBIG);
439
440	/*
441	 * If the next write will extend the file into a new block,
442	 * and the file is currently composed of a fragment
443	 * this fragment has to be extended to be a full block.
444	 */
445	lastlbn = lblkno(fs, ip->i_size);
446	if (lastlbn < NDADDR && lastlbn < lbn) {
447		nb = lastlbn;
448		osize = blksize(fs, ip, nb);
449		if (osize < fs->fs_bsize && osize > 0) {
450			error = ffs_realloccg(ip, nb,
451				ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
452				    &ip->i_din2->di_db[0]),
453				osize, (int)fs->fs_bsize, cred, &bp);
454			if (error)
455				return (error);
456			if (DOINGSOFTDEP(vp))
457				softdep_setup_allocdirect(ip, nb,
458				    dbtofsb(fs, bp->b_blkno),
459				    ip->i_din2->di_db[nb],
460				    fs->fs_bsize, osize, bp);
461			ip->i_size = smalllblktosize(fs, nb + 1);
462			ip->i_din2->di_size = ip->i_size;
463			ip->i_din2->di_db[nb] = dbtofsb(fs, bp->b_blkno);
464			ip->i_flag |= IN_CHANGE | IN_UPDATE;
465			if (flags & B_SYNC)
466				bwrite(bp);
467			else
468				bawrite(bp);
469		}
470	}
471	/*
472	 * The first NDADDR blocks are direct blocks
473	 */
474	if (lbn < NDADDR) {
475		if (flags & B_METAONLY)
476			panic("ffs_balloc_ufs2: B_METAONLY for direct block");
477		nb = ip->i_din2->di_db[lbn];
478		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
479			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
480			if (error) {
481				brelse(bp);
482				return (error);
483			}
484			bp->b_blkno = fsbtodb(fs, nb);
485			*bpp = bp;
486			return (0);
487		}
488		if (nb != 0) {
489			/*
490			 * Consider need to reallocate a fragment.
491			 */
492			osize = fragroundup(fs, blkoff(fs, ip->i_size));
493			nsize = fragroundup(fs, size);
494			if (nsize <= osize) {
495				error = bread(vp, lbn, osize, NOCRED, &bp);
496				if (error) {
497					brelse(bp);
498					return (error);
499				}
500				bp->b_blkno = fsbtodb(fs, nb);
501			} else {
502				error = ffs_realloccg(ip, lbn,
503				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
504					&ip->i_din2->di_db[0]),
505				    osize, nsize, cred, &bp);
506				if (error)
507					return (error);
508				if (DOINGSOFTDEP(vp))
509					softdep_setup_allocdirect(ip, lbn,
510					    dbtofsb(fs, bp->b_blkno), nb,
511					    nsize, osize, bp);
512			}
513		} else {
514			if (ip->i_size < smalllblktosize(fs, lbn + 1))
515				nsize = fragroundup(fs, size);
516			else
517				nsize = fs->fs_bsize;
518			error = ffs_alloc(ip, lbn,
519			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
520				&ip->i_din2->di_db[0]),
521			    nsize, cred, &newb);
522			if (error)
523				return (error);
524			bp = getblk(vp, lbn, nsize, 0, 0);
525			bp->b_blkno = fsbtodb(fs, newb);
526			if (flags & B_CLRBUF)
527				vfs_bio_clrbuf(bp);
528			if (DOINGSOFTDEP(vp))
529				softdep_setup_allocdirect(ip, lbn, newb, 0,
530				    nsize, 0, bp);
531		}
532		ip->i_din2->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
533		ip->i_flag |= IN_CHANGE | IN_UPDATE;
534		*bpp = bp;
535		return (0);
536	}
537	/*
538	 * Determine the number of levels of indirection.
539	 */
540	pref = 0;
541	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
542		return(error);
543#ifdef DIAGNOSTIC
544	if (num < 1)
545		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
546#endif
547	/*
548	 * Fetch the first indirect block allocating if necessary.
549	 */
550	--num;
551	nb = ip->i_din2->di_ib[indirs[0].in_off];
552	allocib = NULL;
553	allocblk = allociblk;
554	if (nb == 0) {
555		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
556	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
557		    cred, &newb)) != 0)
558			return (error);
559		nb = newb;
560		*allocblk++ = nb;
561		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
562		bp->b_blkno = fsbtodb(fs, nb);
563		vfs_bio_clrbuf(bp);
564		if (DOINGSOFTDEP(vp)) {
565			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
566			    newb, 0, fs->fs_bsize, 0, bp);
567			bdwrite(bp);
568		} else {
569			/*
570			 * Write synchronously so that indirect blocks
571			 * never point at garbage.
572			 */
573			if (DOINGASYNC(vp))
574				bdwrite(bp);
575			else if ((error = bwrite(bp)) != 0)
576				goto fail;
577		}
578		allocib = &ip->i_din2->di_ib[indirs[0].in_off];
579		*allocib = nb;
580		ip->i_flag |= IN_CHANGE | IN_UPDATE;
581	}
582	/*
583	 * Fetch through the indirect blocks, allocating as necessary.
584	 */
585	for (i = 1;;) {
586		error = bread(vp,
587		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
588		if (error) {
589			brelse(bp);
590			goto fail;
591		}
592		bap = (ufs2_daddr_t *)bp->b_data;
593		nb = bap[indirs[i].in_off];
594		if (i == num)
595			break;
596		i += 1;
597		if (nb != 0) {
598			bqrelse(bp);
599			continue;
600		}
601		if (pref == 0)
602			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
603		if ((error =
604		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
605			brelse(bp);
606			goto fail;
607		}
608		nb = newb;
609		*allocblk++ = nb;
610		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
611		nbp->b_blkno = fsbtodb(fs, nb);
612		vfs_bio_clrbuf(nbp);
613		if (DOINGSOFTDEP(vp)) {
614			softdep_setup_allocindir_meta(nbp, ip, bp,
615			    indirs[i - 1].in_off, nb);
616			bdwrite(nbp);
617		} else {
618			/*
619			 * Write synchronously so that indirect blocks
620			 * never point at garbage.
621			 */
622			if ((error = bwrite(nbp)) != 0) {
623				brelse(bp);
624				goto fail;
625			}
626		}
627		bap[indirs[i - 1].in_off] = nb;
628		if (allocib == NULL && unwindidx < 0)
629			unwindidx = i - 1;
630		/*
631		 * If required, write synchronously, otherwise use
632		 * delayed write.
633		 */
634		if (flags & B_SYNC) {
635			bwrite(bp);
636		} else {
637			if (bp->b_bufsize == fs->fs_bsize)
638				bp->b_flags |= B_CLUSTEROK;
639			bdwrite(bp);
640		}
641	}
642	/*
643	 * If asked only for the indirect block, then return it.
644	 */
645	if (flags & B_METAONLY) {
646		*bpp = bp;
647		return (0);
648	}
649	/*
650	 * Get the data block, allocating if necessary.
651	 */
652	if (nb == 0) {
653		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
654		error = ffs_alloc(ip,
655		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
656		if (error) {
657			brelse(bp);
658			goto fail;
659		}
660		nb = newb;
661		*allocblk++ = nb;
662		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
663		nbp->b_blkno = fsbtodb(fs, nb);
664		if (flags & B_CLRBUF)
665			vfs_bio_clrbuf(nbp);
666		if (DOINGSOFTDEP(vp))
667			softdep_setup_allocindir_page(ip, lbn, bp,
668			    indirs[i].in_off, nb, 0, nbp);
669		bap[indirs[i].in_off] = nb;
670		/*
671		 * If required, write synchronously, otherwise use
672		 * delayed write.
673		 */
674		if (flags & B_SYNC) {
675			bwrite(bp);
676		} else {
677			if (bp->b_bufsize == fs->fs_bsize)
678				bp->b_flags |= B_CLUSTEROK;
679			bdwrite(bp);
680		}
681		*bpp = nbp;
682		return (0);
683	}
684	brelse(bp);
685	if (flags & B_CLRBUF) {
686		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
687		if (error) {
688			brelse(nbp);
689			goto fail;
690		}
691	} else {
692		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
693		nbp->b_blkno = fsbtodb(fs, nb);
694	}
695	*bpp = nbp;
696	return (0);
697fail:
698	/*
699	 * If we have failed part way through block allocation, we
700	 * have to deallocate any indirect blocks that we have allocated.
701	 * We have to fsync the file before we start to get rid of all
702	 * of its dependencies so that we do not leave them dangling.
703	 * We have to sync it at the end so that the soft updates code
704	 * does not find any untracked changes. Although this is really
705	 * slow, running out of disk space is not expected to be a common
706	 * occurence. The error return from fsync is ignored as we already
707	 * have an error to return to the user.
708	 */
709	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
710	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
711		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
712		deallocated += fs->fs_bsize;
713	}
714	if (allocib != NULL) {
715		*allocib = 0;
716	} else if (unwindidx >= 0) {
717		int r;
718
719		r = bread(vp, indirs[unwindidx].in_lbn,
720		    (int)fs->fs_bsize, NOCRED, &bp);
721		if (r) {
722			panic("Could not unwind indirect block, error %d", r);
723			brelse(bp);
724		} else {
725			bap = (ufs2_daddr_t *)bp->b_data;
726			bap[indirs[unwindidx].in_off] = 0;
727			if (flags & B_SYNC) {
728				bwrite(bp);
729			} else {
730				if (bp->b_bufsize == fs->fs_bsize)
731					bp->b_flags |= B_CLUSTEROK;
732				bdwrite(bp);
733			}
734		}
735	}
736	if (deallocated) {
737#ifdef QUOTA
738		/*
739		 * Restore user's disk quota because allocation failed.
740		 */
741		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
742#endif
743		ip->i_din2->di_blocks -= btodb(deallocated);
744		ip->i_flag |= IN_CHANGE | IN_UPDATE;
745	}
746	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
747	return (error);
748}
749