ffs_balloc.c revision 60041
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
34 * $FreeBSD: head/sys/ufs/ffs/ffs_balloc.c 60041 2000-05-05 09:59:14Z phk $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/bio.h>
40#include <sys/buf.h>
41#include <sys/lock.h>
42#include <sys/mount.h>
43#include <sys/vnode.h>
44
45#include <ufs/ufs/quota.h>
46#include <ufs/ufs/inode.h>
47#include <ufs/ufs/ufs_extern.h>
48
49#include <ufs/ffs/fs.h>
50#include <ufs/ffs/ffs_extern.h>
51
52/*
53 * Balloc defines the structure of file system storage
54 * by allocating the physical blocks on a device given
55 * the inode and the logical block number in a file.
56 */
57int
58ffs_balloc(ap)
59	struct vop_balloc_args /* {
60		struct vnode *a_vp;
61		ufs_daddr_t a_lbn;
62		int a_size;
63		struct ucred *a_cred;
64		int a_flags;
65		struct buf *a_bpp;
66	} */ *ap;
67{
68	struct inode *ip;
69	ufs_daddr_t lbn;
70	int size;
71	struct ucred *cred;
72	int flags;
73	struct fs *fs;
74	ufs_daddr_t nb;
75	struct buf *bp, *nbp;
76	struct vnode *vp;
77	struct indir indirs[NIADDR + 2];
78	ufs_daddr_t newb, *bap, pref;
79	int deallocated, osize, nsize, num, i, error;
80	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
81	int unwindidx = -1;
82	struct proc *p = curproc;	/* XXX */
83
84	vp = ap->a_vp;
85	ip = VTOI(vp);
86	fs = ip->i_fs;
87	lbn = lblkno(fs, ap->a_startoffset);
88	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
89	if (size > fs->fs_bsize)
90		panic("ffs_balloc: blk too big");
91	*ap->a_bpp = NULL;
92	if (lbn < 0)
93		return (EFBIG);
94	cred = ap->a_cred;
95	flags = ap->a_flags;
96
97	/*
98	 * If the next write will extend the file into a new block,
99	 * and the file is currently composed of a fragment
100	 * this fragment has to be extended to be a full block.
101	 */
102	nb = lblkno(fs, ip->i_size);
103	if (nb < NDADDR && nb < lbn) {
104		osize = blksize(fs, ip, nb);
105		if (osize < fs->fs_bsize && osize > 0) {
106			error = ffs_realloccg(ip, nb,
107				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
108				osize, (int)fs->fs_bsize, cred, &bp);
109			if (error)
110				return (error);
111			if (DOINGSOFTDEP(vp))
112				softdep_setup_allocdirect(ip, nb,
113				    dbtofsb(fs, bp->b_blkno), ip->i_db[nb],
114				    fs->fs_bsize, osize, bp);
115			ip->i_size = smalllblktosize(fs, nb + 1);
116			ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
117			ip->i_flag |= IN_CHANGE | IN_UPDATE;
118			if (flags & B_SYNC)
119				bwrite(bp);
120			else
121				bawrite(bp);
122		}
123	}
124	/*
125	 * The first NDADDR blocks are direct blocks
126	 */
127	if (lbn < NDADDR) {
128		nb = ip->i_db[lbn];
129		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
130			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
131			if (error) {
132				brelse(bp);
133				return (error);
134			}
135			bp->b_blkno = fsbtodb(fs, nb);
136			*ap->a_bpp = bp;
137			return (0);
138		}
139		if (nb != 0) {
140			/*
141			 * Consider need to reallocate a fragment.
142			 */
143			osize = fragroundup(fs, blkoff(fs, ip->i_size));
144			nsize = fragroundup(fs, size);
145			if (nsize <= osize) {
146				error = bread(vp, lbn, osize, NOCRED, &bp);
147				if (error) {
148					brelse(bp);
149					return (error);
150				}
151				bp->b_blkno = fsbtodb(fs, nb);
152			} else {
153				error = ffs_realloccg(ip, lbn,
154				    ffs_blkpref(ip, lbn, (int)lbn,
155					&ip->i_db[0]), osize, nsize, cred, &bp);
156				if (error)
157					return (error);
158				if (DOINGSOFTDEP(vp))
159					softdep_setup_allocdirect(ip, lbn,
160					    dbtofsb(fs, bp->b_blkno), nb,
161					    nsize, osize, bp);
162			}
163		} else {
164			if (ip->i_size < smalllblktosize(fs, lbn + 1))
165				nsize = fragroundup(fs, size);
166			else
167				nsize = fs->fs_bsize;
168			error = ffs_alloc(ip, lbn,
169			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
170			    nsize, cred, &newb);
171			if (error)
172				return (error);
173			bp = getblk(vp, lbn, nsize, 0, 0);
174			bp->b_blkno = fsbtodb(fs, newb);
175			if (flags & B_CLRBUF)
176				vfs_bio_clrbuf(bp);
177			if (DOINGSOFTDEP(vp))
178				softdep_setup_allocdirect(ip, lbn, newb, 0,
179				    nsize, 0, bp);
180		}
181		ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno);
182		ip->i_flag |= IN_CHANGE | IN_UPDATE;
183		*ap->a_bpp = bp;
184		return (0);
185	}
186	/*
187	 * Determine the number of levels of indirection.
188	 */
189	pref = 0;
190	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
191		return(error);
192#ifdef DIAGNOSTIC
193	if (num < 1)
194		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
195#endif
196	/*
197	 * Fetch the first indirect block allocating if necessary.
198	 */
199	--num;
200	nb = ip->i_ib[indirs[0].in_off];
201	allocib = NULL;
202	allocblk = allociblk;
203	if (nb == 0) {
204		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
205	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
206		    cred, &newb)) != 0)
207			return (error);
208		nb = newb;
209		*allocblk++ = nb;
210		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
211		bp->b_blkno = fsbtodb(fs, nb);
212		vfs_bio_clrbuf(bp);
213		if (DOINGSOFTDEP(vp)) {
214			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
215			    newb, 0, fs->fs_bsize, 0, bp);
216			bdwrite(bp);
217		} else {
218			/*
219			 * Write synchronously so that indirect blocks
220			 * never point at garbage.
221			 */
222			if (DOINGASYNC(vp))
223				bdwrite(bp);
224			else if ((error = bwrite(bp)) != 0)
225				goto fail;
226		}
227		allocib = &ip->i_ib[indirs[0].in_off];
228		*allocib = nb;
229		ip->i_flag |= IN_CHANGE | IN_UPDATE;
230	}
231	/*
232	 * Fetch through the indirect blocks, allocating as necessary.
233	 */
234	for (i = 1;;) {
235		error = bread(vp,
236		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
237		if (error) {
238			brelse(bp);
239			goto fail;
240		}
241		bap = (ufs_daddr_t *)bp->b_data;
242		nb = bap[indirs[i].in_off];
243		if (i == num)
244			break;
245		i += 1;
246		if (nb != 0) {
247			bqrelse(bp);
248			continue;
249		}
250		if (pref == 0)
251			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
252		if ((error =
253		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
254			brelse(bp);
255			goto fail;
256		}
257		nb = newb;
258		*allocblk++ = nb;
259		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
260		nbp->b_blkno = fsbtodb(fs, nb);
261		vfs_bio_clrbuf(nbp);
262		if (DOINGSOFTDEP(vp)) {
263			softdep_setup_allocindir_meta(nbp, ip, bp,
264			    indirs[i - 1].in_off, nb);
265			bdwrite(nbp);
266		} else {
267			/*
268			 * Write synchronously so that indirect blocks
269			 * never point at garbage.
270			 */
271			if ((error = bwrite(nbp)) != 0) {
272				brelse(bp);
273				goto fail;
274			}
275		}
276		bap[indirs[i - 1].in_off] = nb;
277		if (allocib == NULL && unwindidx < 0)
278			unwindidx = i - 1;
279		/*
280		 * If required, write synchronously, otherwise use
281		 * delayed write.
282		 */
283		if (flags & B_SYNC) {
284			bwrite(bp);
285		} else {
286			if (bp->b_bufsize == fs->fs_bsize)
287				bp->b_flags |= B_CLUSTEROK;
288			bdwrite(bp);
289		}
290	}
291	/*
292	 * Get the data block, allocating if necessary.
293	 */
294	if (nb == 0) {
295		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
296		error = ffs_alloc(ip,
297		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
298		if (error) {
299			brelse(bp);
300			goto fail;
301		}
302		nb = newb;
303		*allocblk++ = nb;
304		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
305		nbp->b_blkno = fsbtodb(fs, nb);
306		if (flags & B_CLRBUF)
307			vfs_bio_clrbuf(nbp);
308		if (DOINGSOFTDEP(vp))
309			softdep_setup_allocindir_page(ip, lbn, bp,
310			    indirs[i].in_off, nb, 0, nbp);
311		bap[indirs[i].in_off] = nb;
312		/*
313		 * If required, write synchronously, otherwise use
314		 * delayed write.
315		 */
316		if (flags & B_SYNC) {
317			bwrite(bp);
318		} else {
319			if (bp->b_bufsize == fs->fs_bsize)
320				bp->b_flags |= B_CLUSTEROK;
321			bdwrite(bp);
322		}
323		*ap->a_bpp = nbp;
324		return (0);
325	}
326	brelse(bp);
327	if (flags & B_CLRBUF) {
328		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
329		if (error) {
330			brelse(nbp);
331			goto fail;
332		}
333	} else {
334		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
335		nbp->b_blkno = fsbtodb(fs, nb);
336	}
337	*ap->a_bpp = nbp;
338	return (0);
339fail:
340	/*
341	 * If we have failed part way through block allocation, we
342	 * have to deallocate any indirect blocks that we have allocated.
343	 * We have to fsync the file before we start to get rid of all
344	 * of its dependencies so that we do not leave them dangling.
345	 * We have to sync it at the end so that the soft updates code
346	 * does not find any untracked changes. Although this is really
347	 * slow, running out of disk space is not expected to be a common
348	 * occurence. The error return from fsync is ignored as we already
349	 * have an error to return to the user.
350	 */
351	(void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
352	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
353		ffs_blkfree(ip, *blkp, fs->fs_bsize);
354		deallocated += fs->fs_bsize;
355	}
356	if (allocib != NULL) {
357		*allocib = 0;
358	} else if (unwindidx >= 0) {
359		int r;
360
361		r = bread(vp, indirs[unwindidx].in_lbn,
362		    (int)fs->fs_bsize, NOCRED, &bp);
363		if (r) {
364			panic("Could not unwind indirect block, error %d", r);
365			brelse(bp);
366		} else {
367			bap = (ufs_daddr_t *)bp->b_data;
368			bap[indirs[unwindidx].in_off] = 0;
369			if (flags & B_SYNC) {
370				bwrite(bp);
371			} else {
372				if (bp->b_bufsize == fs->fs_bsize)
373					bp->b_flags |= B_CLUSTEROK;
374				bdwrite(bp);
375			}
376		}
377	}
378	if (deallocated) {
379#ifdef QUOTA
380		/*
381		 * Restore user's disk quota because allocation failed.
382		 */
383		(void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
384#endif
385		ip->i_blocks -= btodb(deallocated);
386		ip->i_flag |= IN_CHANGE | IN_UPDATE;
387	}
388	(void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
389	return (error);
390}
391