ffs_balloc.c revision 62976
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
34 * $FreeBSD: head/sys/ufs/ffs/ffs_balloc.c 62976 2000-07-11 22:07:57Z mckusick $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/bio.h>
40#include <sys/buf.h>
41#include <sys/lock.h>
42#include <sys/mount.h>
43#include <sys/vnode.h>
44
45#include <ufs/ufs/quota.h>
46#include <ufs/ufs/inode.h>
47#include <ufs/ufs/ufs_extern.h>
48
49#include <ufs/ffs/fs.h>
50#include <ufs/ffs/ffs_extern.h>
51
52/*
53 * Balloc defines the structure of file system storage
54 * by allocating the physical blocks on a device given
55 * the inode and the logical block number in a file.
56 */
57int
58ffs_balloc(ap)
59	struct vop_balloc_args /* {
60		struct vnode *a_vp;
61		ufs_daddr_t a_lbn;
62		int a_size;
63		struct ucred *a_cred;
64		int a_flags;
65		struct buf *a_bpp;
66	} */ *ap;
67{
68	struct inode *ip;
69	ufs_daddr_t lbn;
70	int size;
71	struct ucred *cred;
72	int flags;
73	struct fs *fs;
74	ufs_daddr_t nb;
75	struct buf *bp, *nbp;
76	struct vnode *vp;
77	struct indir indirs[NIADDR + 2];
78	ufs_daddr_t newb, *bap, pref;
79	int deallocated, osize, nsize, num, i, error;
80	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
81	int unwindidx = -1;
82	struct proc *p = curproc;	/* XXX */
83
84	vp = ap->a_vp;
85	ip = VTOI(vp);
86	fs = ip->i_fs;
87	lbn = lblkno(fs, ap->a_startoffset);
88	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
89	if (size > fs->fs_bsize)
90		panic("ffs_balloc: blk too big");
91	*ap->a_bpp = NULL;
92	if (lbn < 0)
93		return (EFBIG);
94	cred = ap->a_cred;
95	flags = ap->a_flags;
96
97	/*
98	 * If the next write will extend the file into a new block,
99	 * and the file is currently composed of a fragment
100	 * this fragment has to be extended to be a full block.
101	 */
102	nb = lblkno(fs, ip->i_size);
103	if (nb < NDADDR && nb < lbn) {
104		osize = blksize(fs, ip, nb);
105		if (osize < fs->fs_bsize && osize > 0) {
106			error = ffs_realloccg(ip, nb,
107				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
108				osize, (int)fs->fs_bsize, cred, &bp);
109			if (error)
110				return (error);
111			if (DOINGSOFTDEP(vp))
112				softdep_setup_allocdirect(ip, nb,
113				    dbtofsb(fs, bp->b_blkno), ip->i_db[nb],
114				    fs->fs_bsize, osize, bp);
115			ip->i_size = smalllblktosize(fs, nb + 1);
116			ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
117			ip->i_flag |= IN_CHANGE | IN_UPDATE;
118			if (flags & B_SYNC)
119				bwrite(bp);
120			else
121				bawrite(bp);
122		}
123	}
124	/*
125	 * The first NDADDR blocks are direct blocks
126	 */
127	if (lbn < NDADDR) {
128		if (flags & B_METAONLY)
129			panic("ffs_balloc: B_METAONLY for direct block");
130		nb = ip->i_db[lbn];
131		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
132			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
133			if (error) {
134				brelse(bp);
135				return (error);
136			}
137			bp->b_blkno = fsbtodb(fs, nb);
138			*ap->a_bpp = bp;
139			return (0);
140		}
141		if (nb != 0) {
142			/*
143			 * Consider need to reallocate a fragment.
144			 */
145			osize = fragroundup(fs, blkoff(fs, ip->i_size));
146			nsize = fragroundup(fs, size);
147			if (nsize <= osize) {
148				error = bread(vp, lbn, osize, NOCRED, &bp);
149				if (error) {
150					brelse(bp);
151					return (error);
152				}
153				bp->b_blkno = fsbtodb(fs, nb);
154			} else {
155				error = ffs_realloccg(ip, lbn,
156				    ffs_blkpref(ip, lbn, (int)lbn,
157					&ip->i_db[0]), osize, nsize, cred, &bp);
158				if (error)
159					return (error);
160				if (DOINGSOFTDEP(vp))
161					softdep_setup_allocdirect(ip, lbn,
162					    dbtofsb(fs, bp->b_blkno), nb,
163					    nsize, osize, bp);
164			}
165		} else {
166			if (ip->i_size < smalllblktosize(fs, lbn + 1))
167				nsize = fragroundup(fs, size);
168			else
169				nsize = fs->fs_bsize;
170			error = ffs_alloc(ip, lbn,
171			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
172			    nsize, cred, &newb);
173			if (error)
174				return (error);
175			bp = getblk(vp, lbn, nsize, 0, 0);
176			bp->b_blkno = fsbtodb(fs, newb);
177			if (flags & B_CLRBUF)
178				vfs_bio_clrbuf(bp);
179			if (DOINGSOFTDEP(vp))
180				softdep_setup_allocdirect(ip, lbn, newb, 0,
181				    nsize, 0, bp);
182		}
183		ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno);
184		ip->i_flag |= IN_CHANGE | IN_UPDATE;
185		*ap->a_bpp = bp;
186		return (0);
187	}
188	/*
189	 * Determine the number of levels of indirection.
190	 */
191	pref = 0;
192	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
193		return(error);
194#ifdef DIAGNOSTIC
195	if (num < 1)
196		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
197#endif
198	/*
199	 * Fetch the first indirect block allocating if necessary.
200	 */
201	--num;
202	nb = ip->i_ib[indirs[0].in_off];
203	allocib = NULL;
204	allocblk = allociblk;
205	if (nb == 0) {
206		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
207	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
208		    cred, &newb)) != 0)
209			return (error);
210		nb = newb;
211		*allocblk++ = nb;
212		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
213		bp->b_blkno = fsbtodb(fs, nb);
214		vfs_bio_clrbuf(bp);
215		if (DOINGSOFTDEP(vp)) {
216			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
217			    newb, 0, fs->fs_bsize, 0, bp);
218			bdwrite(bp);
219		} else {
220			/*
221			 * Write synchronously so that indirect blocks
222			 * never point at garbage.
223			 */
224			if (DOINGASYNC(vp))
225				bdwrite(bp);
226			else if ((error = bwrite(bp)) != 0)
227				goto fail;
228		}
229		allocib = &ip->i_ib[indirs[0].in_off];
230		*allocib = nb;
231		ip->i_flag |= IN_CHANGE | IN_UPDATE;
232	}
233	/*
234	 * Fetch through the indirect blocks, allocating as necessary.
235	 */
236	for (i = 1;;) {
237		error = bread(vp,
238		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
239		if (error) {
240			brelse(bp);
241			goto fail;
242		}
243		bap = (ufs_daddr_t *)bp->b_data;
244		nb = bap[indirs[i].in_off];
245		if (i == num)
246			break;
247		i += 1;
248		if (nb != 0) {
249			bqrelse(bp);
250			continue;
251		}
252		if (pref == 0)
253			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
254		if ((error =
255		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
256			brelse(bp);
257			goto fail;
258		}
259		nb = newb;
260		*allocblk++ = nb;
261		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
262		nbp->b_blkno = fsbtodb(fs, nb);
263		vfs_bio_clrbuf(nbp);
264		if (DOINGSOFTDEP(vp)) {
265			softdep_setup_allocindir_meta(nbp, ip, bp,
266			    indirs[i - 1].in_off, nb);
267			bdwrite(nbp);
268		} else {
269			/*
270			 * Write synchronously so that indirect blocks
271			 * never point at garbage.
272			 */
273			if ((error = bwrite(nbp)) != 0) {
274				brelse(bp);
275				goto fail;
276			}
277		}
278		bap[indirs[i - 1].in_off] = nb;
279		if (allocib == NULL && unwindidx < 0)
280			unwindidx = i - 1;
281		/*
282		 * If required, write synchronously, otherwise use
283		 * delayed write.
284		 */
285		if (flags & B_SYNC) {
286			bwrite(bp);
287		} else {
288			if (bp->b_bufsize == fs->fs_bsize)
289				bp->b_flags |= B_CLUSTEROK;
290			bdwrite(bp);
291		}
292	}
293	/*
294	 * If asked only for the indirect block, then return it.
295	 */
296	if (flags & B_METAONLY) {
297		*ap->a_bpp = bp;
298		return (0);
299	}
300	/*
301	 * Get the data block, allocating if necessary.
302	 */
303	if (nb == 0) {
304		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
305		error = ffs_alloc(ip,
306		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
307		if (error) {
308			brelse(bp);
309			goto fail;
310		}
311		nb = newb;
312		*allocblk++ = nb;
313		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
314		nbp->b_blkno = fsbtodb(fs, nb);
315		if (flags & B_CLRBUF)
316			vfs_bio_clrbuf(nbp);
317		if (DOINGSOFTDEP(vp))
318			softdep_setup_allocindir_page(ip, lbn, bp,
319			    indirs[i].in_off, nb, 0, nbp);
320		bap[indirs[i].in_off] = nb;
321		/*
322		 * If required, write synchronously, otherwise use
323		 * delayed write.
324		 */
325		if (flags & B_SYNC) {
326			bwrite(bp);
327		} else {
328			if (bp->b_bufsize == fs->fs_bsize)
329				bp->b_flags |= B_CLUSTEROK;
330			bdwrite(bp);
331		}
332		*ap->a_bpp = nbp;
333		return (0);
334	}
335	brelse(bp);
336	if (flags & B_CLRBUF) {
337		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
338		if (error) {
339			brelse(nbp);
340			goto fail;
341		}
342	} else {
343		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
344		nbp->b_blkno = fsbtodb(fs, nb);
345	}
346	*ap->a_bpp = nbp;
347	return (0);
348fail:
349	/*
350	 * If we have failed part way through block allocation, we
351	 * have to deallocate any indirect blocks that we have allocated.
352	 * We have to fsync the file before we start to get rid of all
353	 * of its dependencies so that we do not leave them dangling.
354	 * We have to sync it at the end so that the soft updates code
355	 * does not find any untracked changes. Although this is really
356	 * slow, running out of disk space is not expected to be a common
357	 * occurence. The error return from fsync is ignored as we already
358	 * have an error to return to the user.
359	 */
360	(void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
361	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
362		ffs_blkfree(ip, *blkp, fs->fs_bsize);
363		deallocated += fs->fs_bsize;
364	}
365	if (allocib != NULL) {
366		*allocib = 0;
367	} else if (unwindidx >= 0) {
368		int r;
369
370		r = bread(vp, indirs[unwindidx].in_lbn,
371		    (int)fs->fs_bsize, NOCRED, &bp);
372		if (r) {
373			panic("Could not unwind indirect block, error %d", r);
374			brelse(bp);
375		} else {
376			bap = (ufs_daddr_t *)bp->b_data;
377			bap[indirs[unwindidx].in_off] = 0;
378			if (flags & B_SYNC) {
379				bwrite(bp);
380			} else {
381				if (bp->b_bufsize == fs->fs_bsize)
382					bp->b_flags |= B_CLUSTEROK;
383				bdwrite(bp);
384			}
385		}
386	}
387	if (deallocated) {
388#ifdef QUOTA
389		/*
390		 * Restore user's disk quota because allocation failed.
391		 */
392		(void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
393#endif
394		ip->i_blocks -= btodb(deallocated);
395		ip->i_flag |= IN_CHANGE | IN_UPDATE;
396	}
397	(void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
398	return (error);
399}
400