Deleted Added
full compact
ffs_balloc.c (248623) ffs_balloc.c (249582)
1/*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_balloc.c 248623 2013-03-22 21:45:28Z mckusick $");
63__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_balloc.c 249582 2013-04-17 11:40:10Z gabor $");
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/bio.h>
68#include <sys/buf.h>
69#include <sys/lock.h>
70#include <sys/mount.h>
71#include <sys/vnode.h>
72
73#include <ufs/ufs/quota.h>
74#include <ufs/ufs/inode.h>
75#include <ufs/ufs/ufs_extern.h>
76#include <ufs/ufs/extattr.h>
77#include <ufs/ufs/ufsmount.h>
78
79#include <ufs/ffs/fs.h>
80#include <ufs/ffs/ffs_extern.h>
81
82/*
83 * Balloc defines the structure of filesystem storage
84 * by allocating the physical blocks on a device given
85 * the inode and the logical block number in a file.
86 * This is the allocation strategy for UFS1. Below is
87 * the allocation strategy for UFS2.
88 */
89int
90ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91 struct ucred *cred, int flags, struct buf **bpp)
92{
93 struct inode *ip;
94 struct ufs1_dinode *dp;
95 ufs_lbn_t lbn, lastlbn;
96 struct fs *fs;
97 ufs1_daddr_t nb;
98 struct buf *bp, *nbp;
99 struct ufsmount *ump;
100 struct indir indirs[NIADDR + 2];
101 int deallocated, osize, nsize, num, i, error;
102 ufs2_daddr_t newb;
103 ufs1_daddr_t *bap, pref;
104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
106 int unwindidx = -1;
107 int saved_inbdflush;
108 static struct timeval lastfail;
109 static int curfail;
110 int gbflags, reclaimed;
111
112 ip = VTOI(vp);
113 dp = ip->i_din1;
114 fs = ip->i_fs;
115 ump = ip->i_ump;
116 lbn = lblkno(fs, startoffset);
117 size = blkoff(fs, startoffset) + size;
118 reclaimed = 0;
119 if (size > fs->fs_bsize)
120 panic("ffs_balloc_ufs1: blk too big");
121 *bpp = NULL;
122 if (flags & IO_EXT)
123 return (EOPNOTSUPP);
124 if (lbn < 0)
125 return (EFBIG);
126 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
127
128 if (DOINGSOFTDEP(vp))
129 softdep_prealloc(vp, MNT_WAIT);
130 /*
131 * If the next write will extend the file into a new block,
132 * and the file is currently composed of a fragment
133 * this fragment has to be extended to be a full block.
134 */
135 lastlbn = lblkno(fs, ip->i_size);
136 if (lastlbn < NDADDR && lastlbn < lbn) {
137 nb = lastlbn;
138 osize = blksize(fs, ip, nb);
139 if (osize < fs->fs_bsize && osize > 0) {
140 UFS_LOCK(ump);
141 error = ffs_realloccg(ip, nb, dp->di_db[nb],
142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
144 cred, &bp);
145 if (error)
146 return (error);
147 if (DOINGSOFTDEP(vp))
148 softdep_setup_allocdirect(ip, nb,
149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
150 fs->fs_bsize, osize, bp);
151 ip->i_size = smalllblktosize(fs, nb + 1);
152 dp->di_size = ip->i_size;
153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
154 ip->i_flag |= IN_CHANGE | IN_UPDATE;
155 if (flags & IO_SYNC)
156 bwrite(bp);
157 else
158 bawrite(bp);
159 }
160 }
161 /*
162 * The first NDADDR blocks are direct blocks
163 */
164 if (lbn < NDADDR) {
165 if (flags & BA_METAONLY)
166 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
167 nb = dp->di_db[lbn];
168 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
169 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
170 if (error) {
171 brelse(bp);
172 return (error);
173 }
174 bp->b_blkno = fsbtodb(fs, nb);
175 *bpp = bp;
176 return (0);
177 }
178 if (nb != 0) {
179 /*
180 * Consider need to reallocate a fragment.
181 */
182 osize = fragroundup(fs, blkoff(fs, ip->i_size));
183 nsize = fragroundup(fs, size);
184 if (nsize <= osize) {
185 error = bread(vp, lbn, osize, NOCRED, &bp);
186 if (error) {
187 brelse(bp);
188 return (error);
189 }
190 bp->b_blkno = fsbtodb(fs, nb);
191 } else {
192 UFS_LOCK(ump);
193 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
194 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
195 &dp->di_db[0]), osize, nsize, flags,
196 cred, &bp);
197 if (error)
198 return (error);
199 if (DOINGSOFTDEP(vp))
200 softdep_setup_allocdirect(ip, lbn,
201 dbtofsb(fs, bp->b_blkno), nb,
202 nsize, osize, bp);
203 }
204 } else {
205 if (ip->i_size < smalllblktosize(fs, lbn + 1))
206 nsize = fragroundup(fs, size);
207 else
208 nsize = fs->fs_bsize;
209 UFS_LOCK(ump);
210 error = ffs_alloc(ip, lbn,
211 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
212 nsize, flags, cred, &newb);
213 if (error)
214 return (error);
215 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
216 bp->b_blkno = fsbtodb(fs, newb);
217 if (flags & BA_CLRBUF)
218 vfs_bio_clrbuf(bp);
219 if (DOINGSOFTDEP(vp))
220 softdep_setup_allocdirect(ip, lbn, newb, 0,
221 nsize, 0, bp);
222 }
223 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
224 ip->i_flag |= IN_CHANGE | IN_UPDATE;
225 *bpp = bp;
226 return (0);
227 }
228 /*
229 * Determine the number of levels of indirection.
230 */
231 pref = 0;
232 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
233 return(error);
234#ifdef INVARIANTS
235 if (num < 1)
236 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
237#endif
238 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
239 /*
240 * Fetch the first indirect block allocating if necessary.
241 */
242 --num;
243 nb = dp->di_ib[indirs[0].in_off];
244 allocib = NULL;
245 allocblk = allociblk;
246 lbns_remfree = lbns;
247 if (nb == 0) {
248 UFS_LOCK(ump);
249 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
250 (ufs1_daddr_t *)0);
251 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
252 flags, cred, &newb)) != 0) {
253 curthread_pflags_restore(saved_inbdflush);
254 return (error);
255 }
256 pref = newb + fs->fs_frag;
257 nb = newb;
258 *allocblk++ = nb;
259 *lbns_remfree++ = indirs[1].in_lbn;
260 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
261 bp->b_blkno = fsbtodb(fs, nb);
262 vfs_bio_clrbuf(bp);
263 if (DOINGSOFTDEP(vp)) {
264 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
265 newb, 0, fs->fs_bsize, 0, bp);
266 bdwrite(bp);
267 } else {
268 /*
269 * Write synchronously so that indirect blocks
270 * never point at garbage.
271 */
272 if (DOINGASYNC(vp))
273 bdwrite(bp);
274 else if ((error = bwrite(bp)) != 0)
275 goto fail;
276 }
277 allocib = &dp->di_ib[indirs[0].in_off];
278 *allocib = nb;
279 ip->i_flag |= IN_CHANGE | IN_UPDATE;
280 }
281 /*
282 * Fetch through the indirect blocks, allocating as necessary.
283 */
284retry:
285 for (i = 1;;) {
286 error = bread(vp,
287 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
288 if (error) {
289 brelse(bp);
290 goto fail;
291 }
292 bap = (ufs1_daddr_t *)bp->b_data;
293 nb = bap[indirs[i].in_off];
294 if (i == num)
295 break;
296 i += 1;
297 if (nb != 0) {
298 bqrelse(bp);
299 continue;
300 }
301 UFS_LOCK(ump);
302 if (pref == 0)
303 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
304 (ufs1_daddr_t *)0);
305 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
306 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
307 brelse(bp);
308 if (++reclaimed == 1) {
309 UFS_LOCK(ump);
310 softdep_request_cleanup(fs, vp, cred,
311 FLUSH_BLOCKS_WAIT);
312 UFS_UNLOCK(ump);
313 goto retry;
314 }
315 if (ppsratecheck(&lastfail, &curfail, 1)) {
316 ffs_fserr(fs, ip->i_number, "filesystem full");
317 uprintf("\n%s: write failed, filesystem "
318 "is full\n", fs->fs_fsmnt);
319 }
320 goto fail;
321 }
322 pref = newb + fs->fs_frag;
323 nb = newb;
324 *allocblk++ = nb;
325 *lbns_remfree++ = indirs[i].in_lbn;
326 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
327 nbp->b_blkno = fsbtodb(fs, nb);
328 vfs_bio_clrbuf(nbp);
329 if (DOINGSOFTDEP(vp)) {
330 softdep_setup_allocindir_meta(nbp, ip, bp,
331 indirs[i - 1].in_off, nb);
332 bdwrite(nbp);
333 } else {
334 /*
335 * Write synchronously so that indirect blocks
336 * never point at garbage.
337 */
338 if ((error = bwrite(nbp)) != 0) {
339 brelse(bp);
340 goto fail;
341 }
342 }
343 bap[indirs[i - 1].in_off] = nb;
344 if (allocib == NULL && unwindidx < 0)
345 unwindidx = i - 1;
346 /*
347 * If required, write synchronously, otherwise use
348 * delayed write.
349 */
350 if (flags & IO_SYNC) {
351 bwrite(bp);
352 } else {
353 if (bp->b_bufsize == fs->fs_bsize)
354 bp->b_flags |= B_CLUSTEROK;
355 bdwrite(bp);
356 }
357 }
358 /*
359 * If asked only for the indirect block, then return it.
360 */
361 if (flags & BA_METAONLY) {
362 curthread_pflags_restore(saved_inbdflush);
363 *bpp = bp;
364 return (0);
365 }
366 /*
367 * Get the data block, allocating if necessary.
368 */
369 if (nb == 0) {
370 UFS_LOCK(ump);
371 if (pref == 0)
372 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
373 &bap[0]);
374 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
375 flags | IO_BUFLOCKED, cred, &newb);
376 if (error) {
377 brelse(bp);
378 if (++reclaimed == 1) {
379 UFS_LOCK(ump);
380 softdep_request_cleanup(fs, vp, cred,
381 FLUSH_BLOCKS_WAIT);
382 UFS_UNLOCK(ump);
383 goto retry;
384 }
385 if (ppsratecheck(&lastfail, &curfail, 1)) {
386 ffs_fserr(fs, ip->i_number, "filesystem full");
387 uprintf("\n%s: write failed, filesystem "
388 "is full\n", fs->fs_fsmnt);
389 }
390 goto fail;
391 }
392 nb = newb;
393 *allocblk++ = nb;
394 *lbns_remfree++ = lbn;
395 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
396 nbp->b_blkno = fsbtodb(fs, nb);
397 if (flags & BA_CLRBUF)
398 vfs_bio_clrbuf(nbp);
399 if (DOINGSOFTDEP(vp))
400 softdep_setup_allocindir_page(ip, lbn, bp,
401 indirs[i].in_off, nb, 0, nbp);
402 bap[indirs[i].in_off] = nb;
403 /*
404 * If required, write synchronously, otherwise use
405 * delayed write.
406 */
407 if (flags & IO_SYNC) {
408 bwrite(bp);
409 } else {
410 if (bp->b_bufsize == fs->fs_bsize)
411 bp->b_flags |= B_CLUSTEROK;
412 bdwrite(bp);
413 }
414 curthread_pflags_restore(saved_inbdflush);
415 *bpp = nbp;
416 return (0);
417 }
418 brelse(bp);
419 if (flags & BA_CLRBUF) {
420 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
421 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
422 error = cluster_read(vp, ip->i_size, lbn,
423 (int)fs->fs_bsize, NOCRED,
424 MAXBSIZE, seqcount, gbflags, &nbp);
425 } else {
426 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
427 gbflags, &nbp);
428 }
429 if (error) {
430 brelse(nbp);
431 goto fail;
432 }
433 } else {
434 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
435 nbp->b_blkno = fsbtodb(fs, nb);
436 }
437 curthread_pflags_restore(saved_inbdflush);
438 *bpp = nbp;
439 return (0);
440fail:
441 curthread_pflags_restore(saved_inbdflush);
442 /*
443 * If we have failed to allocate any blocks, simply return the error.
444 * This is the usual case and avoids the need to fsync the file.
445 */
446 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
447 return (error);
448 /*
449 * If we have failed part way through block allocation, we
450 * have to deallocate any indirect blocks that we have allocated.
451 * We have to fsync the file before we start to get rid of all
452 * of its dependencies so that we do not leave them dangling.
453 * We have to sync it at the end so that the soft updates code
454 * does not find any untracked changes. Although this is really
455 * slow, running out of disk space is not expected to be a common
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/bio.h>
68#include <sys/buf.h>
69#include <sys/lock.h>
70#include <sys/mount.h>
71#include <sys/vnode.h>
72
73#include <ufs/ufs/quota.h>
74#include <ufs/ufs/inode.h>
75#include <ufs/ufs/ufs_extern.h>
76#include <ufs/ufs/extattr.h>
77#include <ufs/ufs/ufsmount.h>
78
79#include <ufs/ffs/fs.h>
80#include <ufs/ffs/ffs_extern.h>
81
82/*
83 * Balloc defines the structure of filesystem storage
84 * by allocating the physical blocks on a device given
85 * the inode and the logical block number in a file.
86 * This is the allocation strategy for UFS1. Below is
87 * the allocation strategy for UFS2.
88 */
89int
90ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91 struct ucred *cred, int flags, struct buf **bpp)
92{
93 struct inode *ip;
94 struct ufs1_dinode *dp;
95 ufs_lbn_t lbn, lastlbn;
96 struct fs *fs;
97 ufs1_daddr_t nb;
98 struct buf *bp, *nbp;
99 struct ufsmount *ump;
100 struct indir indirs[NIADDR + 2];
101 int deallocated, osize, nsize, num, i, error;
102 ufs2_daddr_t newb;
103 ufs1_daddr_t *bap, pref;
104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
106 int unwindidx = -1;
107 int saved_inbdflush;
108 static struct timeval lastfail;
109 static int curfail;
110 int gbflags, reclaimed;
111
112 ip = VTOI(vp);
113 dp = ip->i_din1;
114 fs = ip->i_fs;
115 ump = ip->i_ump;
116 lbn = lblkno(fs, startoffset);
117 size = blkoff(fs, startoffset) + size;
118 reclaimed = 0;
119 if (size > fs->fs_bsize)
120 panic("ffs_balloc_ufs1: blk too big");
121 *bpp = NULL;
122 if (flags & IO_EXT)
123 return (EOPNOTSUPP);
124 if (lbn < 0)
125 return (EFBIG);
126 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
127
128 if (DOINGSOFTDEP(vp))
129 softdep_prealloc(vp, MNT_WAIT);
130 /*
131 * If the next write will extend the file into a new block,
132 * and the file is currently composed of a fragment
133 * this fragment has to be extended to be a full block.
134 */
135 lastlbn = lblkno(fs, ip->i_size);
136 if (lastlbn < NDADDR && lastlbn < lbn) {
137 nb = lastlbn;
138 osize = blksize(fs, ip, nb);
139 if (osize < fs->fs_bsize && osize > 0) {
140 UFS_LOCK(ump);
141 error = ffs_realloccg(ip, nb, dp->di_db[nb],
142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
144 cred, &bp);
145 if (error)
146 return (error);
147 if (DOINGSOFTDEP(vp))
148 softdep_setup_allocdirect(ip, nb,
149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
150 fs->fs_bsize, osize, bp);
151 ip->i_size = smalllblktosize(fs, nb + 1);
152 dp->di_size = ip->i_size;
153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
154 ip->i_flag |= IN_CHANGE | IN_UPDATE;
155 if (flags & IO_SYNC)
156 bwrite(bp);
157 else
158 bawrite(bp);
159 }
160 }
161 /*
162 * The first NDADDR blocks are direct blocks
163 */
164 if (lbn < NDADDR) {
165 if (flags & BA_METAONLY)
166 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
167 nb = dp->di_db[lbn];
168 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
169 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
170 if (error) {
171 brelse(bp);
172 return (error);
173 }
174 bp->b_blkno = fsbtodb(fs, nb);
175 *bpp = bp;
176 return (0);
177 }
178 if (nb != 0) {
179 /*
180 * Consider need to reallocate a fragment.
181 */
182 osize = fragroundup(fs, blkoff(fs, ip->i_size));
183 nsize = fragroundup(fs, size);
184 if (nsize <= osize) {
185 error = bread(vp, lbn, osize, NOCRED, &bp);
186 if (error) {
187 brelse(bp);
188 return (error);
189 }
190 bp->b_blkno = fsbtodb(fs, nb);
191 } else {
192 UFS_LOCK(ump);
193 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
194 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
195 &dp->di_db[0]), osize, nsize, flags,
196 cred, &bp);
197 if (error)
198 return (error);
199 if (DOINGSOFTDEP(vp))
200 softdep_setup_allocdirect(ip, lbn,
201 dbtofsb(fs, bp->b_blkno), nb,
202 nsize, osize, bp);
203 }
204 } else {
205 if (ip->i_size < smalllblktosize(fs, lbn + 1))
206 nsize = fragroundup(fs, size);
207 else
208 nsize = fs->fs_bsize;
209 UFS_LOCK(ump);
210 error = ffs_alloc(ip, lbn,
211 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
212 nsize, flags, cred, &newb);
213 if (error)
214 return (error);
215 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
216 bp->b_blkno = fsbtodb(fs, newb);
217 if (flags & BA_CLRBUF)
218 vfs_bio_clrbuf(bp);
219 if (DOINGSOFTDEP(vp))
220 softdep_setup_allocdirect(ip, lbn, newb, 0,
221 nsize, 0, bp);
222 }
223 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
224 ip->i_flag |= IN_CHANGE | IN_UPDATE;
225 *bpp = bp;
226 return (0);
227 }
228 /*
229 * Determine the number of levels of indirection.
230 */
231 pref = 0;
232 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
233 return(error);
234#ifdef INVARIANTS
235 if (num < 1)
236 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
237#endif
238 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
239 /*
240 * Fetch the first indirect block allocating if necessary.
241 */
242 --num;
243 nb = dp->di_ib[indirs[0].in_off];
244 allocib = NULL;
245 allocblk = allociblk;
246 lbns_remfree = lbns;
247 if (nb == 0) {
248 UFS_LOCK(ump);
249 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
250 (ufs1_daddr_t *)0);
251 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
252 flags, cred, &newb)) != 0) {
253 curthread_pflags_restore(saved_inbdflush);
254 return (error);
255 }
256 pref = newb + fs->fs_frag;
257 nb = newb;
258 *allocblk++ = nb;
259 *lbns_remfree++ = indirs[1].in_lbn;
260 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
261 bp->b_blkno = fsbtodb(fs, nb);
262 vfs_bio_clrbuf(bp);
263 if (DOINGSOFTDEP(vp)) {
264 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
265 newb, 0, fs->fs_bsize, 0, bp);
266 bdwrite(bp);
267 } else {
268 /*
269 * Write synchronously so that indirect blocks
270 * never point at garbage.
271 */
272 if (DOINGASYNC(vp))
273 bdwrite(bp);
274 else if ((error = bwrite(bp)) != 0)
275 goto fail;
276 }
277 allocib = &dp->di_ib[indirs[0].in_off];
278 *allocib = nb;
279 ip->i_flag |= IN_CHANGE | IN_UPDATE;
280 }
281 /*
282 * Fetch through the indirect blocks, allocating as necessary.
283 */
284retry:
285 for (i = 1;;) {
286 error = bread(vp,
287 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
288 if (error) {
289 brelse(bp);
290 goto fail;
291 }
292 bap = (ufs1_daddr_t *)bp->b_data;
293 nb = bap[indirs[i].in_off];
294 if (i == num)
295 break;
296 i += 1;
297 if (nb != 0) {
298 bqrelse(bp);
299 continue;
300 }
301 UFS_LOCK(ump);
302 if (pref == 0)
303 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
304 (ufs1_daddr_t *)0);
305 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
306 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
307 brelse(bp);
308 if (++reclaimed == 1) {
309 UFS_LOCK(ump);
310 softdep_request_cleanup(fs, vp, cred,
311 FLUSH_BLOCKS_WAIT);
312 UFS_UNLOCK(ump);
313 goto retry;
314 }
315 if (ppsratecheck(&lastfail, &curfail, 1)) {
316 ffs_fserr(fs, ip->i_number, "filesystem full");
317 uprintf("\n%s: write failed, filesystem "
318 "is full\n", fs->fs_fsmnt);
319 }
320 goto fail;
321 }
322 pref = newb + fs->fs_frag;
323 nb = newb;
324 *allocblk++ = nb;
325 *lbns_remfree++ = indirs[i].in_lbn;
326 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
327 nbp->b_blkno = fsbtodb(fs, nb);
328 vfs_bio_clrbuf(nbp);
329 if (DOINGSOFTDEP(vp)) {
330 softdep_setup_allocindir_meta(nbp, ip, bp,
331 indirs[i - 1].in_off, nb);
332 bdwrite(nbp);
333 } else {
334 /*
335 * Write synchronously so that indirect blocks
336 * never point at garbage.
337 */
338 if ((error = bwrite(nbp)) != 0) {
339 brelse(bp);
340 goto fail;
341 }
342 }
343 bap[indirs[i - 1].in_off] = nb;
344 if (allocib == NULL && unwindidx < 0)
345 unwindidx = i - 1;
346 /*
347 * If required, write synchronously, otherwise use
348 * delayed write.
349 */
350 if (flags & IO_SYNC) {
351 bwrite(bp);
352 } else {
353 if (bp->b_bufsize == fs->fs_bsize)
354 bp->b_flags |= B_CLUSTEROK;
355 bdwrite(bp);
356 }
357 }
358 /*
359 * If asked only for the indirect block, then return it.
360 */
361 if (flags & BA_METAONLY) {
362 curthread_pflags_restore(saved_inbdflush);
363 *bpp = bp;
364 return (0);
365 }
366 /*
367 * Get the data block, allocating if necessary.
368 */
369 if (nb == 0) {
370 UFS_LOCK(ump);
371 if (pref == 0)
372 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
373 &bap[0]);
374 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
375 flags | IO_BUFLOCKED, cred, &newb);
376 if (error) {
377 brelse(bp);
378 if (++reclaimed == 1) {
379 UFS_LOCK(ump);
380 softdep_request_cleanup(fs, vp, cred,
381 FLUSH_BLOCKS_WAIT);
382 UFS_UNLOCK(ump);
383 goto retry;
384 }
385 if (ppsratecheck(&lastfail, &curfail, 1)) {
386 ffs_fserr(fs, ip->i_number, "filesystem full");
387 uprintf("\n%s: write failed, filesystem "
388 "is full\n", fs->fs_fsmnt);
389 }
390 goto fail;
391 }
392 nb = newb;
393 *allocblk++ = nb;
394 *lbns_remfree++ = lbn;
395 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
396 nbp->b_blkno = fsbtodb(fs, nb);
397 if (flags & BA_CLRBUF)
398 vfs_bio_clrbuf(nbp);
399 if (DOINGSOFTDEP(vp))
400 softdep_setup_allocindir_page(ip, lbn, bp,
401 indirs[i].in_off, nb, 0, nbp);
402 bap[indirs[i].in_off] = nb;
403 /*
404 * If required, write synchronously, otherwise use
405 * delayed write.
406 */
407 if (flags & IO_SYNC) {
408 bwrite(bp);
409 } else {
410 if (bp->b_bufsize == fs->fs_bsize)
411 bp->b_flags |= B_CLUSTEROK;
412 bdwrite(bp);
413 }
414 curthread_pflags_restore(saved_inbdflush);
415 *bpp = nbp;
416 return (0);
417 }
418 brelse(bp);
419 if (flags & BA_CLRBUF) {
420 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
421 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
422 error = cluster_read(vp, ip->i_size, lbn,
423 (int)fs->fs_bsize, NOCRED,
424 MAXBSIZE, seqcount, gbflags, &nbp);
425 } else {
426 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
427 gbflags, &nbp);
428 }
429 if (error) {
430 brelse(nbp);
431 goto fail;
432 }
433 } else {
434 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
435 nbp->b_blkno = fsbtodb(fs, nb);
436 }
437 curthread_pflags_restore(saved_inbdflush);
438 *bpp = nbp;
439 return (0);
440fail:
441 curthread_pflags_restore(saved_inbdflush);
442 /*
443 * If we have failed to allocate any blocks, simply return the error.
444 * This is the usual case and avoids the need to fsync the file.
445 */
446 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
447 return (error);
448 /*
449 * If we have failed part way through block allocation, we
450 * have to deallocate any indirect blocks that we have allocated.
451 * We have to fsync the file before we start to get rid of all
452 * of its dependencies so that we do not leave them dangling.
453 * We have to sync it at the end so that the soft updates code
454 * does not find any untracked changes. Although this is really
455 * slow, running out of disk space is not expected to be a common
456 * occurence. The error return from fsync is ignored as we already
456 * occurrence. The error return from fsync is ignored as we already
457 * have an error to return to the user.
458 *
459 * XXX Still have to journal the free below
460 */
461 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
462 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
463 blkp < allocblk; blkp++, lbns_remfree++) {
464 /*
465 * We shall not leave the freed blocks on the vnode
466 * buffer object lists.
467 */
468 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
469 if (bp != NULL) {
470 bp->b_flags |= (B_INVAL | B_RELBUF);
471 bp->b_flags &= ~B_ASYNC;
472 brelse(bp);
473 }
474 deallocated += fs->fs_bsize;
475 }
476 if (allocib != NULL) {
477 *allocib = 0;
478 } else if (unwindidx >= 0) {
479 int r;
480
481 r = bread(vp, indirs[unwindidx].in_lbn,
482 (int)fs->fs_bsize, NOCRED, &bp);
483 if (r) {
484 panic("Could not unwind indirect block, error %d", r);
485 brelse(bp);
486 } else {
487 bap = (ufs1_daddr_t *)bp->b_data;
488 bap[indirs[unwindidx].in_off] = 0;
489 if (flags & IO_SYNC) {
490 bwrite(bp);
491 } else {
492 if (bp->b_bufsize == fs->fs_bsize)
493 bp->b_flags |= B_CLUSTEROK;
494 bdwrite(bp);
495 }
496 }
497 }
498 if (deallocated) {
499#ifdef QUOTA
500 /*
501 * Restore user's disk quota because allocation failed.
502 */
503 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
504#endif
505 dp->di_blocks -= btodb(deallocated);
506 ip->i_flag |= IN_CHANGE | IN_UPDATE;
507 }
508 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
509 /*
510 * After the buffers are invalidated and on-disk pointers are
511 * cleared, free the blocks.
512 */
513 for (blkp = allociblk; blkp < allocblk; blkp++) {
514 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
515 ip->i_number, vp->v_type, NULL);
516 }
517 return (error);
518}
519
/*
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.  This is the allocation strategy for UFS2.
 * Above is the allocation strategy for UFS1.
 *
 *	vp		vnode of the file being written
 *	startoffset	byte offset in the file of the write
 *	size		number of bytes being written; together with the
 *			offset within the block this may never exceed one
 *			filesystem block (enforced by panic below)
 *	cred		credentials charged for any new allocation
 *	flags		IO_*/BA_* modifiers: IO_EXT (external attribute
 *			area), IO_SYNC, BA_CLRBUF, BA_METAONLY, BA_UNMAPPED,
 *			BA_SEQMASK read-ahead hint
 *	bpp		on success *bpp is the buffer covering the block
 *
 * Returns 0 on success or an errno.  On failure, any blocks allocated
 * part way through (tracked in allociblk[]/lbns[]) are invalidated,
 * unlinked from on-disk pointers, and freed before returning.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/*
	 * allociblk[]/lbns[] record every block allocated during this
	 * call so the "fail:" path can unwind them; allocib remembers
	 * the inode-resident indirect pointer to clear on failure.
	 */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int gbflags, reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* size becomes the byte extent used within the target block. */
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);

	/*
	 * Check for allocating external data.  External attribute
	 * blocks are addressed with negative logical block numbers
	 * (-1 - lbn) on the vnode.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				/* ffs_realloccg() consumes the UFS lock. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and full-sized: just read it. */
			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread_gb(vp, lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
			    &dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
		    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * "retry" is re-entered at most once, after asking soft updates
	 * to flush enough freed blocks to satisfy the allocation.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
			    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			/*
			 * Out of space: on the first failure only, let
			 * softdep flush pending freed blocks and retry.
			 */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Hook the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			/* Same one-shot reclaim-and-retry as above. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, gbflags, &nbp);
		} else {
			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, gbflags, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			/*
			 * NOTE(review): panic() does not return, so this
			 * brelse() appears unreachable — confirm intent.
			 */
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL);
	}
	return (error);
}
457 * have an error to return to the user.
458 *
459 * XXX Still have to journal the free below
460 */
461 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
462 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
463 blkp < allocblk; blkp++, lbns_remfree++) {
464 /*
465 * We shall not leave the freed blocks on the vnode
466 * buffer object lists.
467 */
468 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
469 if (bp != NULL) {
470 bp->b_flags |= (B_INVAL | B_RELBUF);
471 bp->b_flags &= ~B_ASYNC;
472 brelse(bp);
473 }
474 deallocated += fs->fs_bsize;
475 }
476 if (allocib != NULL) {
477 *allocib = 0;
478 } else if (unwindidx >= 0) {
479 int r;
480
481 r = bread(vp, indirs[unwindidx].in_lbn,
482 (int)fs->fs_bsize, NOCRED, &bp);
483 if (r) {
484 panic("Could not unwind indirect block, error %d", r);
485 brelse(bp);
486 } else {
487 bap = (ufs1_daddr_t *)bp->b_data;
488 bap[indirs[unwindidx].in_off] = 0;
489 if (flags & IO_SYNC) {
490 bwrite(bp);
491 } else {
492 if (bp->b_bufsize == fs->fs_bsize)
493 bp->b_flags |= B_CLUSTEROK;
494 bdwrite(bp);
495 }
496 }
497 }
498 if (deallocated) {
499#ifdef QUOTA
500 /*
501 * Restore user's disk quota because allocation failed.
502 */
503 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
504#endif
505 dp->di_blocks -= btodb(deallocated);
506 ip->i_flag |= IN_CHANGE | IN_UPDATE;
507 }
508 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
509 /*
510 * After the buffers are invalidated and on-disk pointers are
511 * cleared, free the blocks.
512 */
513 for (blkp = allociblk; blkp < allocblk; blkp++) {
514 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
515 ip->i_number, vp->v_type, NULL);
516 }
517 return (error);
518}
519
/*
 * Balloc defines the structure of file system storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.  This is the allocation strategy for UFS2.
 * Above is the allocation strategy for UFS1.
 *
 *	vp		vnode of the file being written
 *	startoffset	byte offset in the file of the write
 *	size		number of bytes being written; together with the
 *			offset within the block this may never exceed one
 *			filesystem block (enforced by panic below)
 *	cred		credentials charged for any new allocation
 *	flags		IO_*/BA_* modifiers: IO_EXT (external attribute
 *			area), IO_SYNC, BA_CLRBUF, BA_METAONLY, BA_UNMAPPED,
 *			BA_SEQMASK read-ahead hint
 *	bpp		on success, *bpp is the buffer covering the block
 *
 * Returns 0 on success or an errno.  On failure, any blocks allocated
 * part way through (tracked in allociblk[]/lbns[]) are invalidated,
 * unlinked from on-disk pointers, and freed before returning.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/*
	 * allociblk[]/lbns[] record every block allocated during this
	 * call so the "fail:" path can unwind them; allocib remembers
	 * the inode-resident indirect pointer to clear on failure.
	 */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int gbflags, reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* size becomes the byte extent used within the target block. */
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);

	/*
	 * Check for allocating external data.  External attribute
	 * blocks are addressed with negative logical block numbers
	 * (-1 - lbn) on the vnode.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				/* ffs_realloccg() consumes the UFS lock. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and full-sized: just read it. */
			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread_gb(vp, lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
			    &dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
		    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * "retry" is re-entered at most once, after asking soft updates
	 * to flush enough freed blocks to satisfy the allocation.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
			    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			/*
			 * Out of space: on the first failure only, let
			 * softdep flush pending freed blocks and retry.
			 */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Hook the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			/* Same one-shot reclaim-and-retry as above. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, gbflags, &nbp);
		} else {
			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, gbflags, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			/*
			 * NOTE(review): panic() does not return, so this
			 * brelse() appears unreachable — confirm intent.
			 */
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL);
	}
	return (error);
}