ufs_bmap.c revision 284021
1/*-
2 * Copyright (c) 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)ufs_bmap.c	8.7 (Berkeley) 3/21/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_bmap.c 284021 2015-06-05 08:36:25Z kib $");
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/bio.h>
43#include <sys/buf.h>
44#include <sys/proc.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/resourcevar.h>
48#include <sys/stat.h>
49
50#include <ufs/ufs/extattr.h>
51#include <ufs/ufs/quota.h>
52#include <ufs/ufs/inode.h>
53#include <ufs/ufs/ufsmount.h>
54#include <ufs/ufs/ufs_extern.h>
55
56/*
57 * Bmap converts the logical block number of a file to its physical block
58 * number on the disk. The conversion is done by using the logical block
59 * number to index into the array of block pointers described by the dinode.
60 */
61int
62ufs_bmap(ap)
63	struct vop_bmap_args /* {
64		struct vnode *a_vp;
65		daddr_t a_bn;
66		struct bufobj **a_bop;
67		daddr_t *a_bnp;
68		int *a_runp;
69		int *a_runb;
70	} */ *ap;
71{
72	ufs2_daddr_t blkno;
73	int error;
74
75	/*
76	 * Check for underlying vnode requests and ensure that logical
77	 * to physical mapping is requested.
78	 */
79	if (ap->a_bop != NULL)
80		*ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj;
81	if (ap->a_bnp == NULL)
82		return (0);
83
84	error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL,
85	    ap->a_runp, ap->a_runb);
86	*ap->a_bnp = blkno;
87	return (error);
88}
89
90/*
91 * Indirect blocks are now on the vnode for the file.  They are given negative
92 * logical block numbers.  Indirect blocks are addressed by the negative
93 * address of the first data block to which they point.  Double indirect blocks
94 * are addressed by one less than the address of the first indirect block to
95 * which they point.  Triple indirect blocks are addressed by one less than
96 * the address of the first double indirect block to which they point.
97 *
98 * ufs_bmaparray does the bmap conversion, and if requested returns the
99 * array of logical blocks which must be traversed to get to a block.
100 * Each entry contains the offset into that block that gets you to the
101 * next block and the disk address of the block (if it is assigned).
102 */
103
104int
105ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
106	struct vnode *vp;
107	ufs2_daddr_t bn;
108	ufs2_daddr_t *bnp;
109	struct buf *nbp;
110	int *runp;
111	int *runb;
112{
113	struct inode *ip;
114	struct buf *bp;
115	struct ufsmount *ump;
116	struct mount *mp;
117	struct indir a[NIADDR+1], *ap;
118	ufs2_daddr_t daddr;
119	ufs_lbn_t metalbn;
120	int error, num, maxrun = 0;
121	int *nump;
122
123	ap = NULL;
124	ip = VTOI(vp);
125	mp = vp->v_mount;
126	ump = VFSTOUFS(mp);
127
128	if (runp) {
129		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
130		*runp = 0;
131	}
132
133	if (runb) {
134		*runb = 0;
135	}
136
137
138	ap = a;
139	nump = &num;
140	error = ufs_getlbns(vp, bn, ap, nump);
141	if (error)
142		return (error);
143
144	num = *nump;
145	if (num == 0) {
146		if (bn >= 0 && bn < NDADDR) {
147			*bnp = blkptrtodb(ump, DIP(ip, i_db[bn]));
148		} else if (bn < 0 && bn >= -NXADDR) {
149			*bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]);
150			if (*bnp == 0)
151				*bnp = -1;
152			if (nbp == NULL)
153				panic("ufs_bmaparray: mapping ext data");
154			nbp->b_xflags |= BX_ALTDATA;
155			return (0);
156		} else {
157			panic("ufs_bmaparray: blkno out of range");
158		}
159		/*
160		 * Since this is FFS independent code, we are out of
161		 * scope for the definitions of BLK_NOCOPY and
162		 * BLK_SNAP, but we do know that they will fall in
163		 * the range 1..um_seqinc, so we use that test and
164		 * return a request for a zeroed out buffer if attempts
165		 * are made to read a BLK_NOCOPY or BLK_SNAP block.
166		 */
167		if ((ip->i_flags & SF_SNAPSHOT) && DIP(ip, i_db[bn]) > 0 &&
168		    DIP(ip, i_db[bn]) < ump->um_seqinc) {
169			*bnp = -1;
170		} else if (*bnp == 0) {
171			if (ip->i_flags & SF_SNAPSHOT)
172				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
173			else
174				*bnp = -1;
175		} else if (runp) {
176			ufs2_daddr_t bnb = bn;
177			for (++bn; bn < NDADDR && *runp < maxrun &&
178			    is_sequential(ump, DIP(ip, i_db[bn - 1]),
179			    DIP(ip, i_db[bn]));
180			    ++bn, ++*runp);
181			bn = bnb;
182			if (runb && (bn > 0)) {
183				for (--bn; (bn >= 0) && (*runb < maxrun) &&
184					is_sequential(ump, DIP(ip, i_db[bn]),
185						DIP(ip, i_db[bn+1]));
186						--bn, ++*runb);
187			}
188		}
189		return (0);
190	}
191
192
193	/* Get disk address out of indirect block array */
194	daddr = DIP(ip, i_ib[ap->in_off]);
195
196	for (bp = NULL, ++ap; --num; ++ap) {
197		/*
198		 * Exit the loop if there is no disk address assigned yet and
199		 * the indirect block isn't in the cache, or if we were
200		 * looking for an indirect block and we've found it.
201		 */
202
203		metalbn = ap->in_lbn;
204		if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn)
205			break;
206		/*
207		 * If we get here, we've either got the block in the cache
208		 * or we have a disk address for it, go fetch it.
209		 */
210		if (bp)
211			bqrelse(bp);
212
213		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, 0);
214		if ((bp->b_flags & B_CACHE) == 0) {
215#ifdef INVARIANTS
216			if (!daddr)
217				panic("ufs_bmaparray: indirect block not in cache");
218#endif
219			bp->b_blkno = blkptrtodb(ump, daddr);
220			bp->b_iocmd = BIO_READ;
221			bp->b_flags &= ~B_INVAL;
222			bp->b_ioflags &= ~BIO_ERROR;
223			vfs_busy_pages(bp, 0);
224			bp->b_iooffset = dbtob(bp->b_blkno);
225			bstrategy(bp);
226			curthread->td_ru.ru_inblock++;
227			error = bufwait(bp);
228			if (error) {
229				brelse(bp);
230				return (error);
231			}
232		}
233
234		if (ip->i_ump->um_fstype == UFS1) {
235			daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off];
236			if (num == 1 && daddr && runp) {
237				for (bn = ap->in_off + 1;
238				    bn < MNINDIR(ump) && *runp < maxrun &&
239				    is_sequential(ump,
240				    ((ufs1_daddr_t *)bp->b_data)[bn - 1],
241				    ((ufs1_daddr_t *)bp->b_data)[bn]);
242				    ++bn, ++*runp);
243				bn = ap->in_off;
244				if (runb && bn) {
245					for (--bn; bn >= 0 && *runb < maxrun &&
246					    is_sequential(ump,
247					    ((ufs1_daddr_t *)bp->b_data)[bn],
248					    ((ufs1_daddr_t *)bp->b_data)[bn+1]);
249					    --bn, ++*runb);
250				}
251			}
252			continue;
253		}
254		daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off];
255		if (num == 1 && daddr && runp) {
256			for (bn = ap->in_off + 1;
257			    bn < MNINDIR(ump) && *runp < maxrun &&
258			    is_sequential(ump,
259			    ((ufs2_daddr_t *)bp->b_data)[bn - 1],
260			    ((ufs2_daddr_t *)bp->b_data)[bn]);
261			    ++bn, ++*runp);
262			bn = ap->in_off;
263			if (runb && bn) {
264				for (--bn; bn >= 0 && *runb < maxrun &&
265				    is_sequential(ump,
266				    ((ufs2_daddr_t *)bp->b_data)[bn],
267				    ((ufs2_daddr_t *)bp->b_data)[bn + 1]);
268				    --bn, ++*runb);
269			}
270		}
271	}
272	if (bp)
273		bqrelse(bp);
274
275	/*
276	 * Since this is FFS independent code, we are out of scope for the
277	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
278	 * will fall in the range 1..um_seqinc, so we use that test and
279	 * return a request for a zeroed out buffer if attempts are made
280	 * to read a BLK_NOCOPY or BLK_SNAP block.
281	 */
282	if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
283		*bnp = -1;
284		return (0);
285	}
286	*bnp = blkptrtodb(ump, daddr);
287	if (*bnp == 0) {
288		if (ip->i_flags & SF_SNAPSHOT)
289			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
290		else
291			*bnp = -1;
292	}
293	return (0);
294}
295
296/*
297 * Create an array of logical block number/offset pairs which represent the
298 * path of indirect blocks required to access a data block.  The first "pair"
299 * contains the logical block number of the appropriate single, double or
300 * triple indirect block and the offset into the inode indirect block array.
301 * Note, the logical block number of the inode single/double/triple indirect
302 * block appears twice in the array, once with the offset into the i_ib and
303 * once with the offset into the page itself.
304 */
305int
306ufs_getlbns(vp, bn, ap, nump)
307	struct vnode *vp;
308	ufs2_daddr_t bn;
309	struct indir *ap;
310	int *nump;
311{
312	ufs2_daddr_t blockcnt;
313	ufs_lbn_t metalbn, realbn;
314	struct ufsmount *ump;
315	int i, numlevels, off;
316
317	ump = VFSTOUFS(vp->v_mount);
318	if (nump)
319		*nump = 0;
320	numlevels = 0;
321	realbn = bn;
322	if (bn < 0)
323		bn = -bn;
324
325	/* The first NDADDR blocks are direct blocks. */
326	if (bn < NDADDR)
327		return (0);
328
329	/*
330	 * Determine the number of levels of indirection.  After this loop
331	 * is done, blockcnt indicates the number of data blocks possible
332	 * at the previous level of indirection, and NIADDR - i is the number
333	 * of levels of indirection needed to locate the requested block.
334	 */
335	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
336		if (i == 0)
337			return (EFBIG);
338		blockcnt *= MNINDIR(ump);
339		if (bn < blockcnt)
340			break;
341	}
342
343	/* Calculate the address of the first meta-block. */
344	if (realbn >= 0)
345		metalbn = -(realbn - bn + NIADDR - i);
346	else
347		metalbn = -(-realbn - bn + NIADDR - i);
348
349	/*
350	 * At each iteration, off is the offset into the bap array which is
351	 * an array of disk addresses at the current level of indirection.
352	 * The logical block number and the offset in that block are stored
353	 * into the argument array.
354	 */
355	ap->in_lbn = metalbn;
356	ap->in_off = off = NIADDR - i;
357	ap++;
358	for (++numlevels; i <= NIADDR; i++) {
359		/* If searching for a meta-data block, quit when found. */
360		if (metalbn == realbn)
361			break;
362
363		blockcnt /= MNINDIR(ump);
364		off = (bn / blockcnt) % MNINDIR(ump);
365
366		++numlevels;
367		ap->in_lbn = metalbn;
368		ap->in_off = off;
369		++ap;
370
371		metalbn -= -1 + off * blockcnt;
372	}
373	if (nump)
374		*nump = numlevels;
375	return (0);
376}
377