1139778Simp/*-
21541Srgrimes * Copyright (c) 1989, 1991, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 4. Neither the name of the University nor the names of its contributors
191541Srgrimes *    may be used to endorse or promote products derived from this software
201541Srgrimes *    without specific prior written permission.
211541Srgrimes *
221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
251541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
321541Srgrimes * SUCH DAMAGE.
331541Srgrimes *
3422521Sdyson *	@(#)ufs_bmap.c	8.7 (Berkeley) 3/21/95
3550477Speter * $FreeBSD$
361541Srgrimes */
371541Srgrimes
381541Srgrimes#include <sys/param.h>
397430Sbde#include <sys/systm.h>
4060041Sphk#include <sys/bio.h>
411541Srgrimes#include <sys/buf.h>
421541Srgrimes#include <sys/proc.h>
431541Srgrimes#include <sys/vnode.h>
441541Srgrimes#include <sys/mount.h>
451541Srgrimes#include <sys/resourcevar.h>
4662976Smckusick#include <sys/stat.h>
471541Srgrimes
48202283Slulf#include <fs/ext2fs/inode.h>
49254260Spfg#include <fs/ext2fs/fs.h>
50202283Slulf#include <fs/ext2fs/ext2fs.h>
51254260Spfg#include <fs/ext2fs/ext2_dinode.h>
52251344Spfg#include <fs/ext2fs/ext2_extern.h>
53202283Slulf#include <fs/ext2fs/ext2_mount.h>
541541Srgrimes
55254260Spfgstatic int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *);
56254260Spfg
571541Srgrimes/*
58202283Slulf * Bmap converts the logical block number of a file to its physical block
591541Srgrimes * number on the disk. The conversion is done by using the logical block
601541Srgrimes * number to index into the array of block pointers described by the dinode.
611541Srgrimes */
621541Srgrimesint
63246634Spfgext2_bmap(struct vop_bmap_args *ap)
641541Srgrimes{
65254283Spfg	daddr_t blkno;
6692363Smckusick	int error;
6792363Smckusick
681541Srgrimes	/*
691541Srgrimes	 * Check for underlying vnode requests and ensure that logical
701541Srgrimes	 * to physical mapping is requested.
711541Srgrimes	 */
72137726Sphk	if (ap->a_bop != NULL)
73137726Sphk		*ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj;
741541Srgrimes	if (ap->a_bnp == NULL)
751541Srgrimes		return (0);
761541Srgrimes
77254260Spfg	if (VTOI(ap->a_vp)->i_flags & EXT4_EXTENTS)
78254260Spfg		error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno,
79254260Spfg		    ap->a_runp, ap->a_runb);
80254260Spfg	else
81254260Spfg		error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno,
82254260Spfg		    ap->a_runp, ap->a_runb);
8392363Smckusick	*ap->a_bnp = blkno;
8492363Smckusick	return (error);
851541Srgrimes}
861541Srgrimes
871541Srgrimes/*
88254260Spfg * This function converts the logical block number of a file to
89254260Spfg * its physical block number on the disk within ext4 extents.
90254260Spfg */
91254260Spfgstatic int
92254260Spfgext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb)
93254260Spfg{
94254260Spfg	struct inode *ip;
95254260Spfg	struct m_ext2fs *fs;
96254260Spfg	struct ext4_extent *ep;
97254260Spfg	struct ext4_extent_path path;
98254260Spfg	daddr_t lbn;
99254260Spfg
100254260Spfg	ip = VTOI(vp);
101254260Spfg	fs = ip->i_e2fs;
102254260Spfg	lbn = bn;
103254260Spfg
104254260Spfg	/*
105254260Spfg	 * TODO: need to implement read ahead to improve the performance.
106254260Spfg	 */
107254260Spfg	if (runp != NULL)
108254260Spfg		*runp = 0;
109254260Spfg
110254260Spfg	if (runb != NULL)
111254260Spfg		*runb = 0;
112254260Spfg
113254260Spfg	ext4_ext_find_extent(fs, ip, lbn, &path);
114254260Spfg	ep = path.ep_ext;
115254260Spfg	if (ep == NULL)
116254260Spfg		return (EIO);
117254260Spfg
118254260Spfg	*bnp = fsbtodb(fs, lbn - ep->e_blk +
119254260Spfg	    (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32));
120254260Spfg
121254260Spfg	if (*bnp == 0)
122254260Spfg		*bnp = -1;
123254260Spfg
124254260Spfg	return (0);
125254260Spfg}
126254260Spfg
127254260Spfg/*
1281541Srgrimes * Indirect blocks are now on the vnode for the file.  They are given negative
1291541Srgrimes * logical block numbers.  Indirect blocks are addressed by the negative
1301541Srgrimes * address of the first data block to which they point.  Double indirect blocks
1311541Srgrimes * are addressed by one less than the address of the first indirect block to
1321541Srgrimes * which they point.  Triple indirect blocks are addressed by one less than
1331541Srgrimes * the address of the first double indirect block to which they point.
1341541Srgrimes *
135202283Slulf * ext2_bmaparray does the bmap conversion, and if requested returns the
1361541Srgrimes * array of logical blocks which must be traversed to get to a block.
1371541Srgrimes * Each entry contains the offset into that block that gets you to the
1381541Srgrimes * next block and the disk address of the block (if it is assigned).
1391541Srgrimes */
1401541Srgrimes
1411541Srgrimesint
142254283Spfgext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb)
1431541Srgrimes{
14496506Sphk	struct inode *ip;
1451541Srgrimes	struct buf *bp;
14696596Siedowse	struct ext2mount *ump;
1471541Srgrimes	struct mount *mp;
1481541Srgrimes	struct vnode *devvp;
14976128Sphk	struct indir a[NIADDR+1], *ap;
150252103Spfg	daddr_t daddr;
151252103Spfg	e2fs_lbn_t metalbn;
152137039Sphk	int error, num, maxrun = 0, bsize;
15376128Sphk	int *nump;
1541541Srgrimes
15576128Sphk	ap = NULL;
1561541Srgrimes	ip = VTOI(vp);
1571541Srgrimes	mp = vp->v_mount;
15896596Siedowse	ump = VFSTOEXT2(mp);
15951483Sphk	devvp = ump->um_devvp;
1601541Srgrimes
161137039Sphk	bsize = EXT2_BLOCK_SIZE(ump->um_e2fs);
162137039Sphk
1631541Srgrimes	if (runp) {
164137039Sphk		maxrun = mp->mnt_iosize_max / bsize - 1;
16532724Sdyson		*runp = 0;
16632724Sdyson	}
16732724Sdyson
16832724Sdyson	if (runb) {
16932724Sdyson		*runb = 0;
17032724Sdyson	}
17132724Sdyson
17232724Sdyson
17376128Sphk	ap = a;
17476128Sphk	nump = &num;
17596596Siedowse	error = ext2_getlbns(vp, bn, ap, nump);
1763427Sphk	if (error)
1771541Srgrimes		return (error);
1781541Srgrimes
1791541Srgrimes	num = *nump;
1801541Srgrimes	if (num == 0) {
1811541Srgrimes		*bnp = blkptrtodb(ump, ip->i_db[bn]);
18296596Siedowse		if (*bnp == 0) {
18363788Smckusick			*bnp = -1;
18462976Smckusick		} else if (runp) {
185254283Spfg			daddr_t bnb = bn;
1861541Srgrimes			for (++bn; bn < NDADDR && *runp < maxrun &&
1871541Srgrimes			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
1881541Srgrimes			    ++bn, ++*runp);
18910551Sdyson			bn = bnb;
19010551Sdyson			if (runb && (bn > 0)) {
19110551Sdyson				for (--bn; (bn >= 0) && (*runb < maxrun) &&
19210551Sdyson					is_sequential(ump, ip->i_db[bn],
193254283Spfg						ip->i_db[bn + 1]);
19410551Sdyson						--bn, ++*runb);
19510551Sdyson			}
19610551Sdyson		}
1971541Srgrimes		return (0);
1981541Srgrimes	}
1991541Srgrimes
2001541Srgrimes
2011541Srgrimes	/* Get disk address out of indirect block array */
20276128Sphk	daddr = ip->i_ib[ap->in_off];
2031541Srgrimes
20476128Sphk	for (bp = NULL, ++ap; --num; ++ap) {
2058876Srgrimes		/*
2061541Srgrimes		 * Exit the loop if there is no disk address assigned yet and
2071541Srgrimes		 * the indirect block isn't in the cache, or if we were
2081541Srgrimes		 * looking for an indirect block and we've found it.
2091541Srgrimes		 */
2101541Srgrimes
21176128Sphk		metalbn = ap->in_lbn;
212136767Sphk		if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn)
2131541Srgrimes			break;
2141541Srgrimes		/*
2151541Srgrimes		 * If we get here, we've either got the block in the cache
2161541Srgrimes		 * or we have a disk address for it, go fetch it.
2171541Srgrimes		 */
2181541Srgrimes		if (bp)
21913490Sdyson			bqrelse(bp);
2201541Srgrimes
221137039Sphk		bp = getblk(vp, metalbn, bsize, 0, 0, 0);
2226875Sdg		if ((bp->b_flags & B_CACHE) == 0) {
223251823Spfg#ifdef INVARIANTS
2246875Sdg			if (!daddr)
225239372Skevlo				panic("ext2_bmaparray: indirect block not in cache");
2261541Srgrimes#endif
2271541Srgrimes			bp->b_blkno = blkptrtodb(ump, daddr);
22858345Sphk			bp->b_iocmd = BIO_READ;
22958934Sphk			bp->b_flags &= ~B_INVAL;
23058934Sphk			bp->b_ioflags &= ~BIO_ERROR;
2315455Sdg			vfs_busy_pages(bp, 0);
232121205Sphk			bp->b_iooffset = dbtob(bp->b_blkno);
233136927Sphk			bstrategy(bp);
234170174Sjeff			curthread->td_ru.ru_inblock++;
23559762Sphk			error = bufwait(bp);
2363427Sphk			if (error) {
2371541Srgrimes				brelse(bp);
2381541Srgrimes				return (error);
2391541Srgrimes			}
2401541Srgrimes		}
2411541Srgrimes
242254283Spfg		daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off];
24310551Sdyson		if (num == 1 && daddr && runp) {
24476128Sphk			for (bn = ap->in_off + 1;
2451541Srgrimes			    bn < MNINDIR(ump) && *runp < maxrun &&
24622521Sdyson			    is_sequential(ump,
247254283Spfg			    ((e2fs_daddr_t *)bp->b_data)[bn - 1],
248254283Spfg			    ((e2fs_daddr_t *)bp->b_data)[bn]);
2491541Srgrimes			    ++bn, ++*runp);
25076128Sphk			bn = ap->in_off;
25110551Sdyson			if (runb && bn) {
252228583Spfg				for (--bn; bn >= 0 && *runb < maxrun &&
253254283Spfg			    		is_sequential(ump,
254254283Spfg					((e2fs_daddr_t *)bp->b_data)[bn],
255254283Spfg					((e2fs_daddr_t *)bp->b_data)[bn + 1]);
25610551Sdyson			    		--bn, ++*runb);
25710551Sdyson			}
25810551Sdyson		}
2591541Srgrimes	}
2601541Srgrimes	if (bp)
26113490Sdyson		bqrelse(bp);
2621541Srgrimes
26363788Smckusick	/*
26463788Smckusick	 * Since this is FFS independent code, we are out of scope for the
26563788Smckusick	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
26663788Smckusick	 * will fall in the range 1..um_seqinc, so we use that test and
26763788Smckusick	 * return a request for a zeroed out buffer if attempts are made
26863788Smckusick	 * to read a BLK_NOCOPY or BLK_SNAP block.
26963788Smckusick	 */
27063788Smckusick	if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
27163788Smckusick		*bnp = -1;
27263788Smckusick		return (0);
27363788Smckusick	}
27462976Smckusick	*bnp = blkptrtodb(ump, daddr);
27562976Smckusick	if (*bnp == 0) {
27696596Siedowse		*bnp = -1;
27762976Smckusick	}
2781541Srgrimes	return (0);
2791541Srgrimes}
2801541Srgrimes
2811541Srgrimes/*
2821541Srgrimes * Create an array of logical block number/offset pairs which represent the
2831541Srgrimes * path of indirect blocks required to access a data block.  The first "pair"
2841541Srgrimes * contains the logical block number of the appropriate single, double or
2851541Srgrimes * triple indirect block and the offset into the inode indirect block array.
2861541Srgrimes * Note, the logical block number of the inode single/double/triple indirect
2871541Srgrimes * block appears twice in the array, once with the offset into the i_ib and
2881541Srgrimes * once with the offset into the page itself.
2891541Srgrimes */
2901541Srgrimesint
291254283Spfgext2_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
2921541Srgrimes{
293252103Spfg	long blockcnt;
294252103Spfg	e2fs_lbn_t metalbn, realbn;
29596596Siedowse	struct ext2mount *ump;
29631394Sbde	int i, numlevels, off;
29731394Sbde	int64_t qblockcnt;
2981541Srgrimes
29996596Siedowse	ump = VFSTOEXT2(vp->v_mount);
3001541Srgrimes	if (nump)
3011541Srgrimes		*nump = 0;
3021541Srgrimes	numlevels = 0;
3031541Srgrimes	realbn = bn;
3041541Srgrimes	if ((long)bn < 0)
3051541Srgrimes		bn = -(long)bn;
3061541Srgrimes
3071541Srgrimes	/* The first NDADDR blocks are direct blocks. */
3081541Srgrimes	if (bn < NDADDR)
3091541Srgrimes		return (0);
3101541Srgrimes
3118876Srgrimes	/*
3121541Srgrimes	 * Determine the number of levels of indirection.  After this loop
3131541Srgrimes	 * is done, blockcnt indicates the number of data blocks possible
31431394Sbde	 * at the previous level of indirection, and NIADDR - i is the number
3151541Srgrimes	 * of levels of indirection needed to locate the requested block.
3161541Srgrimes	 */
3171541Srgrimes	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
3181541Srgrimes		if (i == 0)
3191541Srgrimes			return (EFBIG);
32031394Sbde		/*
32131394Sbde		 * Use int64_t's here to avoid overflow for triple indirect
32231394Sbde		 * blocks when longs have 32 bits and the block size is more
32331394Sbde		 * than 4K.
32431394Sbde		 */
32531394Sbde		qblockcnt = (int64_t)blockcnt * MNINDIR(ump);
32631394Sbde		if (bn < qblockcnt)
3271541Srgrimes			break;
32831394Sbde		blockcnt = qblockcnt;
3291541Srgrimes	}
3301541Srgrimes
3311541Srgrimes	/* Calculate the address of the first meta-block. */
3321541Srgrimes	if (realbn >= 0)
3331541Srgrimes		metalbn = -(realbn - bn + NIADDR - i);
3341541Srgrimes	else
3351541Srgrimes		metalbn = -(-realbn - bn + NIADDR - i);
3361541Srgrimes
3378876Srgrimes	/*
3381541Srgrimes	 * At each iteration, off is the offset into the bap array which is
3391541Srgrimes	 * an array of disk addresses at the current level of indirection.
3401541Srgrimes	 * The logical block number and the offset in that block are stored
3411541Srgrimes	 * into the argument array.
3421541Srgrimes	 */
3431541Srgrimes	ap->in_lbn = metalbn;
3441541Srgrimes	ap->in_off = off = NIADDR - i;
3451541Srgrimes	ap++;
3461541Srgrimes	for (++numlevels; i <= NIADDR; i++) {
3471541Srgrimes		/* If searching for a meta-data block, quit when found. */
3481541Srgrimes		if (metalbn == realbn)
3491541Srgrimes			break;
3501541Srgrimes
3511541Srgrimes		off = (bn / blockcnt) % MNINDIR(ump);
3521541Srgrimes
3531541Srgrimes		++numlevels;
3541541Srgrimes		ap->in_lbn = metalbn;
3551541Srgrimes		ap->in_off = off;
3561541Srgrimes		++ap;
3571541Srgrimes
3581541Srgrimes		metalbn -= -1 + off * blockcnt;
35931394Sbde		blockcnt /= MNINDIR(ump);
3601541Srgrimes	}
3611541Srgrimes	if (nump)
3621541Srgrimes		*nump = numlevels;
3631541Srgrimes	return (0);
3641541Srgrimes}
365