1139778Simp/*-
21541Srgrimes * Copyright (c) 1989, 1991, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 4. Neither the name of the University nor the names of its contributors
191541Srgrimes *    may be used to endorse or promote products derived from this software
201541Srgrimes *    without specific prior written permission.
211541Srgrimes *
221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
251541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
321541Srgrimes * SUCH DAMAGE.
331541Srgrimes *
3422521Sdyson *	@(#)ufs_bmap.c	8.7 (Berkeley) 3/21/95
3550477Speter * $FreeBSD: stable/10/sys/fs/ext2fs/ext2_bmap.c 317532 2017-04-27 23:14:01Z pfg $
361541Srgrimes */
371541Srgrimes
381541Srgrimes#include <sys/param.h>
397430Sbde#include <sys/systm.h>
4060041Sphk#include <sys/bio.h>
411541Srgrimes#include <sys/buf.h>
421541Srgrimes#include <sys/proc.h>
431541Srgrimes#include <sys/vnode.h>
441541Srgrimes#include <sys/mount.h>
451541Srgrimes#include <sys/resourcevar.h>
4662976Smckusick#include <sys/stat.h>
471541Srgrimes
48202283Slulf#include <fs/ext2fs/inode.h>
49254260Spfg#include <fs/ext2fs/fs.h>
50202283Slulf#include <fs/ext2fs/ext2fs.h>
51254260Spfg#include <fs/ext2fs/ext2_dinode.h>
52251344Spfg#include <fs/ext2fs/ext2_extern.h>
53202283Slulf#include <fs/ext2fs/ext2_mount.h>
541541Srgrimes
55254260Spfgstatic int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *);
56254260Spfg
571541Srgrimes/*
58202283Slulf * Bmap converts the logical block number of a file to its physical block
591541Srgrimes * number on the disk. The conversion is done by using the logical block
601541Srgrimes * number to index into the array of block pointers described by the dinode.
611541Srgrimes */
621541Srgrimesint
63246634Spfgext2_bmap(struct vop_bmap_args *ap)
641541Srgrimes{
65254283Spfg	daddr_t blkno;
6692363Smckusick	int error;
6792363Smckusick
681541Srgrimes	/*
691541Srgrimes	 * Check for underlying vnode requests and ensure that logical
701541Srgrimes	 * to physical mapping is requested.
711541Srgrimes	 */
72137726Sphk	if (ap->a_bop != NULL)
73137726Sphk		*ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj;
741541Srgrimes	if (ap->a_bnp == NULL)
751541Srgrimes		return (0);
761541Srgrimes
77261311Spfg	if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS)
78254260Spfg		error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno,
79254260Spfg		    ap->a_runp, ap->a_runb);
80254260Spfg	else
81254260Spfg		error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno,
82254260Spfg		    ap->a_runp, ap->a_runb);
8392363Smckusick	*ap->a_bnp = blkno;
8492363Smckusick	return (error);
851541Srgrimes}
861541Srgrimes
871541Srgrimes/*
88254260Spfg * This function converts the logical block number of a file to
89254260Spfg * its physical block number on the disk within ext4 extents.
90254260Spfg */
91254260Spfgstatic int
92254260Spfgext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb)
93254260Spfg{
94254260Spfg	struct inode *ip;
95254260Spfg	struct m_ext2fs *fs;
96254260Spfg	struct ext4_extent *ep;
97311232Spfg	struct ext4_extent_path path = {.ep_bp = NULL};
98254260Spfg	daddr_t lbn;
99293646Spfg	int ret = 0;
100254260Spfg
101254260Spfg	ip = VTOI(vp);
102254260Spfg	fs = ip->i_e2fs;
103254260Spfg	lbn = bn;
104254260Spfg
105317532Spfg	/*
106317532Spfg	 * TODO: need to implement read ahead to improve the performance.
107317532Spfg	 */
108254260Spfg	if (runp != NULL)
109254260Spfg		*runp = 0;
110254260Spfg
111254260Spfg	if (runb != NULL)
112254260Spfg		*runb = 0;
113254260Spfg
114254260Spfg	ext4_ext_find_extent(fs, ip, lbn, &path);
115317532Spfg	ep = path.ep_ext;
116317532Spfg	if (ep == NULL)
117317532Spfg		ret = EIO;
118317532Spfg	else {
119317532Spfg		*bnp = fsbtodb(fs, lbn - ep->e_blk +
120317532Spfg		    (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32));
121254260Spfg
122317532Spfg		if (*bnp == 0)
123317532Spfg			*bnp = -1;
124293646Spfg	}
125254260Spfg
126293646Spfg	if (path.ep_bp != NULL) {
127293646Spfg		brelse(path.ep_bp);
128293646Spfg		path.ep_bp = NULL;
129293646Spfg	}
130254260Spfg
131293646Spfg	return (ret);
132254260Spfg}
133254260Spfg
/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ext2_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 *
 * Arguments:
 *	vp	vnode of the file being mapped.
 *	bn	logical block number to translate.
 *	bnp	receives the device block number, or -1 when the block
 *		pointer found is 0 (or, for snapshot inodes, falls below
 *		um_seqinc).
 *	runp	if non-NULL, receives the number of blocks following bn that
 *		is_sequential() reports as contiguous, capped at maxrun.
 *	runb	if non-NULL, receives the corresponding count of blocks
 *		preceding bn.
 *
 * Returns 0 on success, or the error from ext2_getlbns() or from reading
 * an indirect block.  On those error returns *bnp is not written.
 */

int
ext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2mount *ump;
	struct mount *mp;
	struct indir a[NIADDR + 1], *ap;
	daddr_t daddr;
	e2fs_lbn_t metalbn;
	int error, num, maxrun = 0, bsize;
	int *nump;

	ap = NULL;
	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);

	bsize = EXT2_BLOCK_SIZE(ump->um_e2fs);

	/*
	 * maxrun is only computed when a forward run is requested; it stays
	 * 0 otherwise, which also keeps any backward run at 0 because the
	 * run loops below are bounded by maxrun.
	 */
	if (runp) {
		maxrun = mp->mnt_iosize_max / bsize - 1;
		*runp = 0;
	}
	if (runb)
		*runb = 0;


	/* Build the chain of indirect blocks leading to bn. */
	ap = a;
	nump = &num;
	error = ext2_getlbns(vp, bn, ap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		/*
		 * No indirection: bn indexes the inode's direct block array.
		 * NOTE(review): assumes 0 <= bn < NDADDR here -- confirm that
		 * callers never pass a small negative bn.
		 */
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0) {
			*bnp = -1;
		} else if (runp) {
			daddr_t bnb = bn;

			/* Count contiguous direct blocks after bn. */
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			    ++bn, ++*runp);
			bn = bnb;
			/* Count contiguous direct blocks before bn. */
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
					is_sequential(ump, ip->i_db[bn],
						ip->i_db[bn + 1]);
						--bn, ++*runb);
			}
		}
		return (0);
	}

	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[ap->in_off];

	/*
	 * Walk the remaining num - 1 path entries.  At most one indirect
	 * block buffer is held at a time: the previous one is released
	 * before the next is fetched.
	 */
	for (bp = NULL, ++ap; --num; ++ap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = ap->in_lbn;
		if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn)
			break;
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		bp = getblk(vp, metalbn, bsize, 0, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef INVARIANTS
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			/* Not cached: set up the buffer and read it synchronously. */
			bp->b_blkno = blkptrtodb(ump, daddr);
			bp->b_iocmd = BIO_READ;
			bp->b_flags &= ~B_INVAL;
			bp->b_ioflags &= ~BIO_ERROR;
			vfs_busy_pages(bp, 0);
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);
			curthread->td_ru.ru_inblock++;
			error = bufwait(bp);
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		/* Follow the pointer stored at this level's offset. */
		daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off];
		/*
		 * num == 1 means this is the last path entry, so the block
		 * just read holds the pointer for the requested block;
		 * measure the runs among its neighbouring entries.  bn is
		 * reused from here on as an index into the indirect block.
		 */
		if (num == 1 && daddr && runp) {
			for (bn = ap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((e2fs_daddr_t *)bp->b_data)[bn - 1],
			    ((e2fs_daddr_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
			bn = ap->in_off;
			if (runb && bn) {
				for (--bn; bn >= 0 && *runb < maxrun &&
					is_sequential(ump,
					((e2fs_daddr_t *)bp->b_data)[bn],
					((e2fs_daddr_t *)bp->b_data)[bn + 1]);
					--bn, ++*runb);
			}
		}
	}
	if (bp)
		bqrelse(bp);

	/*
	 * Since this is FFS independent code, we are out of scope for the
	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
	 * will fall in the range 1..um_seqinc, so we use that test and
	 * return a request for a zeroed out buffer if attempts are made
	 * to read a BLK_NOCOPY or BLK_SNAP block.
	 */
	if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc) {
		*bnp = -1;
		return (0);
	}
	/* A zero block pointer is reported as -1. */
	*bnp = blkptrtodb(ump, daddr);
	if (*bnp == 0) {
		*bnp = -1;
	}
	return (0);
}
2831541Srgrimes
2841541Srgrimes/*
2851541Srgrimes * Create an array of logical block number/offset pairs which represent the
2861541Srgrimes * path of indirect blocks required to access a data block.  The first "pair"
2871541Srgrimes * contains the logical block number of the appropriate single, double or
2881541Srgrimes * triple indirect block and the offset into the inode indirect block array.
2891541Srgrimes * Note, the logical block number of the inode single/double/triple indirect
2901541Srgrimes * block appears twice in the array, once with the offset into the i_ib and
2911541Srgrimes * once with the offset into the page itself.
2921541Srgrimes */
2931541Srgrimesint
294254283Spfgext2_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
2951541Srgrimes{
296252103Spfg	long blockcnt;
297252103Spfg	e2fs_lbn_t metalbn, realbn;
29896596Siedowse	struct ext2mount *ump;
29931394Sbde	int i, numlevels, off;
30031394Sbde	int64_t qblockcnt;
3011541Srgrimes
30296596Siedowse	ump = VFSTOEXT2(vp->v_mount);
3031541Srgrimes	if (nump)
3041541Srgrimes		*nump = 0;
3051541Srgrimes	numlevels = 0;
3061541Srgrimes	realbn = bn;
3071541Srgrimes	if ((long)bn < 0)
3081541Srgrimes		bn = -(long)bn;
3091541Srgrimes
3101541Srgrimes	/* The first NDADDR blocks are direct blocks. */
3111541Srgrimes	if (bn < NDADDR)
3121541Srgrimes		return (0);
3131541Srgrimes
3148876Srgrimes	/*
3151541Srgrimes	 * Determine the number of levels of indirection.  After this loop
3161541Srgrimes	 * is done, blockcnt indicates the number of data blocks possible
31731394Sbde	 * at the previous level of indirection, and NIADDR - i is the number
3181541Srgrimes	 * of levels of indirection needed to locate the requested block.
3191541Srgrimes	 */
3201541Srgrimes	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
3211541Srgrimes		if (i == 0)
3221541Srgrimes			return (EFBIG);
32331394Sbde		/*
32431394Sbde		 * Use int64_t's here to avoid overflow for triple indirect
32531394Sbde		 * blocks when longs have 32 bits and the block size is more
32631394Sbde		 * than 4K.
32731394Sbde		 */
32831394Sbde		qblockcnt = (int64_t)blockcnt * MNINDIR(ump);
32931394Sbde		if (bn < qblockcnt)
3301541Srgrimes			break;
33131394Sbde		blockcnt = qblockcnt;
3321541Srgrimes	}
3331541Srgrimes
3341541Srgrimes	/* Calculate the address of the first meta-block. */
3351541Srgrimes	if (realbn >= 0)
3361541Srgrimes		metalbn = -(realbn - bn + NIADDR - i);
3371541Srgrimes	else
3381541Srgrimes		metalbn = -(-realbn - bn + NIADDR - i);
3391541Srgrimes
3408876Srgrimes	/*
3411541Srgrimes	 * At each iteration, off is the offset into the bap array which is
3421541Srgrimes	 * an array of disk addresses at the current level of indirection.
3431541Srgrimes	 * The logical block number and the offset in that block are stored
3441541Srgrimes	 * into the argument array.
3451541Srgrimes	 */
3461541Srgrimes	ap->in_lbn = metalbn;
3471541Srgrimes	ap->in_off = off = NIADDR - i;
3481541Srgrimes	ap++;
3491541Srgrimes	for (++numlevels; i <= NIADDR; i++) {
3501541Srgrimes		/* If searching for a meta-data block, quit when found. */
3511541Srgrimes		if (metalbn == realbn)
3521541Srgrimes			break;
3531541Srgrimes
3541541Srgrimes		off = (bn / blockcnt) % MNINDIR(ump);
3551541Srgrimes
3561541Srgrimes		++numlevels;
3571541Srgrimes		ap->in_lbn = metalbn;
3581541Srgrimes		ap->in_off = off;
3591541Srgrimes		++ap;
3601541Srgrimes
3611541Srgrimes		metalbn -= -1 + off * blockcnt;
36231394Sbde		blockcnt /= MNINDIR(ump);
3631541Srgrimes	}
3641541Srgrimes	if (nump)
3651541Srgrimes		*nump = numlevels;
3661541Srgrimes	return (0);
3671541Srgrimes}
368