1/* $NetBSD$ */ 2 3/* 4 * Copyright (c) 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_bmap.c 8.8 (Berkeley) 8/11/95 37 */ 38 39#include <sys/cdefs.h> 40__KERNEL_RCSID(0, "$NetBSD$"); 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/buf.h> 45#include <sys/proc.h> 46#include <sys/vnode.h> 47#include <sys/mount.h> 48#include <sys/resourcevar.h> 49#include <sys/trace.h> 50#include <sys/fstrans.h> 51 52#include <miscfs/specfs/specdev.h> 53 54#include <ufs/ufs/inode.h> 55#include <ufs/ufs/ufsmount.h> 56#include <ufs/ufs/ufs_extern.h> 57#include <ufs/ufs/ufs_bswap.h> 58 59static bool 60ufs_issequential(const struct ufsmount *ump, daddr_t daddr0, daddr_t daddr1) 61{ 62 63 /* for ufs, blocks in a hole is not 'contiguous'. */ 64 if (daddr0 == 0) 65 return false; 66 67 return (daddr0 + ump->um_seqinc == daddr1); 68} 69 70/* 71 * Bmap converts the logical block number of a file to its physical block 72 * number on the disk. The conversion is done by using the logical block 73 * number to index into the array of block pointers described by the dinode. 74 */ 75int 76ufs_bmap(void *v) 77{ 78 struct vop_bmap_args /* { 79 struct vnode *a_vp; 80 daddr_t a_bn; 81 struct vnode **a_vpp; 82 daddr_t *a_bnp; 83 int *a_runp; 84 } */ *ap = v; 85 int error; 86 87 /* 88 * Check for underlying vnode requests and ensure that logical 89 * to physical mapping is requested. 90 */ 91 if (ap->a_vpp != NULL) 92 *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; 93 if (ap->a_bnp == NULL) 94 return (0); 95 96 fstrans_start(ap->a_vp->v_mount, FSTRANS_SHARED); 97 error = ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, 98 ap->a_runp, ufs_issequential); 99 fstrans_done(ap->a_vp->v_mount); 100 return error; 101} 102 103/* 104 * Indirect blocks are now on the vnode for the file. They are given negative 105 * logical block numbers. Indirect blocks are addressed by the negative 106 * address of the first data block to which they point. Double indirect blocks 107 * are addressed by one less than the address of the first indirect block to 108 * which they point. Triple indirect blocks are addressed by one less than 109 * the address of the first double indirect block to which they point. 110 * 111 * ufs_bmaparray does the bmap conversion, and if requested returns the 112 * array of logical blocks which must be traversed to get to a block. 113 * Each entry contains the offset into that block that gets you to the 114 * next block and the disk address of the block (if it is assigned). 115 */ 116 117int 118ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, 119 int *nump, int *runp, ufs_issequential_callback_t is_sequential) 120{ 121 struct inode *ip; 122 struct buf *bp, *cbp; 123 struct ufsmount *ump; 124 struct mount *mp; 125 struct indir a[NIADDR + 1], *xap; 126 daddr_t daddr; 127 daddr_t metalbn; 128 int error, maxrun = 0, num; 129 130 ip = VTOI(vp); 131 mp = vp->v_mount; 132 ump = ip->i_ump; 133#ifdef DIAGNOSTIC 134 if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) 135 panic("ufs_bmaparray: invalid arguments"); 136#endif 137 138 if (runp) { 139 /* 140 * XXX 141 * If MAXBSIZE is the largest transfer the disks can handle, 142 * we probably want maxrun to be 1 block less so that we 143 * don't create a block larger than the device can handle. 144 */ 145 *runp = 0; 146 maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1; 147 } 148 149 if (bn >= 0 && bn < NDADDR) { 150 if (nump != NULL) 151 *nump = 0; 152 if (ump->um_fstype == UFS1) 153 daddr = ufs_rw32(ip->i_ffs1_db[bn], 154 UFS_MPNEEDSWAP(ump)); 155 else 156 daddr = ufs_rw64(ip->i_ffs2_db[bn], 157 UFS_MPNEEDSWAP(ump)); 158 *bnp = blkptrtodb(ump, daddr); 159 /* 160 * Since this is FFS independent code, we are out of 161 * scope for the definitions of BLK_NOCOPY and 162 * BLK_SNAP, but we do know that they will fall in 163 * the range 1..um_seqinc, so we use that test and 164 * return a request for a zeroed out buffer if attempts 165 * are made to read a BLK_NOCOPY or BLK_SNAP block. 166 */ 167 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT 168 && daddr > 0 && 169 daddr < ump->um_seqinc) { 170 *bnp = -1; 171 } else if (*bnp == 0) { 172 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) 173 == SF_SNAPSHOT) { 174 *bnp = blkptrtodb(ump, bn * ump->um_seqinc); 175 } else { 176 *bnp = -1; 177 } 178 } else if (runp) { 179 if (ump->um_fstype == UFS1) { 180 for (++bn; bn < NDADDR && *runp < maxrun && 181 is_sequential(ump, 182 ufs_rw32(ip->i_ffs1_db[bn - 1], 183 UFS_MPNEEDSWAP(ump)), 184 ufs_rw32(ip->i_ffs1_db[bn], 185 UFS_MPNEEDSWAP(ump))); 186 ++bn, ++*runp); 187 } else { 188 for (++bn; bn < NDADDR && *runp < maxrun && 189 is_sequential(ump, 190 ufs_rw64(ip->i_ffs2_db[bn - 1], 191 UFS_MPNEEDSWAP(ump)), 192 ufs_rw64(ip->i_ffs2_db[bn], 193 UFS_MPNEEDSWAP(ump))); 194 ++bn, ++*runp); 195 } 196 } 197 return (0); 198 } 199 200 xap = ap == NULL ? a : ap; 201 if (!nump) 202 nump = # 203 if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0) 204 return (error); 205 206 num = *nump; 207 208 /* Get disk address out of indirect block array */ 209 if (ump->um_fstype == UFS1) 210 daddr = ufs_rw32(ip->i_ffs1_ib[xap->in_off], 211 UFS_MPNEEDSWAP(ump)); 212 else 213 daddr = ufs_rw64(ip->i_ffs2_ib[xap->in_off], 214 UFS_MPNEEDSWAP(ump)); 215 216 for (bp = NULL, ++xap; --num; ++xap) { 217 /* 218 * Exit the loop if there is no disk address assigned yet and 219 * the indirect block isn't in the cache, or if we were 220 * looking for an indirect block and we've found it. 221 */ 222 223 metalbn = xap->in_lbn; 224 if (metalbn == bn) 225 break; 226 if (daddr == 0) { 227 mutex_enter(&bufcache_lock); 228 cbp = incore(vp, metalbn); 229 mutex_exit(&bufcache_lock); 230 if (cbp == NULL) 231 break; 232 } 233 234 /* 235 * If we get here, we've either got the block in the cache 236 * or we have a disk address for it, go fetch it. 237 */ 238 if (bp) 239 brelse(bp, 0); 240 241 xap->in_exists = 1; 242 bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); 243 if (bp == NULL) { 244 245 /* 246 * getblk() above returns NULL only iff we are 247 * pagedaemon. See the implementation of getblk 248 * for detail. 249 */ 250 251 return (ENOMEM); 252 } 253 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { 254 trace(TR_BREADHIT, pack(vp, size), metalbn); 255 } 256#ifdef DIAGNOSTIC 257 else if (!daddr) 258 panic("ufs_bmaparray: indirect block not in cache"); 259#endif 260 else { 261 trace(TR_BREADMISS, pack(vp, size), metalbn); 262 bp->b_blkno = blkptrtodb(ump, daddr); 263 bp->b_flags |= B_READ; 264 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 265 VOP_STRATEGY(vp, bp); 266 curlwp->l_ru.ru_inblock++; /* XXX */ 267 if ((error = biowait(bp)) != 0) { 268 brelse(bp, 0); 269 return (error); 270 } 271 } 272 if (ump->um_fstype == UFS1) { 273 daddr = ufs_rw32(((u_int32_t *)bp->b_data)[xap->in_off], 274 UFS_MPNEEDSWAP(ump)); 275 if (num == 1 && daddr && runp) { 276 for (bn = xap->in_off + 1; 277 bn < MNINDIR(ump) && *runp < maxrun && 278 is_sequential(ump, 279 ufs_rw32(((int32_t *)bp->b_data)[bn-1], 280 UFS_MPNEEDSWAP(ump)), 281 ufs_rw32(((int32_t *)bp->b_data)[bn], 282 UFS_MPNEEDSWAP(ump))); 283 ++bn, ++*runp); 284 } 285 } else { 286 daddr = ufs_rw64(((u_int64_t *)bp->b_data)[xap->in_off], 287 UFS_MPNEEDSWAP(ump)); 288 if (num == 1 && daddr && runp) { 289 for (bn = xap->in_off + 1; 290 bn < MNINDIR(ump) && *runp < maxrun && 291 is_sequential(ump, 292 ufs_rw64(((int64_t *)bp->b_data)[bn-1], 293 UFS_MPNEEDSWAP(ump)), 294 ufs_rw64(((int64_t *)bp->b_data)[bn], 295 UFS_MPNEEDSWAP(ump))); 296 ++bn, ++*runp); 297 } 298 } 299 } 300 if (bp) 301 brelse(bp, 0); 302 303 /* 304 * Since this is FFS independent code, we are out of scope for the 305 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they 306 * will fall in the range 1..um_seqinc, so we use that test and 307 * return a request for a zeroed out buffer if attempts are made 308 * to read a BLK_NOCOPY or BLK_SNAP block. 309 */ 310 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT 311 && daddr > 0 && daddr < ump->um_seqinc) { 312 *bnp = -1; 313 return (0); 314 } 315 *bnp = blkptrtodb(ump, daddr); 316 if (*bnp == 0) { 317 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) 318 == SF_SNAPSHOT) { 319 *bnp = blkptrtodb(ump, bn * ump->um_seqinc); 320 } else { 321 *bnp = -1; 322 } 323 } 324 return (0); 325} 326 327/* 328 * Create an array of logical block number/offset pairs which represent the 329 * path of indirect blocks required to access a data block. The first "pair" 330 * contains the logical block number of the appropriate single, double or 331 * triple indirect block and the offset into the inode indirect block array. 332 * Note, the logical block number of the inode single/double/triple indirect 333 * block appears twice in the array, once with the offset into the i_ffs1_ib and 334 * once with the offset into the page itself. 335 */ 336int 337ufs_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump) 338{ 339 daddr_t metalbn, realbn; 340 struct ufsmount *ump; 341 int64_t blockcnt; 342 int lbc; 343 int i, numlevels, off; 344 345 ump = VFSTOUFS(vp->v_mount); 346 if (nump) 347 *nump = 0; 348 numlevels = 0; 349 realbn = bn; 350 if (bn < 0) 351 bn = -bn; 352 KASSERT(bn >= NDADDR); 353 354 /* 355 * Determine the number of levels of indirection. After this loop 356 * is done, blockcnt indicates the number of data blocks possible 357 * at the given level of indirection, and NIADDR - i is the number 358 * of levels of indirection needed to locate the requested block. 359 */ 360 361 bn -= NDADDR; 362 for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) { 363 if (i == 0) 364 return (EFBIG); 365 366 lbc += ump->um_lognindir; 367 blockcnt = (int64_t)1 << lbc; 368 369 if (bn < blockcnt) 370 break; 371 } 372 373 /* Calculate the address of the first meta-block. */ 374 metalbn = -((realbn >= 0 ? realbn : -realbn) - bn + NIADDR - i); 375 376 /* 377 * At each iteration, off is the offset into the bap array which is 378 * an array of disk addresses at the current level of indirection. 379 * The logical block number and the offset in that block are stored 380 * into the argument array. 381 */ 382 ap->in_lbn = metalbn; 383 ap->in_off = off = NIADDR - i; 384 ap->in_exists = 0; 385 ap++; 386 for (++numlevels; i <= NIADDR; i++) { 387 /* If searching for a meta-data block, quit when found. */ 388 if (metalbn == realbn) 389 break; 390 391 lbc -= ump->um_lognindir; 392 off = (bn >> lbc) & (MNINDIR(ump) - 1); 393 394 ++numlevels; 395 ap->in_lbn = metalbn; 396 ap->in_off = off; 397 ap->in_exists = 0; 398 ++ap; 399 400 metalbn -= -1 + ((int64_t)off << lbc); 401 } 402 if (nump) 403 *nump = numlevels; 404 return (0); 405} 406