1/* $NetBSD: ulfs_bmap.c,v 1.9 2017/03/30 09:10:08 hannken Exp $ */ 2/* from NetBSD: ufs_bmap.c,v 1.50 2013/01/22 09:39:18 dholland Exp */ 3 4/* 5 * Copyright (c) 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ufs_bmap.c 8.8 (Berkeley) 8/11/95 38 */ 39 40#include <sys/cdefs.h> 41__KERNEL_RCSID(0, "$NetBSD: ulfs_bmap.c,v 1.9 2017/03/30 09:10:08 hannken Exp $"); 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/stat.h> 46#include <sys/buf.h> 47#include <sys/proc.h> 48#include <sys/vnode.h> 49#include <sys/mount.h> 50#include <sys/resourcevar.h> 51#include <sys/trace.h> 52 53#include <miscfs/specfs/specdev.h> 54 55#include <ufs/lfs/ulfs_inode.h> 56#include <ufs/lfs/ulfsmount.h> 57#include <ufs/lfs/ulfs_extern.h> 58#include <ufs/lfs/ulfs_bswap.h> 59 60static bool 61ulfs_issequential(const struct lfs *fs, daddr_t daddr0, daddr_t daddr1) 62{ 63 64 /* for ulfs, blocks in a hole is not 'contiguous'. */ 65 if (daddr0 == 0) 66 return false; 67 68 return (daddr0 + fs->um_seqinc == daddr1); 69} 70 71/* 72 * This is used for block pointers in inodes and elsewhere, which can 73 * contain the magic value UNWRITTEN, which is -2. This is mishandled 74 * by u32 -> u64 promotion unless special-cased. 75 * 76 * XXX this should be rolled into better inode accessors and go away. 77 */ 78static inline uint64_t 79ulfs_fix_unwritten(uint32_t val) 80{ 81 if (val == (uint32_t)UNWRITTEN) { 82 return (uint64_t)(int64_t)UNWRITTEN; 83 } else { 84 return val; 85 } 86} 87 88 89/* 90 * Bmap converts the logical block number of a file to its physical block 91 * number on the disk. 
The conversion is done by using the logical block 92 * number to index into the array of block pointers described by the dinode. 93 */ 94int 95ulfs_bmap(void *v) 96{ 97 struct vop_bmap_args /* { 98 struct vnode *a_vp; 99 daddr_t a_bn; 100 struct vnode **a_vpp; 101 daddr_t *a_bnp; 102 int *a_runp; 103 } */ *ap = v; 104 int error; 105 106 /* 107 * Check for underlying vnode requests and ensure that logical 108 * to physical mapping is requested. 109 */ 110 if (ap->a_vpp != NULL) 111 *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; 112 if (ap->a_bnp == NULL) 113 return (0); 114 115 error = ulfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, 116 ap->a_runp, ulfs_issequential); 117 return error; 118} 119 120/* 121 * Indirect blocks are now on the vnode for the file. They are given negative 122 * logical block numbers. Indirect blocks are addressed by the negative 123 * address of the first data block to which they point. Double indirect blocks 124 * are addressed by one less than the address of the first indirect block to 125 * which they point. Triple indirect blocks are addressed by one less than 126 * the address of the first double indirect block to which they point. 127 * 128 * ulfs_bmaparray does the bmap conversion, and if requested returns the 129 * array of logical blocks which must be traversed to get to a block. 130 * Each entry contains the offset into that block that gets you to the 131 * next block and the disk address of the block (if it is assigned). 
 */

/*
 * vp     - vnode of the file being mapped.
 * bn     - logical block number to translate.
 * bnp    - out: device block number, or -1 to request a zeroed buffer
 *          (hole, or BLK_NOCOPY/BLK_SNAP on a snapshot inode).
 * ap/nump - optional (both NULL or both non-NULL, asserted below): the
 *          indirect-block path and its depth; a local array is used
 *          when the caller passes NULL.
 * runp   - optional out: number of contiguous blocks following bn.
 * is_sequential - contiguity predicate used for the run count.
 * Returns 0 on success, otherwise an errno (ENOMEM from getblk,
 * EFBIG via ulfs_getlbns, or a biowait error).
 */
int
ulfs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap,
    int *nump, int *runp, ulfs_issequential_callback_t is_sequential)
{
	struct inode *ip;
	struct buf *bp, *cbp;
	struct ulfsmount *ump;
	struct lfs *fs;
	struct mount *mp;
	struct indir a[ULFS_NIADDR + 1], *xap;
	daddr_t daddr;
	daddr_t metalbn;
	int error, maxrun = 0, num;

	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = ip->i_ump;
	fs = ip->i_lfs;
	/* ap and nump must be supplied together or not at all. */
	KASSERTMSG(((ap == NULL) == (nump == NULL)),
	    "ulfs_bmaparray: invalid arguments: ap=%p, nump=%p", ap, nump);

	if (runp) {
		/*
		 * XXX
		 * If MAXBSIZE is the largest transfer the disks can handle,
		 * we probably want maxrun to be 1 block less so that we
		 * don't create a block larger than the device can handle.
		 */
		*runp = 0;
		maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
	}

	/* Direct blocks: answered straight from the dinode, no indirection. */
	if (bn >= 0 && bn < ULFS_NDADDR) {
		if (nump != NULL)
			*nump = 0;
		/* ULFS1 pointers are 32-bit and may hold UNWRITTEN (-2). */
		if (ump->um_fstype == ULFS1)
			daddr = ulfs_fix_unwritten(ulfs_rw32(ip->i_din->u_32.di_db[bn],
			    ULFS_MPNEEDSWAP(fs)));
		else
			daddr = ulfs_rw64(ip->i_din->u_64.di_db[bn],
			    ULFS_MPNEEDSWAP(fs));
		*bnp = blkptrtodb(fs, daddr);
		/*
		 * Since this is FFS independent code, we are out of
		 * scope for the definitions of BLK_NOCOPY and
		 * BLK_SNAP, but we do know that they will fall in
		 * the range 1..um_seqinc, so we use that test and
		 * return a request for a zeroed out buffer if attempts
		 * are made to read a BLK_NOCOPY or BLK_SNAP block.
		 */
		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
		    && daddr > 0 &&
		    daddr < fs->um_seqinc) {
			*bnp = -1;
		} else if (*bnp == 0) {
			/* Hole: on a snapshot, map back to the original. */
			if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
			    == SF_SNAPSHOT) {
				*bnp = blkptrtodb(fs, bn * fs->um_seqinc);
			} else {
				*bnp = -1;
			}
		} else if (runp) {
			/* Count how many following direct blocks are contiguous. */
			if (ump->um_fstype == ULFS1) {
				for (++bn; bn < ULFS_NDADDR && *runp < maxrun &&
				    is_sequential(fs,
				        ulfs_fix_unwritten(ulfs_rw32(ip->i_din->u_32.di_db[bn - 1],
				            ULFS_MPNEEDSWAP(fs))),
				        ulfs_fix_unwritten(ulfs_rw32(ip->i_din->u_32.di_db[bn],
				            ULFS_MPNEEDSWAP(fs))));
				    ++bn, ++*runp);
			} else {
				for (++bn; bn < ULFS_NDADDR && *runp < maxrun &&
				    is_sequential(fs,
				        ulfs_rw64(ip->i_din->u_64.di_db[bn - 1],
				            ULFS_MPNEEDSWAP(fs)),
				        ulfs_rw64(ip->i_din->u_64.di_db[bn],
				            ULFS_MPNEEDSWAP(fs)));
				    ++bn, ++*runp);
			}
		}
		return (0);
	}

	/* Indirect block: compute the chain of metadata blocks to walk. */
	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	if ((error = ulfs_getlbns(vp, bn, xap, nump)) != 0)
		return (error);

	num = *nump;

	/* Get disk address out of indirect block array */
	// XXX clean this up
	if (ump->um_fstype == ULFS1)
		daddr = ulfs_fix_unwritten(ulfs_rw32(ip->i_din->u_32.di_ib[xap->in_off],
		    ULFS_MPNEEDSWAP(fs)));
	else
		daddr = ulfs_rw64(ip->i_din->u_64.di_ib[xap->in_off],
		    ULFS_MPNEEDSWAP(fs));

	/* Walk down the indirect chain, one metadata block per iteration. */
	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = xap->in_lbn;
		if (metalbn == bn)
			break;
		if (daddr == 0) {
			mutex_enter(&bufcache_lock);
			cbp = incore(vp, metalbn);
			mutex_exit(&bufcache_lock);
			if (cbp == NULL)
				break;
		}

		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			brelse(bp, 0);

		xap->in_exists = 1;
		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
		if (bp == NULL) {

			/*
			 * getblk() above returns NULL only iff we are
			 * pagedaemon.  See the implementation of getblk
			 * for detail.
			 */

			return (ENOMEM);
		}
		if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
			/* Cached and valid: no I/O needed. */
			trace(TR_BREADHIT, pack(vp, size), metalbn);
		} else {
			/* Not cached, so we must have a disk address to read. */
			KASSERTMSG(daddr,
			    "ulfs_bmaparray: indirect block not in cache");
			trace(TR_BREADMISS, pack(vp, size), metalbn);
			bp->b_blkno = blkptrtodb(fs, daddr);
			bp->b_flags |= B_READ;
			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
			VOP_STRATEGY(vp, bp);
			curlwp->l_ru.ru_inblock++;	/* XXX */
			if ((error = biowait(bp)) != 0) {
				brelse(bp, 0);
				return (error);
			}
		}
		/*
		 * Pick the next-level pointer out of the indirect block just
		 * read, and on the last level count the contiguous run.
		 * NOTE(review): the run loops below cast b_data to int32_t/
		 * int64_t while the pointer fetch above uses the unsigned
		 * types; harmless but inconsistent — worth unifying upstream.
		 */
		if (ump->um_fstype == ULFS1) {
			daddr = ulfs_fix_unwritten(ulfs_rw32(((u_int32_t *)bp->b_data)[xap->in_off],
			    ULFS_MPNEEDSWAP(fs)));
			if (num == 1 && daddr && runp) {
				for (bn = xap->in_off + 1;
				    bn < MNINDIR(fs) && *runp < maxrun &&
				    is_sequential(fs,
				        ulfs_fix_unwritten(ulfs_rw32(((int32_t *)bp->b_data)[bn-1],
				            ULFS_MPNEEDSWAP(fs))),
				        ulfs_fix_unwritten(ulfs_rw32(((int32_t *)bp->b_data)[bn],
				            ULFS_MPNEEDSWAP(fs))));
				    ++bn, ++*runp);
			}
		} else {
			daddr = ulfs_rw64(((u_int64_t *)bp->b_data)[xap->in_off],
			    ULFS_MPNEEDSWAP(fs));
			if (num == 1 && daddr && runp) {
				for (bn = xap->in_off + 1;
				    bn < MNINDIR(fs) && *runp < maxrun &&
				    is_sequential(fs,
				        ulfs_rw64(((int64_t *)bp->b_data)[bn-1],
				            ULFS_MPNEEDSWAP(fs)),
				        ulfs_rw64(((int64_t *)bp->b_data)[bn],
				            ULFS_MPNEEDSWAP(fs)));
				    ++bn, ++*runp);
			}
		}
	}
	if (bp)
		brelse(bp, 0);

	/*
	 * Since this is FFS independent code, we are out of scope for the
	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
	 * will fall in the range 1..um_seqinc, so we use that test and
	 * return a request for a zeroed out buffer if attempts are made
	 * to read a BLK_NOCOPY or BLK_SNAP block.
	 */
	if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
	    && daddr > 0 && daddr < fs->um_seqinc) {
		*bnp = -1;
		return (0);
	}
	*bnp = blkptrtodb(fs, daddr);
	if (*bnp == 0) {
		/* Hole: snapshots map to the original block, others to -1. */
		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
		    == SF_SNAPSHOT) {
			*bnp = blkptrtodb(fs, bn * fs->um_seqinc);
		} else {
			*bnp = -1;
		}
	}
	return (0);
}

/*
 * Create an array of logical block number/offset pairs which represent the
 * path of indirect blocks required to access a data block.  The first "pair"
 * contains the logical block number of the appropriate single, double or
 * triple indirect block and the offset into the inode indirect block array.
 * Note, the logical block number of the inode single/double/triple indirect
 * block appears twice in the array, once with the offset into the i_ffs1_ib
 * and once with the offset into the page itself.
 *
 * Returns 0 on success (path in ap[], depth in *nump), or EFBIG when bn
 * lies beyond triple indirection.
 */
int
ulfs_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
{
	daddr_t metalbn, realbn;
	struct ulfsmount *ump;
	struct lfs *fs;
	int64_t blockcnt;
	int lbc;
	int i, numlevels, off;

	ump = VFSTOULFS(vp->v_mount);
	fs = ump->um_lfs;
	if (nump)
		*nump = 0;
	numlevels = 0;
	realbn = bn;
	/* Negative bn names a metadata block; work with its magnitude. */
	if (bn < 0)
		bn = -bn;
	KASSERT(bn >= ULFS_NDADDR);

	/*
	 * Determine the number of levels of indirection.  After this loop
	 * is done, blockcnt indicates the number of data blocks possible
	 * at the given level of indirection, and ULFS_NIADDR - i is the
	 * number of levels of indirection needed to locate the requested
	 * block.
	 */

	bn -= ULFS_NDADDR;
	for (lbc = 0, i = ULFS_NIADDR;; i--, bn -= blockcnt) {
		if (i == 0)
			return (EFBIG);

		/* lbc accumulates log2 of blocks addressable at this level. */
		lbc += fs->um_lognindir;
		blockcnt = (int64_t)1 << lbc;

		if (bn < blockcnt)
			break;
	}

	/* Calculate the address of the first meta-block. */
	metalbn = -((realbn >= 0 ? realbn : -realbn) - bn + ULFS_NIADDR - i);

	/*
	 * At each iteration, off is the offset into the bap array which is
	 * an array of disk addresses at the current level of indirection.
	 * The logical block number and the offset in that block are stored
	 * into the argument array.
	 */
	ap->in_lbn = metalbn;
	ap->in_off = off = ULFS_NIADDR - i;
	ap->in_exists = 0;
	ap++;
	for (++numlevels; i <= ULFS_NIADDR; i++) {
		/* If searching for a meta-data block, quit when found. */
		if (metalbn == realbn)
			break;

		/* Offset of the next-level pointer within this block. */
		lbc -= fs->um_lognindir;
		off = (bn >> lbc) & (MNINDIR(fs) - 1);

		++numlevels;
		ap->in_lbn = metalbn;
		ap->in_off = off;
		ap->in_exists = 0;
		++ap;

		metalbn -= -1 + ((int64_t)off << lbc);
	}
	if (nump)
		*nump = numlevels;
	return (0);
}