1/*- 2 * Copyright (c) 2012 Semihalf 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: releng/10.3/sys/fs/nandfs/bmap.c 240358 2012-09-11 09:38:32Z kevlo $"); 28 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/namei.h> 32#include <sys/kernel.h> 33#include <sys/stat.h> 34#include <sys/buf.h> 35#include <sys/bio.h> 36#include <sys/proc.h> 37#include <sys/mount.h> 38#include <sys/vnode.h> 39#include <sys/signalvar.h> 40#include <sys/malloc.h> 41#include <sys/dirent.h> 42#include <sys/lockf.h> 43#include <sys/ktr.h> 44#include <sys/kdb.h> 45 46#include <vm/vm.h> 47#include <vm/vm_extern.h> 48#include <vm/vm_object.h> 49#include <vm/vnode_pager.h> 50 51#include <machine/_inttypes.h> 52 53#include <vm/vm.h> 54#include <vm/vm_extern.h> 55#include <vm/vm_object.h> 56#include <vm/vnode_pager.h> 57 58#include "nandfs_mount.h" 59#include "nandfs.h" 60#include "nandfs_subr.h" 61#include "bmap.h" 62 63static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t, 64 struct nandfs_indir *, int *); 65 66int 67bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk) 68{ 69 struct nandfs_inode *ip; 70 struct nandfs_indir a[NIADDR + 1], *ap; 71 nandfs_daddr_t daddr; 72 struct buf *bp; 73 int error; 74 int num, *nump; 75 76 DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk)); 77 ip = &node->nn_inode; 78 79 ap = a; 80 nump = # 81 82 error = bmap_getlbns(node, lblk, ap, nump); 83 if (error) 84 return (error); 85 86 if (num == 0) { 87 *vblk = ip->i_db[lblk]; 88 return (0); 89 } 90 91 DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__, 92 node, lblk, ap->in_off)); 93 daddr = ip->i_ib[ap->in_off]; 94 for (bp = NULL, ++ap; --num; ap++) { 95 if (daddr == 0) { 96 DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with " 97 "vblk 0\n", __func__, node, lblk)); 98 *vblk = 0; 99 return (0); 100 } 101 if (ap->in_lbn == lblk) { 102 DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx " 103 "returning address of indirect block (%jx)\n", 104 __func__, node, lblk, ap->in_lbn, daddr)); 105 *vblk = daddr; 106 return (0); 107 } 108 109 DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block " 110 "ap->in_lbn=%jx\n", __func__, node, lblk, ap->in_lbn)); 111 112 error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp); 113 if (error) { 114 brelse(bp); 115 return (error); 116 } 117 118 daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off]; 119 brelse(bp); 120 } 121 122 DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__, 123 node, lblk, daddr)); 124 *vblk = daddr; 125 126 return (0); 127} 128 129int 130bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force) 131{ 132 struct nandfs_indir a[NIADDR+1], *ap; 133#ifdef DEBUG 134 nandfs_daddr_t daddr; 135#endif 136 struct buf *bp; 137 int error; 138 int num, *nump; 139 140 DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk)); 141 142 ap = a; 143 nump = # 144 145 error = bmap_getlbns(node, lblk, ap, nump); 146 if (error) 147 return (error); 148 149 /* 150 * Direct block, nothing to do 151 */ 152 if (num == 0) 153 return (0); 154 155 DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node)); 156 157 for (bp = NULL, ++ap; --num; ap++) { 158 error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp); 159 if (error) { 160 brelse(bp); 161 return (error); 162 } 163 164#ifdef DEBUG 165 daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off]; 166 MPASS(daddr != 0 || node->nn_ino == 3); 167#endif 168 169 error = nandfs_dirty_buf_meta(bp, force); 170 if (error) 171 return (error); 172 } 173 174 return (0); 175} 176 177int 178bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk, 179 nandfs_daddr_t vblk) 180{ 181 struct nandfs_inode *ip; 182 struct nandfs_indir a[NIADDR+1], *ap; 183 struct buf *bp; 184 nandfs_daddr_t daddr; 185 int error; 186 int num, *nump, i; 187 188 DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk, 189 vblk)); 190 191 ip = &node->nn_inode; 192 193 ap = a; 194 nump = # 195 196 error = bmap_getlbns(node, lblk, ap, nump); 197 if (error) 198 return (error); 199 200 DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__, 201 node, lblk, vblk, num)); 202 203 if (num == 0) { 204 DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__, 205 node, lblk)); 206 ip->i_db[lblk] = vblk; 207 return (0); 208 } 209 210 DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block level %d\n", 211 __func__, node, lblk, ap->in_off)); 212 213 if (num == 1) { 214 DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting " 215 "%jx as vblk for indirect block %d\n", __func__, node, 216 lblk, vblk, ap->in_off)); 217 ip->i_ib[ap->in_off] = vblk; 218 return (0); 219 } 220 221 bp = NULL; 222 daddr = ip->i_ib[a[0].in_off]; 223 for (i = 1; i < num; i++) { 224 if (bp) 225 brelse(bp); 226 if (daddr == 0) { 227 DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create " 228 "block %jx %d\n", __func__, node, lblk, vblk, 229 a[i].in_lbn, a[i].in_off)); 230 error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED, 231 0, &bp); 232 if (error) 233 return (error); 234 } else { 235 DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read " 236 "block %jx %d\n", __func__, node, daddr, vblk, 237 a[i].in_lbn, a[i].in_off)); 238 error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0, &bp); 239 if (error) { 240 brelse(bp); 241 return (error); 242 } 243 } 244 daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off]; 245 } 246 i--; 247 248 DPRINTF(BMAP, 249 ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at " 250 "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off, 251 daddr)); 252 253 if (!bp) { 254 nandfs_error("%s: cannot find indirect block\n", __func__); 255 return (-1); 256 } 257 ((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk; 258 259 error = nandfs_dirty_buf_meta(bp, 0); 260 if (error) { 261 nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp); 262 return (error); 263 } 264 DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__, 265 node, lblk, vblk)); 266 267 return (error); 268} 269 270CTASSERT(NIADDR <= 3); 271#define SINGLE 0 /* index of single indirect block */ 272#define DOUBLE 1 /* index of double indirect block */ 273#define TRIPLE 2 /* index of triple indirect block */ 274 275static __inline nandfs_lbn_t 276lbn_offset(struct nandfs_device *fsdev, int level) 277{ 278 nandfs_lbn_t res; 279 280 for (res = 1; level > 0; level--) 281 res *= MNINDIR(fsdev); 282 return (res); 283} 284 285static nandfs_lbn_t 286blocks_inside(struct nandfs_device *fsdev, int level, struct nandfs_indir *nip) 287{ 288 nandfs_lbn_t blocks; 289 290 for (blocks = 1; level >= SINGLE; level--, nip++) { 291 MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev)); 292 blocks += nip->in_off * lbn_offset(fsdev, level); 293 } 294 295 return (blocks); 296} 297 298static int 299bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left, 300 int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp, 301 nandfs_daddr_t *copy) 302{ 303 struct buf *bp; 304 nandfs_lbn_t i, lbn, nlbn, factor, tosub; 305 struct nandfs_device *fsdev; 306 int error, lcleaned, modified; 307 308 DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__, 309 node, level, *left)); 310 311 fsdev = node->nn_nandfsdev; 312 313 MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev)); 314 315 factor = lbn_offset(fsdev, level); 316 lbn = ap->in_lbn; 317 318 error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); 319 if (error) { 320 brelse(bp); 321 return (error); 322 } 323 324 bcopy(bp->b_data, copy, fsdev->nd_blocksize); 325 bqrelse(bp); 326 327 modified = 0; 328 329 i = ap->in_off; 330 331 if (ap != fp) 332 ap++; 333 for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--, 334 nlbn += factor) { 335 lcleaned = 0; 336 337 DPRINTF(BMAP, 338 ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n", 339 __func__, node, i, nlbn, *left, ap, copy[i])); 340 341 if (copy[i] == 0) { 342 tosub = blocks_inside(fsdev, level - 1, ap); 343 if (tosub > *left) 344 tosub = 0; 345 346 *left -= tosub; 347 } else { 348 if (level > SINGLE) { 349 if (ap == fp) 350 ap->in_lbn = nlbn; 351 352 error = bmap_truncate_indirect(node, level - 1, 353 left, &lcleaned, ap, fp, 354 copy + MNINDIR(fsdev)); 355 if (error) 356 return (error); 357 } else { 358 error = nandfs_bdestroy(node, copy[i]); 359 if (error) 360 return (error); 361 lcleaned = 1; 362 *left -= 1; 363 } 364 } 365 366 if (lcleaned) { 367 if (level > SINGLE) { 368 error = nandfs_vblock_end(fsdev, copy[i]); 369 if (error) 370 return (error); 371 } 372 copy[i] = 0; 373 modified++; 374 } 375 376 ap = fp; 377 } 378 379 if (i == -1) 380 *cleaned = 1; 381 382 error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); 383 if (error) { 384 brelse(bp); 385 return (error); 386 } 387 if (modified) 388 bcopy(copy, bp->b_data, fsdev->nd_blocksize); 389 390 error = nandfs_dirty_buf_meta(bp, 0); 391 if (error) 392 return (error); 393 394 return (error); 395} 396 397int 398bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk, 399 nandfs_lbn_t todo) 400{ 401 struct nandfs_inode *ip; 402 struct nandfs_indir a[NIADDR + 1], f[NIADDR], *ap; 403 nandfs_daddr_t indir_lbn[NIADDR]; 404 nandfs_daddr_t *copy; 405 int error, level; 406 nandfs_lbn_t left, tosub; 407 struct nandfs_device *fsdev; 408 int cleaned, i; 409 int num, *nump; 410 411 DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__, 412 node, lastblk, todo)); 413 414 ip = &node->nn_inode; 415 fsdev = node->nn_nandfsdev; 416 417 ap = a; 418 nump = # 419 420 error = bmap_getlbns(node, lastblk, ap, nump); 421 if (error) 422 return (error); 423 424 indir_lbn[SINGLE] = -NDADDR; 425 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1; 426 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev) 427 * MNINDIR(fsdev) - 1; 428 429 for (i = 0; i < NIADDR; i++) { 430 f[i].in_off = MNINDIR(fsdev) - 1; 431 f[i].in_lbn = 0xdeadbeef; 432 } 433 434 left = todo; 435 436#ifdef DEBUG 437 a[num].in_off = -1; 438#endif 439 440 ap++; 441 num -= 2; 442 443 if (num < 0) 444 goto direct; 445 446 copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1), 447 M_NANDFSTEMP, M_WAITOK); 448 449 for (level = num; level >= SINGLE && left > 0; level--) { 450 cleaned = 0; 451 452 if (ip->i_ib[level] == 0) { 453 tosub = blocks_inside(fsdev, level, ap); 454 if (tosub > left) 455 left = 0; 456 else 457 left -= tosub; 458 } else { 459 if (ap == f) 460 ap->in_lbn = indir_lbn[level]; 461 error = bmap_truncate_indirect(node, level, &left, 462 &cleaned, ap, f, copy); 463 if (error) { 464 nandfs_error("%s: error %d when truncate " 465 "at level %d\n", __func__, error, level); 466 return (error); 467 } 468 } 469 470 if (cleaned) { 471 nandfs_vblock_end(fsdev, ip->i_ib[level]); 472 ip->i_ib[level] = 0; 473 } 474 475 ap = f; 476 } 477 478 free(copy, M_NANDFSTEMP); 479 480direct: 481 if (num < 0) 482 i = lastblk; 483 else 484 i = NDADDR - 1; 485 486 for (; i >= 0 && left > 0; i--) { 487 if (ip->i_db[i] != 0) { 488 error = nandfs_bdestroy(node, ip->i_db[i]); 489 if (error) { 490 nandfs_error("%s: cannot destroy " 491 "block %jx, error %d\n", __func__, 492 (uintmax_t)ip->i_db[i], error); 493 return (error); 494 } 495 ip->i_db[i] = 0; 496 } 497 498 left--; 499 } 500 501 KASSERT(left == 0, 502 ("truncated wrong number of blocks (%jd should be 0)", left)); 503 504 return (error); 505} 506 507nandfs_lbn_t 508get_maxfilesize(struct nandfs_device *fsdev) 509{ 510 struct nandfs_indir f[NIADDR]; 511 nandfs_lbn_t max; 512 int i; 513 514 max = NDADDR; 515 516 for (i = 0; i < NIADDR; i++) { 517 f[i].in_off = MNINDIR(fsdev) - 1; 518 max += blocks_inside(fsdev, i, f); 519 } 520 521 max *= fsdev->nd_blocksize; 522 523 return (max); 524} 525 526/* 527 * This is ufs_getlbns with minor modifications. 528 */ 529/* 530 * Create an array of logical block number/offset pairs which represent the 531 * path of indirect blocks required to access a data block. The first "pair" 532 * contains the logical block number of the appropriate single, double or 533 * triple indirect block and the offset into the inode indirect block array. 534 * Note, the logical block number of the inode single/double/triple indirect 535 * block appears twice in the array, once with the offset into the i_ib and 536 * once with the offset into the page itself. 537 */ 538static int 539bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn, struct nandfs_indir *ap, int *nump) 540{ 541 nandfs_daddr_t blockcnt; 542 nandfs_lbn_t metalbn, realbn; 543 struct nandfs_device *fsdev; 544 int i, numlevels, off; 545 546 fsdev = node->nn_nandfsdev; 547 548 DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__, 549 node, bn, MNINDIR(fsdev))); 550 551 if (nump) 552 *nump = 0; 553 numlevels = 0; 554 realbn = bn; 555 556 if (bn < 0) 557 bn = -bn; 558 559 /* The first NDADDR blocks are direct blocks. */ 560 if (bn < NDADDR) 561 return (0); 562 563 /* 564 * Determine the number of levels of indirection. After this loop 565 * is done, blockcnt indicates the number of data blocks possible 566 * at the previous level of indirection, and NIADDR - i is the number 567 * of levels of indirection needed to locate the requested block. 568 */ 569 for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { 570 DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__, 571 blockcnt, i, bn)); 572 if (i == 0) 573 return (EFBIG); 574 blockcnt *= MNINDIR(fsdev); 575 if (bn < blockcnt) 576 break; 577 } 578 579 /* Calculate the address of the first meta-block. */ 580 if (realbn >= 0) 581 metalbn = -(realbn - bn + NIADDR - i); 582 else 583 metalbn = -(-realbn - bn + NIADDR - i); 584 585 /* 586 * At each iteration, off is the offset into the bap array which is 587 * an array of disk addresses at the current level of indirection. 588 * The logical block number and the offset in that block are stored 589 * into the argument array. 590 */ 591 ap->in_lbn = metalbn; 592 ap->in_off = off = NIADDR - i; 593 594 DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__, 595 metalbn, off)); 596 597 ap++; 598 for (++numlevels; i <= NIADDR; i++) { 599 /* If searching for a meta-data block, quit when found. */ 600 if (metalbn == realbn) 601 break; 602 603 blockcnt /= MNINDIR(fsdev); 604 off = (bn / blockcnt) % MNINDIR(fsdev); 605 606 ++numlevels; 607 ap->in_lbn = metalbn; 608 ap->in_off = off; 609 610 DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__, 611 ap->in_lbn, ap->in_off)); 612 ++ap; 613 614 metalbn -= -1 + off * blockcnt; 615 } 616 if (nump) 617 *nump = numlevels; 618 619 DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels)); 620 621 return (0); 622} 623