1/* 2 * Copyright (c) International Business Machines Corp., 2000-2002 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18/* 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 20 */ 21 22#include <linux/fs.h> 23#include "jfs_incore.h" 24#include "jfs_filsys.h" 25#include "jfs_metapage.h" 26#include "jfs_dmap.h" 27#include "jfs_dinode.h" 28#include "jfs_superblock.h" 29#include "jfs_debug.h" 30 31/* 32 * xtree local flag 33 */ 34#define XT_INSERT 0x00000001 35 36/* 37 * xtree key/entry comparison: extent offset 38 * 39 * return: 40 * -1: k < start of extent 41 * 0: start_of_extent <= k <= end_of_extent 42 * 1: k > end_of_extent 43 */ 44#define XT_CMP(CMP, K, X, OFFSET64)\ 45{\ 46 OFFSET64 = offsetXAD(X);\ 47 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 48 ((K) < OFFSET64) ? 
-1 : 0;\ 49} 50 51/* write a xad entry */ 52#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 53{\ 54 (XAD)->flag = (FLAG);\ 55 XADoffset((XAD), (OFF));\ 56 XADlength((XAD), (LEN));\ 57 XADaddress((XAD), (ADDR));\ 58} 59 60#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 61 62/* get page buffer for specified block address */ 63#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 64{\ 65 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ 66 if (!(RC))\ 67 {\ 68 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ 69 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ 70 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ 71 {\ 72 jERROR(1,("XT_GETPAGE: xtree page corrupt\n"));\ 73 BT_PUTPAGE(MP);\ 74 updateSuper((IP)->i_sb, FM_DIRTY);\ 75 MP = NULL;\ 76 RC = EIO;\ 77 }\ 78 }\ 79} 80 81/* for consistency */ 82#define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 83 84#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 85 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 86/* xtree entry parameter descriptor */ 87struct xtsplit { 88 struct metapage *mp; 89 s16 index; 90 u8 flag; 91 s64 off; 92 s64 addr; 93 int len; 94 struct pxdlist *pxdlist; 95}; 96 97 98/* 99 * statistics 100 */ 101#ifdef CONFIG_JFS_STATISTICS 102static struct { 103 uint search; 104 uint fastSearch; 105 uint split; 106} xtStat; 107#endif 108 109 110/* 111 * forward references 112 */ 113static int xtSearch(struct inode *ip, 114 s64 xoff, int *cmpp, struct btstack * btstack, int flag); 115 116static int xtSplitUp(tid_t tid, 117 struct inode *ip, 118 struct xtsplit * split, struct btstack * btstack); 119 120static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 121 struct metapage ** rmpp, s64 * rbnp); 122 123static int xtSplitRoot(tid_t tid, struct inode *ip, 124 struct xtsplit * split, struct metapage ** rmpp); 125 126#ifdef _STILL_TO_PORT 127static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * 
fmp, 128 xtpage_t * fp, struct btstack * btstack); 129 130static int xtSearchNode(struct inode *ip, 131 xad_t * xad, 132 int *cmpp, struct btstack * btstack, int flag); 133 134static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 135#endif /* _STILL_TO_PORT */ 136 137/* External references */ 138 139/* 140 * debug control 141 */ 142/* #define _JFS_DEBUG_XTREE 1 */ 143 144 145/* 146 * xtLookup() 147 * 148 * function: map a single page into a physical extent; 149 */ 150int xtLookup(struct inode *ip, s64 lstart, 151 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) 152{ 153 int rc = 0; 154 struct btstack btstack; 155 int cmp; 156 s64 bn; 157 struct metapage *mp; 158 xtpage_t *p; 159 int index; 160 xad_t *xad; 161 s64 size, xoff, xend; 162 int xlen; 163 s64 xaddr; 164 165 *plen = 0; 166 167 if (!no_check) { 168 /* is lookup offset beyond eof ? */ 169 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 170 JFS_SBI(ip->i_sb)->l2bsize; 171 if (lstart >= size) { 172 jERROR(1, 173 ("xtLookup: lstart (0x%lx) >= size (0x%lx)\n", 174 (ulong) lstart, (ulong) size)); 175 return 0; 176 } 177 } 178 179 /* 180 * search for the xad entry covering the logical extent 181 */ 182//search: 183 if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { 184 jERROR(1, ("xtLookup: xtSearch returned %d\n", rc)); 185 return rc; 186 } 187 188 /* 189 * compute the physical extent covering logical extent 190 * 191 * N.B. search may have failed (e.g., hole in sparse file), 192 * and returned the index of the next entry. 193 */ 194 /* retrieve search result */ 195 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 196 197 /* is xad found covering start of logical extent ? 
198 * lstart is a page start address, 199 * i.e., lstart cannot start in a hole; 200 */ 201 if (cmp) { 202 jFYI(1, ("xtLookup: cmp = %d\n", cmp)); 203 goto out; 204 } 205 206 /* 207 * lxd covered by xad 208 */ 209 xad = &p->xad[index]; 210 xoff = offsetXAD(xad); 211 xlen = lengthXAD(xad); 212 xend = xoff + xlen; 213 xaddr = addressXAD(xad); 214 215 jEVENT(0, 216 ("index = %d, xoff = 0x%lx, xlen = 0x%x, xaddr = 0x%lx\n", 217 index, (ulong) xoff, xlen, (ulong) xaddr)); 218 219 /* initialize new pxd */ 220 *pflag = xad->flag; 221 *paddr = xaddr + (lstart - xoff); 222 /* a page must be fully covered by an xad */ 223 *plen = min(xend - lstart, llen); 224 225 out: 226 XT_PUTPAGE(mp); 227 228 return rc; 229} 230 231 232/* 233 * xtLookupList() 234 * 235 * function: map a single logical extent into a list of physical extent; 236 * 237 * parameter: 238 * struct inode *ip, 239 * struct lxdlist *lxdlist, lxd list (in) 240 * struct xadlist *xadlist, xad list (in/out) 241 * int flag) 242 * 243 * coverage of lxd by xad under assumption of 244 * . lxd's are ordered and disjoint. 245 * . xad's are ordered and disjoint. 
246 * 247 * return: 248 * 0: success 249 * 250 * note: a page being written (even a single byte) is backed fully, 251 * except the last page which is only backed with blocks 252 * required to cover the last byte; 253 * the extent backing a page is fully contained within an xad; 254 */ 255int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, 256 struct xadlist * xadlist, int flag) 257{ 258 int rc = 0; 259 struct btstack btstack; 260 int cmp; 261 s64 bn; 262 struct metapage *mp; 263 xtpage_t *p; 264 int index; 265 lxd_t *lxd; 266 xad_t *xad, *pxd; 267 s64 size, lstart, lend, xstart, xend, pstart; 268 s64 llen, xlen, plen; 269 s64 xaddr, paddr; 270 int nlxd, npxd, maxnpxd; 271 272 npxd = xadlist->nxad = 0; 273 maxnpxd = xadlist->maxnxad; 274 pxd = xadlist->xad; 275 276 nlxd = lxdlist->nlxd; 277 lxd = lxdlist->lxd; 278 279 lstart = offsetLXD(lxd); 280 llen = lengthLXD(lxd); 281 lend = lstart + llen; 282 283 size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 284 JFS_SBI(ip->i_sb)->l2bsize; 285 286 /* 287 * search for the xad entry covering the logical extent 288 */ 289 search: 290 if (lstart >= size) 291 return 0; 292 293 if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) 294 return rc; 295 296 /* 297 * compute the physical extent covering logical extent 298 * 299 * N.B. search may have failed (e.g., hole in sparse file), 300 * and returned the index of the next entry. 301 */ 302//map: 303 /* retrieve search result */ 304 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 305 306 /* is xad on the next sibling page ? */ 307 if (index == le16_to_cpu(p->header.nextindex)) { 308 if (p->header.flag & BT_ROOT) 309 goto mapend; 310 311 if ((bn = le64_to_cpu(p->header.next)) == 0) 312 goto mapend; 313 314 XT_PUTPAGE(mp); 315 316 /* get next sibling page */ 317 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 318 if (rc) 319 return rc; 320 321 index = XTENTRYSTART; 322 } 323 324 xad = &p->xad[index]; 325 326 /* 327 * is lxd covered by xad ? 
328 */ 329 compare: 330 xstart = offsetXAD(xad); 331 xlen = lengthXAD(xad); 332 xend = xstart + xlen; 333 xaddr = addressXAD(xad); 334 335 compare1: 336 if (xstart < lstart) 337 goto compare2; 338 339 /* (lstart <= xstart) */ 340 341 /* lxd is NOT covered by xad */ 342 if (lend <= xstart) { 343 /* 344 * get next lxd 345 */ 346 if (--nlxd == 0) 347 goto mapend; 348 lxd++; 349 350 lstart = offsetLXD(lxd); 351 llen = lengthLXD(lxd); 352 lend = lstart + llen; 353 if (lstart >= size) 354 goto mapend; 355 356 /* compare with the current xad */ 357 goto compare1; 358 } 359 /* lxd is covered by xad */ 360 else { /* (xstart < lend) */ 361 362 /* initialize new pxd */ 363 pstart = xstart; 364 plen = min(lend - xstart, xlen); 365 paddr = xaddr; 366 367 goto cover; 368 } 369 370 /* (xstart < lstart) */ 371 compare2: 372 /* lxd is covered by xad */ 373 if (lstart < xend) { 374 /* initialize new pxd */ 375 pstart = lstart; 376 plen = min(xend - lstart, llen); 377 paddr = xaddr + (lstart - xstart); 378 379 goto cover; 380 } 381 /* lxd is NOT covered by xad */ 382 else { /* (xend <= lstart) */ 383 384 /* 385 * get next xad 386 * 387 * linear search next xad covering lxd on 388 * the current xad page, and then tree search 389 */ 390 if (index == le16_to_cpu(p->header.nextindex) - 1) { 391 if (p->header.flag & BT_ROOT) 392 goto mapend; 393 394 XT_PUTPAGE(mp); 395 goto search; 396 } else { 397 index++; 398 xad++; 399 400 /* compare with new xad */ 401 goto compare; 402 } 403 } 404 405 /* 406 * lxd is covered by xad and a new pxd has been initialized 407 * (lstart <= xstart < lend) or (xstart < lstart < xend) 408 */ 409 cover: 410 /* finalize pxd corresponding to current xad */ 411 XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); 412 413 if (++npxd >= maxnpxd) 414 goto mapend; 415 pxd++; 416 417 /* 418 * lxd is fully covered by xad 419 */ 420 if (lend <= xend) { 421 /* 422 * get next lxd 423 */ 424 if (--nlxd == 0) 425 goto mapend; 426 lxd++; 427 428 lstart = offsetLXD(lxd); 429 llen 
= lengthLXD(lxd); 430 lend = lstart + llen; 431 if (lstart >= size) 432 goto mapend; 433 434 /* 435 * test for old xad covering new lxd 436 * (old xstart < new lstart) 437 */ 438 goto compare2; 439 } 440 /* 441 * lxd is partially covered by xad 442 */ 443 else { /* (xend < lend) */ 444 445 /* 446 * get next xad 447 * 448 * linear search next xad covering lxd on 449 * the current xad page, and then next xad page search 450 */ 451 if (index == le16_to_cpu(p->header.nextindex) - 1) { 452 if (p->header.flag & BT_ROOT) 453 goto mapend; 454 455 if ((bn = le64_to_cpu(p->header.next)) == 0) 456 goto mapend; 457 458 XT_PUTPAGE(mp); 459 460 /* get next sibling page */ 461 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 462 if (rc) 463 return rc; 464 465 index = XTENTRYSTART; 466 xad = &p->xad[index]; 467 } else { 468 index++; 469 xad++; 470 } 471 472 /* 473 * test for new xad covering old lxd 474 * (old lstart < new xstart) 475 */ 476 goto compare; 477 } 478 479 mapend: 480 xadlist->nxad = npxd; 481 482//out: 483 XT_PUTPAGE(mp); 484 485 return rc; 486} 487 488 489/* 490 * xtSearch() 491 * 492 * function: search for the xad entry covering specified offset. 493 * 494 * parameters: 495 * ip - file object; 496 * xoff - extent offset; 497 * cmpp - comparison result: 498 * btstack - traverse stack; 499 * flag - search process flag (XT_INSERT); 500 * 501 * returns: 502 * btstack contains (bn, index) of search path traversed to the entry. 503 * *cmpp is set to result of comparison with the entry returned. 504 * the page containing the entry is pinned at exit. 
505 */ 506static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ 507 int *cmpp, struct btstack * btstack, int flag) 508{ 509 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 510 int rc = 0; 511 int cmp = 1; /* init for empty page */ 512 s64 bn; /* block number */ 513 struct metapage *mp; /* page buffer */ 514 xtpage_t *p; /* page */ 515 xad_t *xad; 516 int base, index, lim, btindex; 517 struct btframe *btsp; 518 int nsplit = 0; /* number of pages to split */ 519 s64 t64; 520 521 INCREMENT(xtStat.search); 522 523 BT_CLR(btstack); 524 525 btstack->nsplit = 0; 526 527 /* 528 * search down tree from root: 529 * 530 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 531 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 532 * 533 * if entry with search key K is not found 534 * internal page search find the entry with largest key Ki 535 * less than K which point to the child page to search; 536 * leaf page search find the entry with smallest key Kj 537 * greater than K so that the returned index is the position of 538 * the entry to be shifted right for insertion of new entry. 539 * for empty tree, search key is greater than any key of the tree. 540 * 541 * by convention, root bn = 0. 
542 */ 543 for (bn = 0;;) { 544 /* get/pin the page to search */ 545 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 546 if (rc) 547 return rc; 548 549 /* try sequential access heuristics with the previous 550 * access entry in target leaf page: 551 * once search narrowed down into the target leaf, 552 * key must either match an entry in the leaf or 553 * key entry does not exist in the tree; 554 */ 555//fastSearch: 556 if ((jfs_ip->btorder & BT_SEQUENTIAL) && 557 (p->header.flag & BT_LEAF) && 558 (index = jfs_ip->btindex) < 559 le16_to_cpu(p->header.nextindex)) { 560 xad = &p->xad[index]; 561 t64 = offsetXAD(xad); 562 if (xoff < t64 + lengthXAD(xad)) { 563 if (xoff >= t64) { 564 *cmpp = 0; 565 goto out; 566 } 567 568 /* stop sequential access heuristics */ 569 goto binarySearch; 570 } else { /* (t64 + lengthXAD(xad)) <= xoff */ 571 572 /* try next sequential entry */ 573 index++; 574 if (index < 575 le16_to_cpu(p->header.nextindex)) { 576 xad++; 577 t64 = offsetXAD(xad); 578 if (xoff < t64 + lengthXAD(xad)) { 579 if (xoff >= t64) { 580 *cmpp = 0; 581 goto out; 582 } 583 584 /* miss: key falls between 585 * previous and this entry 586 */ 587 *cmpp = 1; 588 goto out; 589 } 590 591 /* (xoff >= t64 + lengthXAD(xad)); 592 * matching entry may be further out: 593 * stop heuristic search 594 */ 595 /* stop sequential access heuristics */ 596 goto binarySearch; 597 } 598 599 /* (index == p->header.nextindex); 600 * miss: key entry does not exist in 601 * the target leaf/tree 602 */ 603 *cmpp = 1; 604 goto out; 605 } 606 607 /* 608 * if hit, return index of the entry found, and 609 * if miss, where new entry with search key is 610 * to be inserted; 611 */ 612 out: 613 /* compute number of pages to split */ 614 if (flag & XT_INSERT) { 615 if (p->header.nextindex == /* little-endian */ 616 p->header.maxentry) 617 nsplit++; 618 else 619 nsplit = 0; 620 btstack->nsplit = nsplit; 621 } 622 623 /* save search result */ 624 btsp = btstack->top; 625 btsp->bn = bn; 626 btsp->index = index; 
627 btsp->mp = mp; 628 629 /* update sequential access heuristics */ 630 jfs_ip->btindex = index; 631 632 INCREMENT(xtStat.fastSearch); 633 return 0; 634 } 635 636 /* well, ... full search now */ 637 binarySearch: 638 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 639 640 /* 641 * binary search with search key K on the current page 642 */ 643 for (base = XTENTRYSTART; lim; lim >>= 1) { 644 index = base + (lim >> 1); 645 646 XT_CMP(cmp, xoff, &p->xad[index], t64); 647 if (cmp == 0) { 648 /* 649 * search hit 650 */ 651 /* search hit - leaf page: 652 * return the entry found 653 */ 654 if (p->header.flag & BT_LEAF) { 655 *cmpp = cmp; 656 657 /* compute number of pages to split */ 658 if (flag & XT_INSERT) { 659 if (p->header.nextindex == 660 p->header.maxentry) 661 nsplit++; 662 else 663 nsplit = 0; 664 btstack->nsplit = nsplit; 665 } 666 667 /* save search result */ 668 btsp = btstack->top; 669 btsp->bn = bn; 670 btsp->index = index; 671 btsp->mp = mp; 672 673 /* init sequential access heuristics */ 674 btindex = jfs_ip->btindex; 675 if (index == btindex || 676 index == btindex + 1) 677 jfs_ip->btorder = BT_SEQUENTIAL; 678 else 679 jfs_ip->btorder = BT_RANDOM; 680 jfs_ip->btindex = index; 681 682 return 0; 683 } 684 685 /* search hit - internal page: 686 * descend/search its child page 687 */ 688 goto next; 689 } 690 691 if (cmp > 0) { 692 base = index + 1; 693 --lim; 694 } 695 } 696 697 /* 698 * search miss 699 * 700 * base is the smallest index with key (Kj) greater than 701 * search key (K) and may be zero or maxentry index. 702 */ 703 /* 704 * search miss - leaf page: 705 * 706 * return location of entry (base) where new entry with 707 * search key K is to be inserted. 
708 */ 709 if (p->header.flag & BT_LEAF) { 710 *cmpp = cmp; 711 712 /* compute number of pages to split */ 713 if (flag & XT_INSERT) { 714 if (p->header.nextindex == 715 p->header.maxentry) 716 nsplit++; 717 else 718 nsplit = 0; 719 btstack->nsplit = nsplit; 720 } 721 722 /* save search result */ 723 btsp = btstack->top; 724 btsp->bn = bn; 725 btsp->index = base; 726 btsp->mp = mp; 727 728 /* init sequential access heuristics */ 729 btindex = jfs_ip->btindex; 730 if (base == btindex || base == btindex + 1) 731 jfs_ip->btorder = BT_SEQUENTIAL; 732 else 733 jfs_ip->btorder = BT_RANDOM; 734 jfs_ip->btindex = base; 735 736 return 0; 737 } 738 739 /* 740 * search miss - non-leaf page: 741 * 742 * if base is non-zero, decrement base by one to get the parent 743 * entry of the child page to search. 744 */ 745 index = base ? base - 1 : base; 746 747 /* 748 * go down to child page 749 */ 750 next: 751 /* update number of pages to split */ 752 if (p->header.nextindex == p->header.maxentry) 753 nsplit++; 754 else 755 nsplit = 0; 756 757 /* push (bn, index) of the parent page/entry */ 758 BT_PUSH(btstack, bn, index); 759 760 /* get the child page block number */ 761 bn = addressXAD(&p->xad[index]); 762 763 /* unpin the parent page */ 764 XT_PUTPAGE(mp); 765 } 766} 767 768/* 769 * xtInsert() 770 * 771 * function: 772 * 773 * parameter: 774 * tid - transaction id; 775 * ip - file object; 776 * xflag - extent flag (XAD_NOTRECORDED): 777 * xoff - extent offset; 778 * xlen - extent length; 779 * xaddrp - extent address pointer (in/out): 780 * if (*xaddrp) 781 * caller allocated data extent at *xaddrp; 782 * else 783 * allocate data extent and return its xaddr; 784 * flag - 785 * 786 * return: 787 */ 788int xtInsert(tid_t tid, /* transaction id */ 789 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, 790 int flag) 791{ 792 int rc = 0; 793 s64 xaddr, hint; 794 struct metapage *mp; /* meta-page buffer */ 795 xtpage_t *p; /* base B+-tree index page */ 796 s64 bn; 797 int 
index, nextindex; 798 struct btstack btstack; /* traverse stack */ 799 struct xtsplit split; /* split information */ 800 xad_t *xad; 801 int cmp; 802 struct tlock *tlck; 803 struct xtlock *xtlck; 804 805 jFYI(1, 806 ("xtInsert: nxoff:0x%lx nxlen:0x%x\n", (ulong) xoff, xlen)); 807 808 /* 809 * search for the entry location at which to insert: 810 * 811 * xtFastSearch() and xtSearch() both returns (leaf page 812 * pinned, index at which to insert). 813 * n.b. xtSearch() may return index of maxentry of 814 * the full page. 815 */ 816 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 817 return rc; 818 819 /* retrieve search result */ 820 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 821 822 /* This test must follow XT_GETSEARCH since mp must be valid if 823 * we branch to out: */ 824 if (cmp == 0) { 825 rc = EEXIST; 826 goto out; 827 } 828 829 /* 830 * allocate data extent requested 831 * 832 * allocation hint: last xad 833 */ 834 if ((xaddr = *xaddrp) == 0) { 835 if (index > XTENTRYSTART) { 836 xad = &p->xad[index - 1]; 837 hint = addressXAD(xad) + lengthXAD(xad) - 1; 838 } else 839 hint = 0; 840 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) 841 goto out; 842 } 843 844 /* 845 * insert entry for new extent 846 */ 847 xflag |= XAD_NEW; 848 849 /* 850 * if the leaf page is full, split the page and 851 * propagate up the router entry for the new page from split 852 * 853 * The xtSplitUp() will insert the entry and unpin the leaf page. 
854 */ 855 nextindex = le16_to_cpu(p->header.nextindex); 856 if (nextindex == le16_to_cpu(p->header.maxentry)) { 857 split.mp = mp; 858 split.index = index; 859 split.flag = xflag; 860 split.off = xoff; 861 split.len = xlen; 862 split.addr = xaddr; 863 split.pxdlist = NULL; 864 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 865 /* undo data extent allocation */ 866 if (*xaddrp == 0) 867 dbFree(ip, xaddr, (s64) xlen); 868 return rc; 869 } 870 871 *xaddrp = xaddr; 872 return 0; 873 } 874 875 /* 876 * insert the new entry into the leaf page 877 */ 878 /* 879 * acquire a transaction lock on the leaf page; 880 * 881 * action: xad insertion/extension; 882 */ 883 BT_MARK_DIRTY(mp, ip); 884 885 /* if insert into middle, shift right remaining entries. */ 886 if (index < nextindex) 887 memmove(&p->xad[index + 1], &p->xad[index], 888 (nextindex - index) * sizeof(xad_t)); 889 890 /* insert the new entry: mark the entry NEW */ 891 xad = &p->xad[index]; 892 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 893 894 /* advance next available entry index */ 895 p->header.nextindex = 896 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 897 898 /* Don't log it if there are no links to the file */ 899 if (!test_cflag(COMMIT_Nolink, ip)) { 900 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 901 xtlck = (struct xtlock *) & tlck->lock; 902 xtlck->lwm.offset = 903 (xtlck->lwm.offset) ? 
min(index, 904 (int)xtlck->lwm.offset) : index; 905 xtlck->lwm.length = 906 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 907 } 908 909 *xaddrp = xaddr; 910 911 out: 912 /* unpin the leaf page */ 913 XT_PUTPAGE(mp); 914 915 return rc; 916} 917 918 919/* 920 * xtSplitUp() 921 * 922 * function: 923 * split full pages as propagating insertion up the tree 924 * 925 * parameter: 926 * tid - transaction id; 927 * ip - file object; 928 * split - entry parameter descriptor; 929 * btstack - traverse stack from xtSearch() 930 * 931 * return: 932 */ 933static int 934xtSplitUp(tid_t tid, 935 struct inode *ip, struct xtsplit * split, struct btstack * btstack) 936{ 937 int rc = 0; 938 struct metapage *smp; 939 xtpage_t *sp; /* split page */ 940 struct metapage *rmp; 941 s64 rbn; /* new right page block number */ 942 struct metapage *rcmp; 943 xtpage_t *rcp; /* right child page */ 944 s64 rcbn; /* right child page block number */ 945 int skip; /* index of entry of insertion */ 946 int nextindex; /* next available entry index of p */ 947 struct btframe *parent; /* parent page entry on traverse stack */ 948 xad_t *xad; 949 s64 xaddr; 950 int xlen; 951 int nsplit; /* number of pages split */ 952 struct pxdlist pxdlist; 953 pxd_t *pxd; 954 struct tlock *tlck; 955 struct xtlock *xtlck; 956 957 smp = split->mp; 958 sp = XT_PAGE(ip, smp); 959 960 /* is inode xtree root extension/inline EA area free ? */ 961 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && 962 (sp->header.maxentry < cpu_to_le16(XTROOTMAXSLOT)) && 963 (JFS_IP(ip)->mode2 & INLINEEA)) { 964 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); 965 JFS_IP(ip)->mode2 &= ~INLINEEA; 966 967 BT_MARK_DIRTY(smp, ip); 968 /* 969 * acquire a transaction lock on the leaf page; 970 * 971 * action: xad insertion/extension; 972 */ 973 974 /* if insert into middle, shift right remaining entries. 
*/ 975 skip = split->index; 976 nextindex = le16_to_cpu(sp->header.nextindex); 977 if (skip < nextindex) 978 memmove(&sp->xad[skip + 1], &sp->xad[skip], 979 (nextindex - skip) * sizeof(xad_t)); 980 981 /* insert the new entry: mark the entry NEW */ 982 xad = &sp->xad[skip]; 983 XT_PUTENTRY(xad, split->flag, split->off, split->len, 984 split->addr); 985 986 /* advance next available entry index */ 987 sp->header.nextindex = 988 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); 989 990 /* Don't log it if there are no links to the file */ 991 if (!test_cflag(COMMIT_Nolink, ip)) { 992 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 993 xtlck = (struct xtlock *) & tlck->lock; 994 xtlck->lwm.offset = (xtlck->lwm.offset) ? 995 min(skip, (int)xtlck->lwm.offset) : skip; 996 xtlck->lwm.length = 997 le16_to_cpu(sp->header.nextindex) - 998 xtlck->lwm.offset; 999 } 1000 1001 return 0; 1002 } 1003 1004 /* 1005 * allocate new index blocks to cover index page split(s) 1006 * 1007 * allocation hint: ? 1008 */ 1009 if (split->pxdlist == NULL) { 1010 nsplit = btstack->nsplit; 1011 split->pxdlist = &pxdlist; 1012 pxdlist.maxnpxd = pxdlist.npxd = 0; 1013 pxd = &pxdlist.pxd[0]; 1014 xlen = JFS_SBI(ip->i_sb)->nbperpage; 1015 for (; nsplit > 0; nsplit--, pxd++) { 1016 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) 1017 == 0) { 1018 PXDaddress(pxd, xaddr); 1019 PXDlength(pxd, xlen); 1020 1021 pxdlist.maxnpxd++; 1022 1023 continue; 1024 } 1025 1026 /* undo allocation */ 1027 1028 XT_PUTPAGE(smp); 1029 return rc; 1030 } 1031 } 1032 1033 /* 1034 * Split leaf page <sp> into <sp> and a new right page <rp>. 1035 * 1036 * The split routines insert the new entry into the leaf page, 1037 * and acquire txLock as appropriate. 1038 * return <rp> pinned and its block number <rpbn>. 1039 */ 1040 rc = (sp->header.flag & BT_ROOT) ? 
1041 xtSplitRoot(tid, ip, split, &rmp) : 1042 xtSplitPage(tid, ip, split, &rmp, &rbn); 1043 if (rc) 1044 return EIO; 1045 1046 XT_PUTPAGE(smp); 1047 1048 /* 1049 * propagate up the router entry for the leaf page just split 1050 * 1051 * insert a router entry for the new page into the parent page, 1052 * propagate the insert/split up the tree by walking back the stack 1053 * of (bn of parent page, index of child page entry in parent page) 1054 * that were traversed during the search for the page that split. 1055 * 1056 * the propagation of insert/split up the tree stops if the root 1057 * splits or the page inserted into doesn't have to split to hold 1058 * the new entry. 1059 * 1060 * the parent entry for the split page remains the same, and 1061 * a new entry is inserted at its right with the first key and 1062 * block number of the new right page. 1063 * 1064 * There are a maximum of 3 pages pinned at any time: 1065 * right child, left parent and right parent (when the parent splits) 1066 * to keep the child page pinned while working on the parent. 1067 * make sure that all pins are released at exit. 1068 */ 1069 while ((parent = BT_POP(btstack)) != NULL) { 1070 /* parent page specified by stack frame <parent> */ 1071 1072 /* keep current child pages <rcp> pinned */ 1073 rcmp = rmp; 1074 rcbn = rbn; 1075 rcp = XT_PAGE(ip, rcmp); 1076 1077 /* 1078 * insert router entry in parent for new right child page <rp> 1079 */ 1080 /* get/pin the parent page <sp> */ 1081 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1082 if (rc) 1083 goto errout2; 1084 1085 /* 1086 * The new key entry goes ONE AFTER the index of parent entry, 1087 * because the split was to the right. 
1088 */ 1089 skip = parent->index + 1; 1090 1091 /* 1092 * split or shift right remaining entries of the parent page 1093 */ 1094 nextindex = le16_to_cpu(sp->header.nextindex); 1095 /* 1096 * parent page is full - split the parent page 1097 */ 1098 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 1099 /* init for parent page split */ 1100 split->mp = smp; 1101 split->index = skip; /* index at insert */ 1102 split->flag = XAD_NEW; 1103 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 1104 split->len = JFS_SBI(ip->i_sb)->nbperpage; 1105 split->addr = rcbn; 1106 1107 /* unpin previous right child page */ 1108 XT_PUTPAGE(rcmp); 1109 1110 /* The split routines insert the new entry, 1111 * and acquire txLock as appropriate. 1112 * return <rp> pinned and its block number <rpbn>. 1113 */ 1114 rc = (sp->header.flag & BT_ROOT) ? 1115 xtSplitRoot(tid, ip, split, &rmp) : 1116 xtSplitPage(tid, ip, split, &rmp, &rbn); 1117 if (rc) 1118 goto errout1; 1119 1120 XT_PUTPAGE(smp); 1121 /* keep new child page <rp> pinned */ 1122 } 1123 /* 1124 * parent page is not full - insert in parent page 1125 */ 1126 else { 1127 /* 1128 * insert router entry in parent for the right child 1129 * page from the first entry of the right child page: 1130 */ 1131 /* 1132 * acquire a transaction lock on the parent page; 1133 * 1134 * action: router xad insertion; 1135 */ 1136 BT_MARK_DIRTY(smp, ip); 1137 1138 /* 1139 * if insert into middle, shift right remaining entries 1140 */ 1141 if (skip < nextindex) 1142 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1143 (nextindex - 1144 skip) << L2XTSLOTSIZE); 1145 1146 /* insert the router entry */ 1147 xad = &sp->xad[skip]; 1148 XT_PUTENTRY(xad, XAD_NEW, 1149 offsetXAD(&rcp->xad[XTENTRYSTART]), 1150 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 1151 1152 /* advance next available entry index. 
*/ 1153 sp->header.nextindex = 1154 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1155 1); 1156 1157 /* Don't log it if there are no links to the file */ 1158 if (!test_cflag(COMMIT_Nolink, ip)) { 1159 tlck = txLock(tid, ip, smp, 1160 tlckXTREE | tlckGROW); 1161 xtlck = (struct xtlock *) & tlck->lock; 1162 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1163 min(skip, (int)xtlck->lwm.offset) : skip; 1164 xtlck->lwm.length = 1165 le16_to_cpu(sp->header.nextindex) - 1166 xtlck->lwm.offset; 1167 } 1168 1169 /* unpin parent page */ 1170 XT_PUTPAGE(smp); 1171 1172 /* exit propagate up */ 1173 break; 1174 } 1175 } 1176 1177 /* unpin current right page */ 1178 XT_PUTPAGE(rmp); 1179 1180 return 0; 1181 1182 /* 1183 * If something fails in the above loop we were already walking back 1184 * up the tree and the tree is now inconsistent. 1185 * release all pages we're holding. 1186 */ 1187 errout1: 1188 XT_PUTPAGE(smp); 1189 1190 errout2: 1191 XT_PUTPAGE(rcmp); 1192 1193 return rc; 1194} 1195 1196 1197/* 1198 * xtSplitPage() 1199 * 1200 * function: 1201 * split a full non-root page into 1202 * original/split/left page and new right page 1203 * i.e., the original/split page remains as left page. 1204 * 1205 * parameter: 1206 * int tid, 1207 * struct inode *ip, 1208 * struct xtsplit *split, 1209 * struct metapage **rmpp, 1210 * u64 *rbnp, 1211 * 1212 * return: 1213 * Pointer to page in which to insert or NULL on error. 
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	/* sxtlck/rxtlck stay NULL when COMMIT_Nolink is set; they are
	 * only dereferenced under the matching !test_cflag() guards below
	 */
	struct xtlock *sxtlck = 0, *rxtlck = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	/*
	 * allocate the new right page for the split
	 *
	 * the caller pre-allocated the page extents in split->pxdlist;
	 * consume the next unused pxd from that list.
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return EIO;

	jEVENT(0,
	       ("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p\n", ip, smp, rmp));

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		/* account the new page extent to the inode */
		ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));

		jEVENT(0, ("xtSplitPage: sp:0x%p rp:0x%p\n", sp, rp));
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			return rc;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 *
		 * NOTE(review): the tlck return value is intentionally
		 * unused here; the lock is taken only for its side effect
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	/* account the new page extent to the inode */
	ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));

	jEVENT(0, ("xtSplitPage: sp:0x%p rp:0x%p\n", sp, rp));
	return rc;
}


/*
 *	xtSplitRoot()
 *
 * function:
 *	split the full root page into
 *	original/root/split page and new right page
 *	i.e., root remains fixed in tree anchor (inode) and
 *	the root is copied to a single new right child page
 *	since root page << non-root page, and
 *	the split root page contains a single entry for the
 *	new right child page.
1459 * 1460 * parameter: 1461 * int tid, 1462 * struct inode *ip, 1463 * struct xtsplit *split, 1464 * struct metapage **rmpp) 1465 * 1466 * return: 1467 * Pointer to page in which to insert or NULL on error. 1468 */ 1469static int 1470xtSplitRoot(tid_t tid, 1471 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1472{ 1473 xtpage_t *sp; 1474 struct metapage *rmp; 1475 xtpage_t *rp; 1476 s64 rbn; 1477 int skip, nextindex; 1478 xad_t *xad; 1479 pxd_t *pxd; 1480 struct pxdlist *pxdlist; 1481 struct tlock *tlck; 1482 struct xtlock *xtlck; 1483 1484 sp = &JFS_IP(ip)->i_xtroot; 1485 1486 INCREMENT(xtStat.split); 1487 1488 /* 1489 * allocate a single (right) child page 1490 */ 1491 pxdlist = split->pxdlist; 1492 pxd = &pxdlist->pxd[pxdlist->npxd]; 1493 pxdlist->npxd++; 1494 rbn = addressPXD(pxd); 1495 rmp = get_metapage(ip, rbn, PSIZE, 1); 1496 if (rmp == NULL) 1497 return EIO; 1498 1499 jEVENT(0, ("xtSplitRoot: ip:0x%p rmp:0x%p\n", ip, rmp)); 1500 1501 /* 1502 * acquire a transaction lock on the new right page; 1503 * 1504 * action: new page; 1505 */ 1506 BT_MARK_DIRTY(rmp, ip); 1507 1508 rp = (xtpage_t *) rmp->data; 1509 rp->header.flag = 1510 (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL; 1511 rp->header.self = *pxd; 1512 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1513 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1514 1515 /* initialize sibling pointers */ 1516 rp->header.next = 0; 1517 rp->header.prev = 0; 1518 1519 /* 1520 * copy the in-line root page into new right page extent 1521 */ 1522 nextindex = le16_to_cpu(sp->header.maxentry); 1523 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1524 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1525 1526 /* 1527 * insert the new entry into the new right/child page 1528 * (skip index in the new right page will not change) 1529 */ 1530 skip = split->index; 1531 /* if insert into middle, shift right remaining entries */ 1532 if (skip != nextindex) 1533 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1534 (nextindex - skip) * sizeof(xad_t)); 1535 1536 xad = &rp->xad[skip]; 1537 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1538 1539 /* update page header */ 1540 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1541 1542 if (!test_cflag(COMMIT_Nolink, ip)) { 1543 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1544 xtlck = (struct xtlock *) & tlck->lock; 1545 xtlck->lwm.offset = XTENTRYSTART; 1546 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1547 XTENTRYSTART; 1548 } 1549 1550 /* 1551 * reset the root 1552 * 1553 * init root with the single entry for the new right page 1554 * set the 1st entry offset to 0, which force the left-most key 1555 * at any level of the tree to be less than any search key. 
1556 */ 1557 /* 1558 * acquire a transaction lock on the root page (in-memory inode); 1559 * 1560 * action: root split; 1561 */ 1562 BT_MARK_DIRTY(split->mp, ip); 1563 1564 xad = &sp->xad[XTENTRYSTART]; 1565 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1566 1567 /* update page header of root */ 1568 sp->header.flag &= ~BT_LEAF; 1569 sp->header.flag |= BT_INTERNAL; 1570 1571 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1572 1573 if (!test_cflag(COMMIT_Nolink, ip)) { 1574 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1575 xtlck = (struct xtlock *) & tlck->lock; 1576 xtlck->lwm.offset = XTENTRYSTART; 1577 xtlck->lwm.length = 1; 1578 } 1579 1580 *rmpp = rmp; 1581 1582 ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); 1583 1584 jEVENT(0, ("xtSplitRoot: sp:0x%p rp:0x%p\n", sp, rp)); 1585 return 0; 1586} 1587 1588 1589/* 1590 * xtExtend() 1591 * 1592 * function: extend in-place; 1593 * 1594 * note: existing extent may or may not have been committed. 1595 * caller is responsible for pager buffer cache update, and 1596 * working block allocation map update; 1597 * update pmap: alloc whole extended extent; 1598 */ 1599int xtExtend(tid_t tid, /* transaction id */ 1600 struct inode *ip, s64 xoff, /* delta extent offset */ 1601 s32 xlen, /* delta extent length */ 1602 int flag) 1603{ 1604 int rc = 0; 1605 int cmp; 1606 struct metapage *mp; /* meta-page buffer */ 1607 xtpage_t *p; /* base B+-tree index page */ 1608 s64 bn; 1609 int index, nextindex, len; 1610 struct btstack btstack; /* traverse stack */ 1611 struct xtsplit split; /* split information */ 1612 xad_t *xad; 1613 s64 xaddr; 1614 struct tlock *tlck; 1615 struct xtlock *xtlck = 0; 1616 int rootsplit = 0; 1617 1618 jFYI(1, 1619 ("xtExtend: nxoff:0x%lx nxlen:0x%x\n", (ulong) xoff, xlen)); 1620 1621 /* there must exist extent to be extended */ 1622 if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT))) 1623 return rc; 1624 assert(cmp == 0); 1625 1626 /* retrieve search 
result */ 1627 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1628 1629 /* extension must be contiguous */ 1630 xad = &p->xad[index]; 1631 jFYI(0, ("xtExtend: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1632 (ulong) offsetXAD(xad), lengthXAD(xad), 1633 (ulong) addressXAD(xad))); 1634 assert((offsetXAD(xad) + lengthXAD(xad)) == xoff); 1635 1636 /* 1637 * acquire a transaction lock on the leaf page; 1638 * 1639 * action: xad insertion/extension; 1640 */ 1641 BT_MARK_DIRTY(mp, ip); 1642 if (!test_cflag(COMMIT_Nolink, ip)) { 1643 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1644 xtlck = (struct xtlock *) & tlck->lock; 1645 } 1646 1647 /* extend will overflow extent ? */ 1648 xlen = lengthXAD(xad) + xlen; 1649 if ((len = xlen - MAXXLEN) <= 0) 1650 goto extendOld; 1651 1652 /* 1653 * extent overflow: insert entry for new extent 1654 */ 1655//insertNew: 1656 xoff = offsetXAD(xad) + MAXXLEN; 1657 xaddr = addressXAD(xad) + MAXXLEN; 1658 nextindex = le16_to_cpu(p->header.nextindex); 1659 1660 /* 1661 * if the leaf page is full, insert the new entry and 1662 * propagate up the router entry for the new page from split 1663 * 1664 * The xtSplitUp() will insert the entry and unpin the leaf page. 
1665 */ 1666 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1667 rootsplit = p->header.flag & BT_ROOT; 1668 1669 /* xtSpliUp() unpins leaf pages */ 1670 split.mp = mp; 1671 split.index = index + 1; 1672 split.flag = XAD_NEW; 1673 split.off = xoff; /* split offset */ 1674 split.len = len; 1675 split.addr = xaddr; 1676 split.pxdlist = NULL; 1677 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1678 return rc; 1679 1680 /* 1681 * if leaf root has been split, original root has been 1682 * copied to new child page, i.e., original entry now 1683 * resides on the new child page; 1684 */ 1685 if (rootsplit) { 1686 if (p->header.nextindex == 1687 cpu_to_le16(XTENTRYSTART + 1)) { 1688 xad = &p->xad[XTENTRYSTART]; 1689 bn = addressXAD(xad); 1690 1691 /* get new child page */ 1692 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1693 1694 BT_MARK_DIRTY(mp, ip); 1695 if (!test_cflag(COMMIT_Nolink, ip)) { 1696 tlck = txLock(tid, ip, mp, 1697 tlckXTREE | 1698 tlckGROW); 1699 xtlck = (struct xtlock *) & tlck->lock; 1700 } 1701 } 1702 } else 1703 /* get back old page */ 1704 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1705 } 1706 /* 1707 * insert the new entry into the leaf page 1708 */ 1709 else { 1710 /* insert the new entry: mark the entry NEW */ 1711 xad = &p->xad[index + 1]; 1712 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1713 1714 /* advance next available entry index */ 1715 p->header.nextindex = 1716 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1717 } 1718 1719 /* get back old entry */ 1720 xad = &p->xad[index]; 1721 xlen = MAXXLEN; 1722 1723 /* 1724 * extend old extent 1725 */ 1726 extendOld: 1727 XADlength(xad, xlen); 1728 if (!(xad->flag & XAD_NEW)) 1729 xad->flag |= XAD_EXTENDED; 1730 1731 if (!test_cflag(COMMIT_Nolink, ip)) { 1732 xtlck->lwm.offset = 1733 (xtlck->lwm.offset) ? 
min(index, 1734 (int)xtlck->lwm.offset) : index; 1735 xtlck->lwm.length = 1736 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1737 } 1738 1739 /* unpin the leaf page */ 1740 XT_PUTPAGE(mp); 1741 1742 return rc; 1743} 1744 1745 1746/* 1747 * xtTailgate() 1748 * 1749 * function: split existing 'tail' extent 1750 * (split offset >= start offset of tail extent), and 1751 * relocate and extend the split tail half; 1752 * 1753 * note: existing extent may or may not have been committed. 1754 * caller is responsible for pager buffer cache update, and 1755 * working block allocation map update; 1756 * update pmap: free old split tail extent, alloc new extent; 1757 */ 1758int xtTailgate(tid_t tid, /* transaction id */ 1759 struct inode *ip, s64 xoff, /* split/new extent offset */ 1760 s32 xlen, /* new extent length */ 1761 s64 xaddr, /* new extent address */ 1762 int flag) 1763{ 1764 int rc = 0; 1765 int cmp; 1766 struct metapage *mp; /* meta-page buffer */ 1767 xtpage_t *p; /* base B+-tree index page */ 1768 s64 bn; 1769 int index, nextindex, llen, rlen; 1770 struct btstack btstack; /* traverse stack */ 1771 struct xtsplit split; /* split information */ 1772 xad_t *xad; 1773 struct tlock *tlck; 1774 struct xtlock *xtlck = 0; 1775 struct tlock *mtlck; 1776 struct maplock *pxdlock; 1777 int rootsplit = 0; 1778 1779/* 1780printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1781 (ulong)xoff, xlen, (ulong)xaddr); 1782*/ 1783 1784 /* there must exist extent to be tailgated */ 1785 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 1786 return rc; 1787 assert(cmp == 0); 1788 1789 /* retrieve search result */ 1790 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1791 1792 /* entry found must be last entry */ 1793 nextindex = le16_to_cpu(p->header.nextindex); 1794 assert(index == nextindex - 1); 1795 1796 BT_MARK_DIRTY(mp, ip); 1797 /* 1798 * acquire tlock of the leaf page containing original entry 1799 */ 1800 if (!test_cflag(COMMIT_Nolink, ip)) { 1801 tlck = 
txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1802 xtlck = (struct xtlock *) & tlck->lock; 1803 } 1804 1805 /* completely replace extent ? */ 1806 xad = &p->xad[index]; 1807/* 1808printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1809 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1810*/ 1811 if ((llen = xoff - offsetXAD(xad)) == 0) 1812 goto updateOld; 1813 1814 /* 1815 * partially replace extent: insert entry for new extent 1816 */ 1817//insertNew: 1818 /* 1819 * if the leaf page is full, insert the new entry and 1820 * propagate up the router entry for the new page from split 1821 * 1822 * The xtSplitUp() will insert the entry and unpin the leaf page. 1823 */ 1824 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1825 rootsplit = p->header.flag & BT_ROOT; 1826 1827 /* xtSpliUp() unpins leaf pages */ 1828 split.mp = mp; 1829 split.index = index + 1; 1830 split.flag = XAD_NEW; 1831 split.off = xoff; /* split offset */ 1832 split.len = xlen; 1833 split.addr = xaddr; 1834 split.pxdlist = NULL; 1835 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1836 return rc; 1837 1838 /* 1839 * if leaf root has been split, original root has been 1840 * copied to new child page, i.e., original entry now 1841 * resides on the new child page; 1842 */ 1843 if (rootsplit) { 1844 if (p->header.nextindex == 1845 cpu_to_le16(XTENTRYSTART + 1)) { 1846 xad = &p->xad[XTENTRYSTART]; 1847 bn = addressXAD(xad); 1848 1849 /* get new child page */ 1850 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1851 1852 BT_MARK_DIRTY(mp, ip); 1853 if (!test_cflag(COMMIT_Nolink, ip)) { 1854 tlck = txLock(tid, ip, mp, 1855 tlckXTREE | 1856 tlckGROW); 1857 xtlck = (struct xtlock *) & tlck->lock; 1858 } 1859 } 1860 } else 1861 /* get back old page */ 1862 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1863 } 1864 /* 1865 * insert the new entry into the leaf page 1866 */ 1867 else { 1868 /* insert the new entry: mark the entry NEW */ 1869 xad = &p->xad[index + 1]; 1870 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, 
xaddr); 1871 1872 /* advance next available entry index */ 1873 p->header.nextindex = 1874 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1875 } 1876 1877 /* get back old XAD */ 1878 xad = &p->xad[index]; 1879 1880 /* 1881 * truncate/relocate old extent at split offset 1882 */ 1883 updateOld: 1884 /* update dmap for old/committed/truncated extent */ 1885 rlen = lengthXAD(xad) - llen; 1886 if (!(xad->flag & XAD_NEW)) { 1887 /* free from PWMAP at commit */ 1888 if (!test_cflag(COMMIT_Nolink, ip)) { 1889 mtlck = txMaplock(tid, ip, tlckMAP); 1890 pxdlock = (struct maplock *) & mtlck->lock; 1891 pxdlock->flag = mlckFREEPXD; 1892 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1893 PXDlength(&pxdlock->pxd, rlen); 1894 pxdlock->index = 1; 1895 } 1896 jEVENT(0, 1897 ("xtTailgate: free extent xaddr:0x%lx xlen:0x%x\n", 1898 (ulong) addressPXD(&pxdlock->pxd), 1899 lengthPXD(&pxdlock->pxd))); 1900 } else 1901 /* free from WMAP */ 1902 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1903 1904 if (llen) 1905 /* truncate */ 1906 XADlength(xad, llen); 1907 else 1908 /* replace */ 1909 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1910 1911 if (!test_cflag(COMMIT_Nolink, ip)) { 1912 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
1913 min(index, (int)xtlck->lwm.offset) : index; 1914 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1915 xtlck->lwm.offset; 1916 } 1917 1918 /* unpin the leaf page */ 1919 XT_PUTPAGE(mp); 1920 1921 return rc; 1922} 1923 1924 1925/* 1926 * xtUpdate() 1927 * 1928 * function: update XAD; 1929 * 1930 * update extent for allocated_but_not_recorded or 1931 * compressed extent; 1932 * 1933 * parameter: 1934 * nxad - new XAD; 1935 * logical extent of the specified XAD must be completely 1936 * contained by an existing XAD; 1937 */ 1938int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1939{ /* new XAD */ 1940 int rc = 0; 1941 int cmp; 1942 struct metapage *mp; /* meta-page buffer */ 1943 xtpage_t *p; /* base B+-tree index page */ 1944 s64 bn; 1945 int index0, index, newindex, nextindex; 1946 struct btstack btstack; /* traverse stack */ 1947 struct xtsplit split; /* split information */ 1948 xad_t *xad, *lxad, *rxad; 1949 int xflag; 1950 s64 nxoff, xoff; 1951 int nxlen, xlen, lxlen, rxlen; 1952 s64 nxaddr, xaddr; 1953 struct tlock *tlck; 1954 struct xtlock *xtlck = 0; 1955 int rootsplit = 0, newpage = 0; 1956 1957 /* there must exist extent to be tailgated */ 1958 nxoff = offsetXAD(nxad); 1959 nxlen = lengthXAD(nxad); 1960 nxaddr = addressXAD(nxad); 1961/* 1962printf("xtUpdate: nxflag:0x%x nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1963 nxad->flag, (ulong)nxoff, nxlen, (ulong)nxaddr); 1964*/ 1965 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) 1966 return rc; 1967 assert(cmp == 0); 1968 1969 /* retrieve search result */ 1970 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1971 1972 BT_MARK_DIRTY(mp, ip); 1973 /* 1974 * acquire tlock of the leaf page containing original entry 1975 */ 1976 if (!test_cflag(COMMIT_Nolink, ip)) { 1977 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1978 xtlck = (struct xtlock *) & tlck->lock; 1979 } 1980 1981 xad = &p->xad[index0]; 1982 xflag = xad->flag; 1983 xoff = offsetXAD(xad); 1984 xlen = lengthXAD(xad); 1985 
xaddr = addressXAD(xad); 1986/* 1987printf("xtUpdate: xflag:0x%x xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1988 xflag, (ulong)xoff, xlen, (ulong)xaddr); 1989*/ 1990 1991 /* nXAD must be completely contained within XAD */ 1992 assert(xoff <= nxoff); 1993 assert(nxoff + nxlen <= xoff + xlen); 1994 1995 index = index0; 1996 newindex = index + 1; 1997 nextindex = le16_to_cpu(p->header.nextindex); 1998 1999#ifdef _JFS_WIP_NOCOALESCE 2000 if (xoff < nxoff) 2001 goto updateRight; 2002 2003 /* 2004 * replace XAD with nXAD 2005 */ 2006 replace: /* (nxoff == xoff) */ 2007 if (nxlen == xlen) { 2008 /* replace XAD with nXAD:recorded */ 2009 *xad = *nxad; 2010 xad->flag = xflag & ~XAD_NOTRECORDED; 2011 2012 goto out; 2013 } else /* (nxlen < xlen) */ 2014 goto updateLeft; 2015#endif /* _JFS_WIP_NOCOALESCE */ 2016 2017/* #ifdef _JFS_WIP_COALESCE */ 2018 if (xoff < nxoff) 2019 goto coalesceRight; 2020 2021 /* 2022 * coalesce with left XAD 2023 */ 2024//coalesceLeft: /* (xoff == nxoff) */ 2025 /* is XAD first entry of page ? */ 2026 if (index == XTENTRYSTART) 2027 goto replace; 2028 2029 /* is nXAD logically and physically contiguous with lXAD ? */ 2030 lxad = &p->xad[index - 1]; 2031 lxlen = lengthXAD(lxad); 2032 if (!(lxad->flag & XAD_NOTRECORDED) && 2033 (nxoff == offsetXAD(lxad) + lxlen) && 2034 (nxaddr == addressXAD(lxad) + lxlen) && 2035 (lxlen + nxlen < MAXXLEN)) { 2036 /* extend right lXAD */ 2037 index0 = index - 1; 2038 XADlength(lxad, lxlen + nxlen); 2039 2040 /* If we just merged two extents together, need to make sure the 2041 * right extent gets logged. If the left one is marked XAD_NEW, 2042 * then we know it will be logged. 
Otherwise, mark as 2043 * XAD_EXTENDED 2044 */ 2045 if (!(lxad->flag & XAD_NEW)) 2046 lxad->flag |= XAD_EXTENDED; 2047 2048 if (xlen > nxlen) { 2049 /* truncate XAD */ 2050 XADoffset(xad, xoff + nxlen); 2051 XADlength(xad, xlen - nxlen); 2052 XADaddress(xad, xaddr + nxlen); 2053 goto out; 2054 } else { /* (xlen == nxlen) */ 2055 2056 /* remove XAD */ 2057 if (index < nextindex - 1) 2058 memmove(&p->xad[index], &p->xad[index + 1], 2059 (nextindex - index - 2060 1) << L2XTSLOTSIZE); 2061 2062 p->header.nextindex = 2063 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2064 1); 2065 2066 index = index0; 2067 newindex = index + 1; 2068 nextindex = le16_to_cpu(p->header.nextindex); 2069 xoff = nxoff = offsetXAD(lxad); 2070 xlen = nxlen = lxlen + nxlen; 2071 xaddr = nxaddr = addressXAD(lxad); 2072 goto coalesceRight; 2073 } 2074 } 2075 2076 /* 2077 * replace XAD with nXAD 2078 */ 2079 replace: /* (nxoff == xoff) */ 2080 if (nxlen == xlen) { 2081 /* replace XAD with nXAD:recorded */ 2082 *xad = *nxad; 2083 xad->flag = xflag & ~XAD_NOTRECORDED; 2084 2085 goto coalesceRight; 2086 } else /* (nxlen < xlen) */ 2087 goto updateLeft; 2088 2089 /* 2090 * coalesce with right XAD 2091 */ 2092 coalesceRight: /* (xoff <= nxoff) */ 2093 /* is XAD last entry of page ? */ 2094 if (newindex == nextindex) { 2095 if (xoff == nxoff) 2096 goto out; 2097 goto updateRight; 2098 } 2099 2100 /* is nXAD logically and physically contiguous with rXAD ? */ 2101 rxad = &p->xad[index + 1]; 2102 rxlen = lengthXAD(rxad); 2103 if (!(rxad->flag & XAD_NOTRECORDED) && 2104 (nxoff + nxlen == offsetXAD(rxad)) && 2105 (nxaddr + nxlen == addressXAD(rxad)) && 2106 (rxlen + nxlen < MAXXLEN)) { 2107 /* extend left rXAD */ 2108 XADoffset(rxad, nxoff); 2109 XADlength(rxad, rxlen + nxlen); 2110 XADaddress(rxad, nxaddr); 2111 2112 /* If we just merged two extents together, need to make sure 2113 * the left extent gets logged. If the right one is marked 2114 * XAD_NEW, then we know it will be logged. 
Otherwise, mark as 2115 * XAD_EXTENDED 2116 */ 2117 if (!(rxad->flag & XAD_NEW)) 2118 rxad->flag |= XAD_EXTENDED; 2119 2120 if (xlen > nxlen) 2121 /* truncate XAD */ 2122 XADlength(xad, xlen - nxlen); 2123 else { /* (xlen == nxlen) */ 2124 2125 /* remove XAD */ 2126 memmove(&p->xad[index], &p->xad[index + 1], 2127 (nextindex - index - 1) << L2XTSLOTSIZE); 2128 2129 p->header.nextindex = 2130 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2131 1); 2132 } 2133 2134 goto out; 2135 } else if (xoff == nxoff) 2136 goto out; 2137 2138 assert(xoff < nxoff); 2139/* #endif _JFS_WIP_COALESCE */ 2140 2141 /* 2142 * split XAD into (lXAD, nXAD): 2143 * 2144 * |---nXAD---> 2145 * --|----------XAD----------|-- 2146 * |-lXAD-| 2147 */ 2148 updateRight: /* (xoff < nxoff) */ 2149 /* truncate old XAD as lXAD:not_recorded */ 2150 xad = &p->xad[index]; 2151 XADlength(xad, nxoff - xoff); 2152 2153 /* insert nXAD:recorded */ 2154 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2155/* 2156printf("xtUpdate.updateRight.split p:0x%p\n", p); 2157*/ 2158 rootsplit = p->header.flag & BT_ROOT; 2159 2160 /* xtSpliUp() unpins leaf pages */ 2161 split.mp = mp; 2162 split.index = newindex; 2163 split.flag = xflag & ~XAD_NOTRECORDED; 2164 split.off = nxoff; 2165 split.len = nxlen; 2166 split.addr = nxaddr; 2167 split.pxdlist = NULL; 2168 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2169 return rc; 2170 2171 /* 2172 * if leaf root has been split, original root has been 2173 * copied to new child page, i.e., original entry now 2174 * resides on the new child page; 2175 */ 2176 if (rootsplit) { 2177 if (p->header.nextindex == 2178 cpu_to_le16(XTENTRYSTART + 1)) { 2179 xad = &p->xad[XTENTRYSTART]; 2180 bn = addressXAD(xad); 2181 2182 /* get new child page */ 2183 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2184 2185 BT_MARK_DIRTY(mp, ip); 2186 if (!test_cflag(COMMIT_Nolink, ip)) { 2187 tlck = txLock(tid, ip, mp, 2188 tlckXTREE | 2189 tlckGROW); 2190 xtlck = (struct xtlock *) & tlck->lock; 2191 } 2192 } 
2193 } else { 2194 /* get back old page */ 2195 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2196 2197 /* is nXAD on new page ? */ 2198 if (newindex > 2199 (le16_to_cpu(p->header.maxentry) >> 1)) { 2200 newindex = 2201 newindex - 2202 le16_to_cpu(p->header.nextindex) + 2203 XTENTRYSTART; 2204 newpage = 1; 2205 } 2206 } 2207 } else { 2208 /* if insert into middle, shift right remaining entries */ 2209 if (newindex < nextindex) 2210 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2211 (nextindex - newindex) << L2XTSLOTSIZE); 2212 2213 /* insert the entry */ 2214 xad = &p->xad[newindex]; 2215 *xad = *nxad; 2216 xad->flag = xflag & ~XAD_NOTRECORDED; 2217 2218 /* advance next available entry index. */ 2219 p->header.nextindex = 2220 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2221 } 2222 2223 /* 2224 * does nXAD force 3-way split ? 2225 * 2226 * |---nXAD--->| 2227 * --|----------XAD-------------|-- 2228 * |-lXAD-| |-rXAD -| 2229 */ 2230 if (nxoff + nxlen == xoff + xlen) 2231 goto out; 2232 2233 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2234 if (newpage) { 2235 /* close out old page */ 2236 if (!test_cflag(COMMIT_Nolink, ip)) { 2237 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
2238 min(index0, (int)xtlck->lwm.offset) : index0; 2239 xtlck->lwm.length = 2240 le16_to_cpu(p->header.nextindex) - 2241 xtlck->lwm.offset; 2242 } 2243 2244 bn = le64_to_cpu(p->header.next); 2245 XT_PUTPAGE(mp); 2246 2247 /* get new right page */ 2248 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2249 2250 BT_MARK_DIRTY(mp, ip); 2251 if (!test_cflag(COMMIT_Nolink, ip)) { 2252 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2253 xtlck = (struct xtlock *) & tlck->lock; 2254 } 2255 2256 index0 = index = newindex; 2257 } else 2258 index++; 2259 2260 newindex = index + 1; 2261 nextindex = le16_to_cpu(p->header.nextindex); 2262 xlen = xlen - (nxoff - xoff); 2263 xoff = nxoff; 2264 xaddr = nxaddr; 2265 2266 /* recompute split pages */ 2267 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2268/* 2269printf("xtUpdate: updateRight+Left recompute split pages: p:0x%p\n", p); 2270*/ 2271 XT_PUTPAGE(mp); 2272 2273 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) 2274 return rc; 2275 assert(cmp == 0); 2276 2277 /* retrieve search result */ 2278 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2279 assert(index0 == index); 2280 } 2281 2282 /* 2283 * split XAD into (nXAD, rXAD) 2284 * 2285 * ---nXAD---| 2286 * --|----------XAD----------|-- 2287 * |-rXAD-| 2288 */ 2289 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2290 /* update old XAD with nXAD:recorded */ 2291 xad = &p->xad[index]; 2292 *xad = *nxad; 2293 xad->flag = xflag & ~XAD_NOTRECORDED; 2294 2295 /* insert rXAD:not_recorded */ 2296 xoff = xoff + nxlen; 2297 xlen = xlen - nxlen; 2298 xaddr = xaddr + nxlen; 2299 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2300 rootsplit = p->header.flag & BT_ROOT; 2301 2302/* 2303printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2304*/ 2305 /* xtSpliUp() unpins leaf pages */ 2306 split.mp = mp; 2307 split.index = newindex; 2308 split.flag = xflag; 2309 split.off = xoff; 2310 split.len = xlen; 2311 split.addr = xaddr; 2312 split.pxdlist = NULL; 2313 if ((rc = 
xtSplitUp(tid, ip, &split, &btstack))) 2314 return rc; 2315 2316 /* 2317 * if leaf root has been split, original root has been 2318 * copied to new child page, i.e., original entry now 2319 * resides on the new child page; 2320 */ 2321 if (rootsplit) { 2322 if (p->header.nextindex == 2323 cpu_to_le16(XTENTRYSTART + 1)) { 2324 xad = &p->xad[XTENTRYSTART]; 2325 bn = addressXAD(xad); 2326 2327 /* get new child page */ 2328 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2329 2330 BT_MARK_DIRTY(mp, ip); 2331 if (!test_cflag(COMMIT_Nolink, ip)) { 2332 tlck = txLock(tid, ip, mp, 2333 tlckXTREE | 2334 tlckGROW); 2335 xtlck = (struct xtlock *) & tlck->lock; 2336 } 2337 } 2338 } else 2339 /* get back old page */ 2340 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2341 } else { 2342 /* if insert into middle, shift right remaining entries */ 2343 if (newindex < nextindex) 2344 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2345 (nextindex - newindex) << L2XTSLOTSIZE); 2346 2347 /* insert the entry */ 2348 xad = &p->xad[newindex]; 2349 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2350 2351 /* advance next available entry index. */ 2352 p->header.nextindex = 2353 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2354 } 2355 2356 out: 2357 if (!test_cflag(COMMIT_Nolink, ip)) { 2358 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
2359 min(index0, (int)xtlck->lwm.offset) : index0; 2360 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2361 xtlck->lwm.offset; 2362 } 2363 2364 /* unpin the leaf page */ 2365 XT_PUTPAGE(mp); 2366 2367 return rc; 2368} 2369 2370 2371/* 2372 * xtAppend() 2373 * 2374 * function: grow in append mode from contiguous region specified ; 2375 * 2376 * parameter: 2377 * tid - transaction id; 2378 * ip - file object; 2379 * xflag - extent flag: 2380 * xoff - extent offset; 2381 * maxblocks - max extent length; 2382 * xlen - extent length (in/out); 2383 * xaddrp - extent address pointer (in/out): 2384 * flag - 2385 * 2386 * return: 2387 */ 2388int xtAppend(tid_t tid, /* transaction id */ 2389 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2390 s32 * xlenp, /* (in/out) */ 2391 s64 * xaddrp, /* (in/out) */ 2392 int flag) 2393{ 2394 int rc = 0; 2395 struct metapage *mp; /* meta-page buffer */ 2396 xtpage_t *p; /* base B+-tree index page */ 2397 s64 bn, xaddr; 2398 int index, nextindex; 2399 struct btstack btstack; /* traverse stack */ 2400 struct xtsplit split; /* split information */ 2401 xad_t *xad; 2402 int cmp; 2403 struct tlock *tlck; 2404 struct xtlock *xtlck; 2405 int nsplit, nblocks, xlen; 2406 struct pxdlist pxdlist; 2407 pxd_t *pxd; 2408 2409 xaddr = *xaddrp; 2410 xlen = *xlenp; 2411 jEVENT(0, 2412 ("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx\n", 2413 (ulong) xoff, maxblocks, xlen, (ulong) xaddr)); 2414 2415 /* 2416 * search for the entry location at which to insert: 2417 * 2418 * xtFastSearch() and xtSearch() both returns (leaf page 2419 * pinned, index at which to insert). 2420 * n.b. xtSearch() may return index of maxentry of 2421 * the full page. 
	 */
	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* an exact match means the offset is already mapped */
	if (cmp == 0) {
		rc = EEXIST;
		goto out;
	}
//insert:
	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * index pages are carved off the front of the requested
	 * region; the data extent gets whatever remains.
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);

			pxdlist.maxnpxd++;

			continue;
		}

		/* undo allocation */
		/* NOTE(review): pxds already allocated in earlier loop
		 * iterations are not dbFree'd here - looks like a block
		 * leak on partial failure; TODO confirm
		 */

		goto out;
	}

	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		/* NOTE(review): only the data extent is freed here; the
		 * index-page pxds consumed by xtSplitUp() are not - TODO
		 * confirm whether xtSplitUp() owns them on failure
		 */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	/* report the extent actually allocated back to the caller */
	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 * insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* widen the log range to cover the inserted entry */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#ifdef _STILL_TO_PORT

/* - TBD for defragmentaion/reorganization -
 *
 *	xtDelete()
 *
 * function:
 *	delete the entry with the specified key.
 *
 *	N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *
 * return:
 *	ENOENT: if the entry is not found.
2553 * 2554 * exception: 2555 */ 2556int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2557{ 2558 int rc = 0; 2559 struct btstack btstack; 2560 int cmp; 2561 s64 bn; 2562 struct metapage *mp; 2563 xtpage_t *p; 2564 int index, nextindex; 2565 struct tlock *tlck; 2566 struct xtlock *xtlck; 2567 2568 /* 2569 * find the matching entry; xtSearch() pins the page 2570 */ 2571 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) 2572 return rc; 2573 2574 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2575 if (cmp) { 2576 /* unpin the leaf page */ 2577 XT_PUTPAGE(mp); 2578 return ENOENT; 2579 } 2580 2581 /* 2582 * delete the entry from the leaf page 2583 */ 2584 nextindex = le16_to_cpu(p->header.nextindex); 2585 p->header.nextindex = 2586 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); 2587 2588 /* 2589 * if the leaf page bocome empty, free the page 2590 */ 2591 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2592 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2593 2594 BT_MARK_DIRTY(mp, ip); 2595 /* 2596 * acquire a transaction lock on the leaf page; 2597 * 2598 * action:xad deletion; 2599 */ 2600 tlck = txLock(tid, ip, mp, tlckXTREE); 2601 xtlck = (struct xtlock *) & tlck->lock; 2602 xtlck->lwm.offset = 2603 (xtlck->lwm.offset) ? 
min(index, xtlck->lwm.offset) : index; 2604 2605 /* if delete from middle, shift left/compact the remaining entries */ 2606 if (index < nextindex - 1) 2607 memmove(&p->xad[index], &p->xad[index + 1], 2608 (nextindex - index - 1) * sizeof(xad_t)); 2609 2610 XT_PUTPAGE(mp); 2611 2612 return 0; 2613} 2614 2615 2616/* - TBD for defragmentaion/reorganization - 2617 * 2618 * xtDeleteUp() 2619 * 2620 * function: 2621 * free empty pages as propagating deletion up the tree 2622 * 2623 * parameter: 2624 * 2625 * return: 2626 */ 2627static int 2628xtDeleteUp(tid_t tid, struct inode *ip, 2629 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2630{ 2631 int rc = 0; 2632 struct metapage *mp; 2633 xtpage_t *p; 2634 int index, nextindex; 2635 s64 xaddr; 2636 int xlen; 2637 struct btframe *parent; 2638 struct tlock *tlck; 2639 struct xtlock *xtlck; 2640 2641 /* 2642 * keep root leaf page which has become empty 2643 */ 2644 if (fp->header.flag & BT_ROOT) { 2645 /* keep the root page */ 2646 fp->header.flag &= ~BT_INTERNAL; 2647 fp->header.flag |= BT_LEAF; 2648 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2649 2650 /* XT_PUTPAGE(fmp); */ 2651 2652 return 0; 2653 } 2654 2655 /* 2656 * free non-root leaf page 2657 */ 2658 if ((rc = xtRelink(tid, ip, fp))) 2659 return rc; 2660 2661 xaddr = addressPXD(&fp->header.self); 2662 xlen = lengthPXD(&fp->header.self); 2663 /* free the page extent */ 2664 dbFree(ip, xaddr, (s64) xlen); 2665 2666 /* free the buffer page */ 2667 discard_metapage(fmp); 2668 2669 /* 2670 * propagate page deletion up the index tree 2671 * 2672 * If the delete from the parent page makes it empty, 2673 * continue all the way up the tree. 2674 * stop if the root page is reached (which is never deleted) or 2675 * if the entry deletion does not empty the page. 
2676 */ 2677 while ((parent = BT_POP(btstack)) != NULL) { 2678 /* get/pin the parent page <sp> */ 2679 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2680 if (rc) 2681 return rc; 2682 2683 index = parent->index; 2684 2685 /* delete the entry for the freed child page from parent. 2686 */ 2687 nextindex = le16_to_cpu(p->header.nextindex); 2688 2689 /* 2690 * the parent has the single entry being deleted: 2691 * free the parent page which has become empty. 2692 */ 2693 if (nextindex == 1) { 2694 if (p->header.flag & BT_ROOT) { 2695 /* keep the root page */ 2696 p->header.flag &= ~BT_INTERNAL; 2697 p->header.flag |= BT_LEAF; 2698 p->header.nextindex = 2699 cpu_to_le16(XTENTRYSTART); 2700 2701 /* XT_PUTPAGE(fmp); */ 2702 2703 break; 2704 } else { 2705 /* free the parent page */ 2706 if ((rc = xtRelink(tid, ip, p))) 2707 return rc; 2708 2709 xaddr = addressPXD(&p->header.self); 2710 /* free the page extent */ 2711 dbFree(ip, xaddr, 2712 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2713 2714 /* unpin/free the buffer page */ 2715 discard_metapage(fmp); 2716 2717 /* propagate up */ 2718 continue; 2719 } 2720 } 2721 /* 2722 * the parent has other entries remaining: 2723 * delete the router entry from the parent page. 2724 */ 2725 else { 2726 BT_MARK_DIRTY(mp, ip); 2727 /* 2728 * acquire a transaction lock on the leaf page; 2729 * 2730 * action:xad deletion; 2731 */ 2732 tlck = txLock(tid, ip, mp, tlckXTREE); 2733 xtlck = (struct xtlock *) & tlck->lock; 2734 xtlck->lwm.offset = 2735 (xtlck->lwm.offset) ? min(index, 2736 xtlck->lwm. 
2737 offset) : index; 2738 2739 /* if delete from middle, 2740 * shift left/compact the remaining entries in the page 2741 */ 2742 if (index < nextindex - 1) 2743 memmove(&p->xad[index], &p->xad[index + 1], 2744 (nextindex - index - 2745 1) << L2XTSLOTSIZE); 2746 2747 p->header.nextindex = 2748 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2749 1); 2750 jEVENT(0, 2751 ("xtDeleteUp(entry): 0x%lx[%d]\n", 2752 (ulong) parent->bn, index)); 2753 } 2754 2755 /* unpin the parent page */ 2756 XT_PUTPAGE(mp); 2757 2758 /* exit propagation up */ 2759 break; 2760 } 2761 2762 return 0; 2763} 2764 2765 2766/* 2767 * NAME: xtRelocate() 2768 * 2769 * FUNCTION: relocate xtpage or data extent of regular file; 2770 * This function is mainly used by defragfs utility. 2771 * 2772 * NOTE: This routine does not have the logic to handle 2773 * uncommitted allocated extent. The caller should call 2774 * txCommit() to commit all the allocation before call 2775 * this routine. 2776 */ 2777xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2778 s64 nxaddr, /* new xaddr */ 2779 int xtype) 2780{ /* extent type: XTPAGE or DATAEXT */ 2781 int rc = 0; 2782 struct tblock *tblk; 2783 struct tlock *tlck; 2784 struct xtlock *xtlck; 2785 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2786 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2787 xad_t *xad; 2788 pxd_t *pxd; 2789 s64 xoff, xsize; 2790 int xlen; 2791 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2792 cbuf_t *cp; 2793 s64 offset, nbytes, nbrd, pno; 2794 int nb, npages, nblks; 2795 s64 bn; 2796 int cmp; 2797 int index; 2798 struct pxd_lock *pxdlock; 2799 struct btstack btstack; /* traverse stack */ 2800 2801 xtype = xtype & EXTENT_TYPE; 2802 2803 xoff = offsetXAD(oxad); 2804 oxaddr = addressXAD(oxad); 2805 xlen = lengthXAD(oxad); 2806 2807 /* validate extent offset */ 2808 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2809 if (offset >= ip->i_size) 2810 return ESTALE; /* stale extent */ 2811 2812 
jEVENT(0, 2813 ("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx\n", 2814 xtype, (ulong) xoff, xlen, (ulong) oxaddr, 2815 (ulong) nxaddr)); 2816 2817 /* 2818 * 1. get and validate the parent xtpage/xad entry 2819 * covering the source extent to be relocated; 2820 */ 2821 if (xtype == DATAEXT) { 2822 /* search in leaf entry */ 2823 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 2824 if (rc) 2825 return rc; 2826 if (cmp) { 2827 XT_PUTPAGE(pmp); 2828 return ESTALE; 2829 } 2830 2831 /* retrieve search result */ 2832 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2833 2834 /* validate for exact match with a single entry */ 2835 xad = &pp->xad[index]; 2836 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2837 XT_PUTPAGE(pmp); 2838 return ESTALE; 2839 } 2840 } else { /* (xtype == XTPAGE) */ 2841 2842 /* search in internal entry */ 2843 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2844 if (rc) 2845 return rc; 2846 if (cmp) { 2847 XT_PUTPAGE(pmp); 2848 return ESTALE; 2849 } 2850 2851 /* retrieve search result */ 2852 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2853 2854 /* xtSearchNode() validated for exact match with a single entry 2855 */ 2856 xad = &pp->xad[index]; 2857 } 2858 jEVENT(0, ("xtRelocate: parent xad entry validated.\n")); 2859 2860 /* 2861 * 2. 
relocate the extent 2862 */ 2863 if (xtype == DATAEXT) { 2864 /* if the extent is allocated-but-not-recorded 2865 * there is no real data to be moved in this extent, 2866 */ 2867 if (xad->flag & XAD_NOTRECORDED) 2868 goto out; 2869 else 2870 /* release xtpage for cmRead()/xtLookup() */ 2871 XT_PUTPAGE(pmp); 2872 2873 /* 2874 * cmRelocate() 2875 * 2876 * copy target data pages to be relocated; 2877 * 2878 * data extent must start at page boundary and 2879 * multiple of page size (except the last data extent); 2880 * read in each page of the source data extent into cbuf, 2881 * update the cbuf extent descriptor of the page to be 2882 * homeward bound to new dst data extent 2883 * copy the data from the old extent to new extent. 2884 * copy is essential for compressed files to avoid problems 2885 * that can arise if there was a change in compression 2886 * algorithms. 2887 * it is a good strategy because it may disrupt cache 2888 * policy to keep the pages in memory afterwards. 2889 */ 2890 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2891 assert((offset & CM_OFFSET) == 0); 2892 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2893 pno = offset >> CM_L2BSIZE; 2894 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2895/* 2896 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2897 (offset >> CM_L2BSIZE) + 1; 2898*/ 2899 sxaddr = oxaddr; 2900 dxaddr = nxaddr; 2901 2902 /* process the request one cache buffer at a time */ 2903 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2904 offset += nb, pno++, npages--) { 2905 /* compute page size */ 2906 nb = min(nbytes - nbrd, CM_BSIZE); 2907 2908 /* get the cache buffer of the page */ 2909 if (rc = cmRead(ip, offset, npages, &cp)) 2910 break; 2911 2912 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2913 assert(!cp->cm_modified); 2914 2915 /* bind buffer with the new extent address */ 2916 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2917 cmSetXD(ip, cp, pno, dxaddr, nblks); 2918 2919 /* release the cbuf, mark it as modified */ 2920 cmPut(cp, TRUE); 
2921 2922 dxaddr += nblks; 2923 sxaddr += nblks; 2924 } 2925 2926 /* get back parent page */ 2927 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 2928 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2929 jEVENT(0, ("xtRelocate: target data extent relocated.\n")); 2930 } else { /* (xtype == XTPAGE) */ 2931 2932 /* 2933 * read in the target xtpage from the source extent; 2934 */ 2935 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2936 if (rc) { 2937 XT_PUTPAGE(pmp); 2938 return rc; 2939 } 2940 2941 /* 2942 * read in sibling pages if any to update sibling pointers; 2943 */ 2944 rmp = NULL; 2945 if (p->header.next) { 2946 nextbn = le64_to_cpu(p->header.next); 2947 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2948 if (rc) { 2949 XT_PUTPAGE(pmp); 2950 XT_PUTPAGE(mp); 2951 return (rc); 2952 } 2953 } 2954 2955 lmp = NULL; 2956 if (p->header.prev) { 2957 prevbn = le64_to_cpu(p->header.prev); 2958 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2959 if (rc) { 2960 XT_PUTPAGE(pmp); 2961 XT_PUTPAGE(mp); 2962 if (rmp) 2963 XT_PUTPAGE(rmp); 2964 return (rc); 2965 } 2966 } 2967 2968 /* at this point, all xtpages to be updated are in memory */ 2969 2970 /* 2971 * update sibling pointers of sibling xtpages if any; 2972 */ 2973 if (lmp) { 2974 BT_MARK_DIRTY(lmp, ip); 2975 tlck = 2976 txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 2977 lp->header.next = cpu_to_le64(nxaddr); 2978 XT_PUTPAGE(lmp); 2979 } 2980 2981 if (rmp) { 2982 BT_MARK_DIRTY(rmp, ip); 2983 tlck = 2984 txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 2985 rp->header.prev = cpu_to_le64(nxaddr); 2986 XT_PUTPAGE(rmp); 2987 } 2988 2989 /* 2990 * update the target xtpage to be relocated 2991 * 2992 * update the self address of the target page 2993 * and write to destination extent; 2994 * redo image covers the whole xtpage since it is new page 2995 * to the destination extent; 2996 * update of bmap for the free of source extent 2997 * of the target xtpage itself: 2998 * update of bmap for the allocation of destination extent 2999 * of 
the target xtpage itself: 3000 * update of bmap for the extents covered by xad entries in 3001 * the target xtpage is not necessary since they are not 3002 * updated; 3003 * if not committed before this relocation, 3004 * target page may contain XAD_NEW entries which must 3005 * be scanned for bmap update (logredo() always 3006 * scan xtpage REDOPAGE image for bmap update); 3007 * if committed before this relocation (tlckRELOCATE), 3008 * scan may be skipped by commit() and logredo(); 3009 */ 3010 BT_MARK_DIRTY(mp, ip); 3011 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3012 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3013 xtlck = (struct xtlock *) & tlck->lock; 3014 3015 /* update the self address in the xtpage header */ 3016 pxd = &p->header.self; 3017 PXDaddress(pxd, nxaddr); 3018 3019 /* linelock for the after image of the whole page */ 3020 xtlck->lwm.length = 3021 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3022 3023 /* update the buffer extent descriptor of target xtpage */ 3024 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3025 bmSetXD(mp, nxaddr, xsize); 3026 3027 /* unpin the target page to new homeward bound */ 3028 XT_PUTPAGE(mp); 3029 jEVENT(0, ("xtRelocate: target xtpage relocated.\n")); 3030 } 3031 3032 /* 3033 * 3. 
acquire maplock for the source extent to be freed; 3034 * 3035 * acquire a maplock saving the src relocated extent address; 3036 * to free of the extent at commit time; 3037 */ 3038 out: 3039 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3040 * free PXD of the source data extent (logredo() will update 3041 * bmap for free of source data extent), and update bmap for 3042 * free of the source data extent; 3043 */ 3044 if (xtype == DATAEXT) 3045 tlck = txMaplock(tid, ip, tlckMAP); 3046 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3047 * for the source xtpage (logredo() will init NoRedoPage 3048 * filter and will also update bmap for free of the source 3049 * xtpage), and update bmap for free of the source xtpage; 3050 * N.B. We use tlckMAP instead of tlkcXTREE because there 3051 * is no buffer associated with this lock since the buffer 3052 * has been redirected to the target location. 3053 */ 3054 else /* (xtype == XTPAGE) */ 3055 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3056 3057 pxdlock = (struct pxd_lock *) & tlck->lock; 3058 pxdlock->flag = mlckFREEPXD; 3059 PXDaddress(&pxdlock->pxd, oxaddr); 3060 PXDlength(&pxdlock->pxd, xlen); 3061 pxdlock->index = 1; 3062 3063 /* 3064 * 4. 
update the parent xad entry for relocation; 3065 * 3066 * acquire tlck for the parent entry with XAD_NEW as entry 3067 * update which will write LOG_REDOPAGE and update bmap for 3068 * allocation of XAD_NEW destination extent; 3069 */ 3070 jEVENT(0, ("xtRelocate: update parent xad entry.\n")); 3071 BT_MARK_DIRTY(pmp, ip); 3072 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3073 xtlck = (struct xtlock *) & tlck->lock; 3074 3075 /* update the XAD with the new destination extent; */ 3076 xad = &pp->xad[index]; 3077 xad->flag |= XAD_NEW; 3078 XADaddress(xad, nxaddr); 3079 3080 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3081 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3082 xtlck->lwm.offset; 3083 3084 /* unpin the parent xtpage */ 3085 XT_PUTPAGE(pmp); 3086 3087 return rc; 3088} 3089 3090 3091/* 3092 * xtSearchNode() 3093 * 3094 * function: search for the internal xad entry covering specified extent. 3095 * This function is mainly used by defragfs utility. 3096 * 3097 * parameters: 3098 * ip - file object; 3099 * xad - extent to find; 3100 * cmpp - comparison result: 3101 * btstack - traverse stack; 3102 * flag - search process flag; 3103 * 3104 * returns: 3105 * btstack contains (bn, index) of search path traversed to the entry. 3106 * *cmpp is set to result of comparison with the entry returned. 3107 * the page containing the entry is pinned at exit. 
3108 */ 3109static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3110 int *cmpp, struct btstack * btstack, int flag) 3111{ 3112 int rc = 0; 3113 s64 xoff, xaddr; 3114 int xlen; 3115 int cmp = 1; /* init for empty page */ 3116 s64 bn; /* block number */ 3117 struct metapage *mp; /* meta-page buffer */ 3118 xtpage_t *p; /* page */ 3119 int base, index, lim; 3120 struct btframe *btsp; 3121 s64 t64; 3122 3123 BT_CLR(btstack); 3124 3125 xoff = offsetXAD(xad); 3126 xlen = lengthXAD(xad); 3127 xaddr = addressXAD(xad); 3128 3129 /* 3130 * search down tree from root: 3131 * 3132 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3133 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3134 * 3135 * if entry with search key K is not found 3136 * internal page search find the entry with largest key Ki 3137 * less than K which point to the child page to search; 3138 * leaf page search find the entry with smallest key Kj 3139 * greater than K so that the returned index is the position of 3140 * the entry to be shifted right for insertion of new entry. 3141 * for empty tree, search key is greater than any key of the tree. 3142 * 3143 * by convention, root bn = 0. 
3144 */ 3145 for (bn = 0;;) { 3146 /* get/pin the page to search */ 3147 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3148 if (rc) 3149 return rc; 3150 if (p->header.flag & BT_LEAF) 3151 return ESTALE; 3152 3153 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3154 3155 /* 3156 * binary search with search key K on the current page 3157 */ 3158 for (base = XTENTRYSTART; lim; lim >>= 1) { 3159 index = base + (lim >> 1); 3160 3161 XT_CMP(cmp, xoff, &p->xad[index], t64); 3162 if (cmp == 0) { 3163 /* 3164 * search hit 3165 * 3166 * verify for exact match; 3167 */ 3168 if (xaddr == addressXAD(&p->xad[index]) && 3169 xoff == offsetXAD(&p->xad[index])) { 3170 *cmpp = cmp; 3171 3172 /* save search result */ 3173 btsp = btstack->top; 3174 btsp->bn = bn; 3175 btsp->index = index; 3176 btsp->mp = mp; 3177 3178 return 0; 3179 } 3180 3181 /* descend/search its child page */ 3182 goto next; 3183 } 3184 3185 if (cmp > 0) { 3186 base = index + 1; 3187 --lim; 3188 } 3189 } 3190 3191 /* 3192 * search miss - non-leaf page: 3193 * 3194 * base is the smallest index with key (Kj) greater than 3195 * search key (K) and may be zero or maxentry index. 3196 * if base is non-zero, decrement base by one to get the parent 3197 * entry of the child page to search. 3198 */ 3199 index = base ? base - 1 : base; 3200 3201 /* 3202 * go down to child page 3203 */ 3204 next: 3205 /* get the child page block number */ 3206 bn = addressXAD(&p->xad[index]); 3207 3208 /* unpin the parent page */ 3209 XT_PUTPAGE(mp); 3210 } 3211} 3212 3213 3214/* 3215 * xtRelink() 3216 * 3217 * function: 3218 * link around a freed page. 
3219 * 3220 * Parameter: 3221 * int tid, 3222 * struct inode *ip, 3223 * xtpage_t *p) 3224 * 3225 * returns: 3226 */ 3227static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3228{ 3229 int rc = 0; 3230 struct metapage *mp; 3231 s64 nextbn, prevbn; 3232 struct tlock *tlck; 3233 3234 nextbn = le64_to_cpu(p->header.next); 3235 prevbn = le64_to_cpu(p->header.prev); 3236 3237 /* update prev pointer of the next page */ 3238 if (nextbn != 0) { 3239 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3240 if (rc) 3241 return rc; 3242 3243 /* 3244 * acquire a transaction lock on the page; 3245 * 3246 * action: update prev pointer; 3247 */ 3248 BT_MARK_DIRTY(mp, ip); 3249 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3250 3251 /* the page may already have been tlock'd */ 3252 3253 p->header.prev = cpu_to_le64(prevbn); 3254 3255 XT_PUTPAGE(mp); 3256 } 3257 3258 /* update next pointer of the previous page */ 3259 if (prevbn != 0) { 3260 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3261 if (rc) 3262 return rc; 3263 3264 /* 3265 * acquire a transaction lock on the page; 3266 * 3267 * action: update next pointer; 3268 */ 3269 BT_MARK_DIRTY(mp, ip); 3270 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3271 3272 /* the page may already have been tlock'd */ 3273 3274 p->header.next = le64_to_cpu(nextbn); 3275 3276 XT_PUTPAGE(mp); 3277 } 3278 3279 return 0; 3280} 3281#endif /* _STILL_TO_PORT */ 3282 3283 3284/* 3285 * xtInitRoot() 3286 * 3287 * initialize file root (inline in inode) 3288 */ 3289void xtInitRoot(tid_t tid, struct inode *ip) 3290{ 3291 xtpage_t *p; 3292 struct tlock *tlck; 3293 3294 /* 3295 * acquire a transaction lock on the root 3296 * 3297 * action: 3298 */ 3299 tlck = txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3300 tlckXTREE | tlckNEW); 3301 p = &JFS_IP(ip)->i_xtroot; 3302 3303 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3304 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3305 3306 if (S_ISDIR(ip->i_mode)) 3307 p->header.maxentry = 
cpu_to_le16(XTROOTINITSLOT_DIR); 3308 else { 3309 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3310 ip->i_size = 0; 3311 } 3312 3313 3314 return; 3315} 3316 3317 3318/* 3319 * We can run into a deadlock truncating a file with a large number of 3320 * xtree pages (large fragmented file). A robust fix would entail a 3321 * reservation system where we would reserve a number of metadata pages 3322 * and tlocks which we would be guaranteed without a deadlock. Without 3323 * this, a partial fix is to limit number of metadata pages we will lock 3324 * in a single transaction. Currently we will truncate the file so that 3325 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3326 * will be responsible for ensuring that the current transaction gets 3327 * committed, and that subsequent transactions are created to truncate 3328 * the file further if needed. 3329 */ 3330#define MAX_TRUNCATE_LEAVES 50 3331 3332/* 3333 * xtTruncate() 3334 * 3335 * function: 3336 * traverse for truncation logging backward bottom up; 3337 * terminate at the last extent entry at the current subtree 3338 * root page covering new down size. 3339 * truncation may occur within the last extent entry. 3340 * 3341 * parameter: 3342 * int tid, 3343 * struct inode *ip, 3344 * s64 newsize, 3345 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3346 * 3347 * return: 3348 * 3349 * note: 3350 * PWMAP: 3351 * 1. truncate (non-COMMIT_NOLINK file) 3352 * by jfs_truncate() or jfs_open(O_TRUNC): 3353 * xtree is updated; 3354 * 2. truncate index table of directory when last entry removed 3355 * map update via tlock at commit time; 3356 * PMAP: 3357 * Call xtTruncate_pmap instead 3358 * WMAP: 3359 * 1. remove (free zero link count) on last reference release 3360 * (pmap has been freed at commit zero link count); 3361 * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): 3362 * xtree is updated; 3363 * map update directly at truncation time; 3364 * 3365 * if (DELETE) 3366 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3367 * else if (TRUNCATE) 3368 * must write LOG_NOREDOPAGE for deleted index page; 3369 * 3370 * pages may already have been tlocked by anonymous transactions 3371 * during file growth (i.e., write) before truncation; 3372 * 3373 * except last truncated entry, deleted entries remains as is 3374 * in the page (nextindex is updated) for other use 3375 * (e.g., log/update allocation map): this avoid copying the page 3376 * info but delay free of pages; 3377 * 3378 */ 3379s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3380{ 3381 int rc = 0; 3382 s64 teof; 3383 struct metapage *mp; 3384 xtpage_t *p; 3385 s64 bn; 3386 int index, nextindex; 3387 xad_t *xad; 3388 s64 xoff, xaddr; 3389 int xlen, len, freexlen; 3390 struct btstack btstack; 3391 struct btframe *parent; 3392 struct tblock *tblk = 0; 3393 struct tlock *tlck = 0; 3394 struct xtlock *xtlck = 0; 3395 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3396 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3397 s64 nfreed; 3398 int freed, log; 3399 int locked_leaves = 0; 3400 3401 /* save object truncation type */ 3402 if (tid) { 3403 tblk = tid_to_tblock(tid); 3404 tblk->xflag |= flag; 3405 } 3406 3407 nfreed = 0; 3408 3409 flag &= COMMIT_MAP; 3410 assert(flag != COMMIT_PMAP); 3411 3412 if (flag == COMMIT_PWMAP) 3413 log = 1; 3414 else { 3415 log = 0; 3416 xadlock.flag = mlckFREEXADLIST; 3417 xadlock.index = 1; 3418 } 3419 3420 /* 3421 * if the newsize is not an integral number of pages, 3422 * the file between newsize and next page boundary will 3423 * be cleared. 3424 * if truncating into a file hole, it will cause 3425 * a full block to be allocated for the logical block. 
3426 */ 3427 3428 /* 3429 * release page blocks of truncated region <teof, eof> 3430 * 3431 * free the data blocks from the leaf index blocks. 3432 * delete the parent index entries corresponding to 3433 * the freed child data/index blocks. 3434 * free the index blocks themselves which aren't needed 3435 * in new sized file. 3436 * 3437 * index blocks are updated only if the blocks are to be 3438 * retained in the new sized file. 3439 * if type is PMAP, the data and index pages are NOT 3440 * freed, and the data and index blocks are NOT freed 3441 * from working map. 3442 * (this will allow continued access of data/index of 3443 * temporary file (zerolink count file truncated to zero-length)). 3444 */ 3445 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3446 JFS_SBI(ip->i_sb)->l2bsize; 3447 3448 /* clear stack */ 3449 BT_CLR(&btstack); 3450 3451 /* 3452 * start with root 3453 * 3454 * root resides in the inode 3455 */ 3456 bn = 0; 3457 3458 /* 3459 * first access of each page: 3460 */ 3461 getPage: 3462 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3463 if (rc) 3464 return -rc; 3465 3466 /* process entries backward from last index */ 3467 index = le16_to_cpu(p->header.nextindex) - 1; 3468 3469 if (p->header.flag & BT_INTERNAL) 3470 goto getChild; 3471 3472 /* 3473 * leaf page 3474 */ 3475 3476 /* Since this is the rightmost leaf, and we may have already freed 3477 * a page that was formerly to the right, let's make sure that the 3478 * next pointer is zero. 3479 */ 3480 p->header.next = 0; 3481 3482 freed = 0; 3483 3484 /* does region covered by leaf page precede Teof ? 
*/ 3485 xad = &p->xad[index]; 3486 xoff = offsetXAD(xad); 3487 xlen = lengthXAD(xad); 3488 if (teof >= xoff + xlen) { 3489 XT_PUTPAGE(mp); 3490 goto getParent; 3491 } 3492 3493 /* (re)acquire tlock of the leaf page */ 3494 if (log) { 3495 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3496 /* 3497 * We need to limit the size of the transaction 3498 * to avoid exhausting pagecache & tlocks 3499 */ 3500 XT_PUTPAGE(mp); 3501 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3502 goto getParent; 3503 } 3504 tlck = txLock(tid, ip, mp, tlckXTREE); 3505 tlck->type = tlckXTREE | tlckTRUNCATE; 3506 xtlck = (struct xtlock *) & tlck->lock; 3507 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3508 } 3509 BT_MARK_DIRTY(mp, ip); 3510 3511 /* 3512 * scan backward leaf page entries 3513 */ 3514 for (; index >= XTENTRYSTART; index--) { 3515 xad = &p->xad[index]; 3516 xoff = offsetXAD(xad); 3517 xlen = lengthXAD(xad); 3518 xaddr = addressXAD(xad); 3519 3520 /* 3521 * entry beyond eof: continue scan of current page 3522 * xad 3523 * ---|---=======-------> 3524 * eof 3525 */ 3526 if (teof < xoff) { 3527 nfreed += xlen; 3528 continue; 3529 } 3530 3531 /* 3532 * (xoff <= teof): last entry to be deleted from page; 3533 * If other entries remain in page: keep and update the page. 3534 */ 3535 3536 /* 3537 * eof == entry_start: delete the entry 3538 * xad 3539 * -------|=======-------> 3540 * eof 3541 * 3542 */ 3543 if (teof == xoff) { 3544 nfreed += xlen; 3545 3546 if (index == XTENTRYSTART) 3547 break; 3548 3549 nextindex = index; 3550 } 3551 /* 3552 * eof within the entry: truncate the entry. 3553 * xad 3554 * -------===|===-------> 3555 * eof 3556 */ 3557 else if (teof < xoff + xlen) { 3558 /* update truncated entry */ 3559 len = teof - xoff; 3560 freexlen = xlen - len; 3561 XADlength(xad, len); 3562 3563 /* save pxd of truncated extent in tlck */ 3564 xaddr += len; 3565 if (log) { /* COMMIT_PWMAP */ 3566 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
3567 min(index, (int)xtlck->lwm.offset) : index; 3568 xtlck->lwm.length = index + 1 - 3569 xtlck->lwm.offset; 3570 xtlck->twm.offset = index; 3571 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3572 pxdlock->flag = mlckFREEPXD; 3573 PXDaddress(&pxdlock->pxd, xaddr); 3574 PXDlength(&pxdlock->pxd, freexlen); 3575 } 3576 /* free truncated extent */ 3577 else { /* COMMIT_WMAP */ 3578 3579 pxdlock = (struct pxd_lock *) & xadlock; 3580 pxdlock->flag = mlckFREEPXD; 3581 PXDaddress(&pxdlock->pxd, xaddr); 3582 PXDlength(&pxdlock->pxd, freexlen); 3583 txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); 3584 3585 /* reset map lock */ 3586 xadlock.flag = mlckFREEXADLIST; 3587 } 3588 3589 /* current entry is new last entry; */ 3590 nextindex = index + 1; 3591 3592 nfreed += freexlen; 3593 } 3594 /* 3595 * eof beyond the entry: 3596 * xad 3597 * -------=======---|---> 3598 * eof 3599 */ 3600 else { /* (xoff + xlen < teof) */ 3601 3602 nextindex = index + 1; 3603 } 3604 3605 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3606 if (!log) { /* COMMIT_WAMP */ 3607 xadlock.xdlist = &p->xad[nextindex]; 3608 xadlock.count = 3609 le16_to_cpu(p->header.nextindex) - 3610 nextindex; 3611 txFreeMap(ip, (struct maplock *) & xadlock, 0, 3612 COMMIT_WMAP); 3613 } 3614 p->header.nextindex = cpu_to_le16(nextindex); 3615 } 3616 3617 XT_PUTPAGE(mp); 3618 3619 /* assert(freed == 0); */ 3620 goto getParent; 3621 } /* end scan of leaf page entries */ 3622 3623 freed = 1; 3624 3625 /* 3626 * leaf page become empty: free the page if type != PMAP 3627 */ 3628 if (log) { /* COMMIT_PWMAP */ 3629 /* txCommit() with tlckFREE: 3630 * free data extents covered by leaf [XTENTRYSTART:hwm); 3631 * invalidate leaf if COMMIT_PWMAP; 3632 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3633 */ 3634 tlck->type = tlckXTREE | tlckFREE; 3635 } else { /* COMMIT_WAMP */ 3636 3637 /* free data extents covered by leaf */ 3638 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3639 xadlock.count = 3640 le16_to_cpu(p->header.nextindex) - 
XTENTRYSTART; 3641 txFreeMap(ip, (struct maplock *) & xadlock, 0, COMMIT_WMAP); 3642 } 3643 3644 if (p->header.flag & BT_ROOT) { 3645 p->header.flag &= ~BT_INTERNAL; 3646 p->header.flag |= BT_LEAF; 3647 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3648 3649 XT_PUTPAGE(mp); /* debug */ 3650 goto out; 3651 } else { 3652 if (log) { /* COMMIT_PWMAP */ 3653 /* page will be invalidated at tx completion 3654 */ 3655 XT_PUTPAGE(mp); 3656 } else { /* COMMIT_WMAP */ 3657 3658 if (mp->lid) 3659 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3660 3661 /* invalidate empty leaf page */ 3662 discard_metapage(mp); 3663 } 3664 } 3665 3666 /* 3667 * the leaf page become empty: delete the parent entry 3668 * for the leaf page if the parent page is to be kept 3669 * in the new sized file. 3670 */ 3671 3672 /* 3673 * go back up to the parent page 3674 */ 3675 getParent: 3676 /* pop/restore parent entry for the current child page */ 3677 if ((parent = BT_POP(&btstack)) == NULL) 3678 /* current page must have been root */ 3679 goto out; 3680 3681 /* get back the parent page */ 3682 bn = parent->bn; 3683 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3684 if (rc) 3685 return -rc; 3686 3687 index = parent->index; 3688 3689 /* 3690 * child page was not empty: 3691 */ 3692 if (freed == 0) { 3693 /* has any entry deleted from parent ? */ 3694 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3695 /* (re)acquire tlock on the parent page */ 3696 if (log) { /* COMMIT_PWMAP */ 3697 /* txCommit() with tlckTRUNCATE: 3698 * free child extents covered by parent [); 3699 */ 3700 tlck = txLock(tid, ip, mp, tlckXTREE); 3701 xtlck = (struct xtlock *) & tlck->lock; 3702 if (!(tlck->type & tlckTRUNCATE)) { 3703 xtlck->hwm.offset = 3704 le16_to_cpu(p->header. 
3705 nextindex) - 1; 3706 tlck->type = 3707 tlckXTREE | tlckTRUNCATE; 3708 } 3709 } else { /* COMMIT_WMAP */ 3710 3711 /* free child extents covered by parent */ 3712 xadlock.xdlist = &p->xad[index + 1]; 3713 xadlock.count = 3714 le16_to_cpu(p->header.nextindex) - 3715 index - 1; 3716 txFreeMap(ip, (struct maplock *) & xadlock, 0, 3717 COMMIT_WMAP); 3718 } 3719 BT_MARK_DIRTY(mp, ip); 3720 3721 p->header.nextindex = cpu_to_le16(index + 1); 3722 } 3723 XT_PUTPAGE(mp); 3724 goto getParent; 3725 } 3726 3727 /* 3728 * child page was empty: 3729 */ 3730 nfreed += lengthXAD(&p->xad[index]); 3731 3732 /* 3733 * During working map update, child page's tlock must be handled 3734 * before parent's. This is because the parent's tlock will cause 3735 * the child's disk space to be marked available in the wmap, so 3736 * it's important that the child page be released by that time. 3737 * 3738 * ToDo: tlocks should be on doubly-linked list, so we can 3739 * quickly remove it and add it to the end. 3740 */ 3741 3742 /* 3743 * Move parent page's tlock to the end of the tid's tlock list 3744 */ 3745 if (log && mp->lid && (tblk->last != mp->lid) && 3746 lid_to_tlock(mp->lid)->tid) { 3747 lid_t lid = mp->lid; 3748 struct tlock *prev; 3749 3750 tlck = lid_to_tlock(lid); 3751 3752 if (tblk->next == lid) 3753 tblk->next = tlck->next; 3754 else { 3755 for (prev = lid_to_tlock(tblk->next); 3756 prev->next != lid; 3757 prev = lid_to_tlock(prev->next)) { 3758 assert(prev->next); 3759 } 3760 prev->next = tlck->next; 3761 } 3762 lid_to_tlock(tblk->last)->next = lid; 3763 tlck->next = 0; 3764 tblk->last = lid; 3765 } 3766 3767 /* 3768 * parent page become empty: free the page 3769 */ 3770 if (index == XTENTRYSTART) { 3771 if (log) { /* COMMIT_PWMAP */ 3772 /* txCommit() with tlckFREE: 3773 * free child extents covered by parent; 3774 * invalidate parent if COMMIT_PWMAP; 3775 */ 3776 tlck = txLock(tid, ip, mp, tlckXTREE); 3777 xtlck = (struct xtlock *) & tlck->lock; 3778 xtlck->hwm.offset = 3779 
le16_to_cpu(p->header.nextindex) - 1; 3780 tlck->type = tlckXTREE | tlckFREE; 3781 } else { /* COMMIT_WMAP */ 3782 3783 /* free child extents covered by parent */ 3784 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3785 xadlock.count = 3786 le16_to_cpu(p->header.nextindex) - 3787 XTENTRYSTART; 3788 txFreeMap(ip, (struct maplock *) & xadlock, 0, 3789 COMMIT_WMAP); 3790 } 3791 BT_MARK_DIRTY(mp, ip); 3792 3793 if (p->header.flag & BT_ROOT) { 3794 p->header.flag &= ~BT_INTERNAL; 3795 p->header.flag |= BT_LEAF; 3796 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3797 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3798 /* 3799 * Shrink root down to allow inline 3800 * EA (otherwise fsck complains) 3801 */ 3802 p->header.maxentry = 3803 cpu_to_le16(XTROOTINITSLOT); 3804 JFS_IP(ip)->mode2 |= INLINEEA; 3805 } 3806 3807 XT_PUTPAGE(mp); /* debug */ 3808 goto out; 3809 } else { 3810 if (log) { /* COMMIT_PWMAP */ 3811 /* page will be invalidated at tx completion 3812 */ 3813 XT_PUTPAGE(mp); 3814 } else { /* COMMIT_WMAP */ 3815 3816 if (mp->lid) 3817 lid_to_tlock(mp->lid)->flag |= 3818 tlckFREELOCK; 3819 3820 /* invalidate parent page */ 3821 discard_metapage(mp); 3822 } 3823 3824 /* parent has become empty and freed: 3825 * go back up to its parent page 3826 */ 3827 /* freed = 1; */ 3828 goto getParent; 3829 } 3830 } 3831 /* 3832 * parent page still has entries for front region; 3833 */ 3834 else { 3835 /* try truncate region covered by preceding entry 3836 * (process backward) 3837 */ 3838 index--; 3839 3840 /* go back down to the child page corresponding 3841 * to the entry 3842 */ 3843 goto getChild; 3844 } 3845 3846 /* 3847 * internal page: go down to child page of current entry 3848 */ 3849 getChild: 3850 /* save current parent entry for the child page */ 3851 BT_PUSH(&btstack, bn, index); 3852 3853 /* get child page */ 3854 xad = &p->xad[index]; 3855 bn = addressXAD(xad); 3856 3857 /* 3858 * first access of each internal entry: 3859 */ 3860 /* release parent page */ 
3861 XT_PUTPAGE(mp); 3862 3863 /* process the child page */ 3864 goto getPage; 3865 3866 out: 3867 /* 3868 * update file resource stat 3869 */ 3870 /* set size 3871 */ 3872 if (S_ISDIR(ip->i_mode) && !newsize) 3873 ip->i_size = 1; /* fsck hates zero-length directories */ 3874 else 3875 ip->i_size = newsize; 3876 3877 /* update nblocks to reflect freed blocks */ 3878 ip->i_blocks -= LBLK2PBLK(ip->i_sb, nfreed); 3879 3880 /* 3881 * free tlock of invalidated pages 3882 */ 3883 if (flag == COMMIT_WMAP) 3884 txFreelock(ip); 3885 3886 return newsize; 3887} 3888 3889 3890/* 3891 * xtTruncate_pmap() 3892 * 3893 * function: 3894 * Perform truncate to zero lenghth for deleted file, leaving the 3895 * the xtree and working map untouched. This allows the file to 3896 * be accessed via open file handles, while the delete of the file 3897 * is committed to disk. 3898 * 3899 * parameter: 3900 * tid_t tid, 3901 * struct inode *ip, 3902 * s64 committed_size) 3903 * 3904 * return: new committed size 3905 * 3906 * note: 3907 * 3908 * To avoid deadlock by holding too many transaction locks, the 3909 * truncation may be broken up into multiple transactions. 3910 * The committed_size keeps track of part of the file has been 3911 * freed from the pmaps. 
3912 */ 3913s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3914{ 3915 s64 bn; 3916 struct btstack btstack; 3917 int cmp; 3918 int index; 3919 int locked_leaves = 0; 3920 struct metapage *mp; 3921 xtpage_t *p; 3922 struct btframe *parent; 3923 int rc; 3924 struct tblock *tblk; 3925 struct tlock *tlck = 0; 3926 xad_t *xad; 3927 int xlen; 3928 s64 xoff; 3929 struct xtlock *xtlck = 0; 3930 3931 /* save object truncation type */ 3932 tblk = tid_to_tblock(tid); 3933 tblk->xflag |= COMMIT_PMAP; 3934 3935 /* clear stack */ 3936 BT_CLR(&btstack); 3937 3938 if (committed_size) { 3939 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 3940 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 3941 if (rc) 3942 return -rc; 3943 assert(cmp == 0); 3944 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3945 } else { 3946 /* 3947 * start with root 3948 * 3949 * root resides in the inode 3950 */ 3951 bn = 0; 3952 3953 /* 3954 * first access of each page: 3955 */ 3956 getPage: 3957 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3958 if (rc) 3959 return -rc; 3960 3961 /* process entries backward from last index */ 3962 index = le16_to_cpu(p->header.nextindex) - 1; 3963 3964 if (p->header.flag & BT_INTERNAL) 3965 goto getChild; 3966 } 3967 3968 /* 3969 * leaf page 3970 */ 3971 3972 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3973 /* 3974 * We need to limit the size of the transaction 3975 * to avoid exhausting pagecache & tlocks 3976 */ 3977 xad = &p->xad[index]; 3978 xoff = offsetXAD(xad); 3979 xlen = lengthXAD(xad); 3980 XT_PUTPAGE(mp); 3981 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3982 } 3983 tlck = txLock(tid, ip, mp, tlckXTREE); 3984 tlck->type = tlckXTREE | tlckFREE; 3985 xtlck = (struct xtlock *) & tlck->lock; 3986 xtlck->hwm.offset = index; 3987 3988 3989 XT_PUTPAGE(mp); 3990 3991 /* 3992 * go back up to the parent page 3993 */ 3994 getParent: 3995 /* pop/restore parent entry for the current child page */ 3996 if ((parent = BT_POP(&btstack)) == NULL) 3997 
/* current page must have been root */ 3998 goto out; 3999 4000 /* get back the parent page */ 4001 bn = parent->bn; 4002 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4003 if (rc) 4004 return -rc; 4005 4006 index = parent->index; 4007 4008 /* 4009 * parent page become empty: free the page 4010 */ 4011 if (index == XTENTRYSTART) { 4012 /* txCommit() with tlckFREE: 4013 * free child extents covered by parent; 4014 * invalidate parent if COMMIT_PWMAP; 4015 */ 4016 tlck = txLock(tid, ip, mp, tlckXTREE); 4017 xtlck = (struct xtlock *) & tlck->lock; 4018 xtlck->hwm.offset = 4019 le16_to_cpu(p->header.nextindex) - 1; 4020 tlck->type = tlckXTREE | tlckFREE; 4021 4022 XT_PUTPAGE(mp); 4023 4024 if (p->header.flag & BT_ROOT) { 4025 4026 goto out; 4027 } else { 4028 goto getParent; 4029 } 4030 } 4031 /* 4032 * parent page still has entries for front region; 4033 */ 4034 else 4035 index--; 4036 /* 4037 * internal page: go down to child page of current entry 4038 */ 4039 getChild: 4040 /* save current parent entry for the child page */ 4041 BT_PUSH(&btstack, bn, index); 4042 4043 /* get child page */ 4044 xad = &p->xad[index]; 4045 bn = addressXAD(xad); 4046 4047 /* 4048 * first access of each internal entry: 4049 */ 4050 /* release parent page */ 4051 XT_PUTPAGE(mp); 4052 4053 /* process the child page */ 4054 goto getPage; 4055 4056 out: 4057 4058 return 0; 4059} 4060 4061 4062#ifdef _JFS_DEBUG_XTREE 4063/* 4064 * xtDisplayTree() 4065 * 4066 * function: traverse forward 4067 */ 4068int xtDisplayTree(struct inode *ip) 4069{ 4070 int rc = 0; 4071 struct metapage *mp; 4072 xtpage_t *p; 4073 s64 bn, pbn; 4074 int index, lastindex, v, h; 4075 xad_t *xad; 4076 struct btstack btstack; 4077 struct btframe *btsp; 4078 struct btframe *parent; 4079 4080 printk("display B+-tree.\n"); 4081 4082 /* clear stack */ 4083 btsp = btstack.stack; 4084 4085 /* 4086 * start with root 4087 * 4088 * root resides in the inode 4089 */ 4090 bn = 0; 4091 v = h = 0; 4092 4093 /* 4094 * first access of each page: 
4095 */ 4096 getPage: 4097 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4098 if (rc) 4099 return rc; 4100 4101 /* process entries forward from first index */ 4102 index = XTENTRYSTART; 4103 lastindex = le16_to_cpu(p->header.nextindex) - 1; 4104 4105 if (p->header.flag & BT_INTERNAL) { 4106 /* 4107 * first access of each internal page 4108 */ 4109 goto getChild; 4110 } else { /* (p->header.flag & BT_LEAF) */ 4111 4112 /* 4113 * first access of each leaf page 4114 */ 4115 printf("leaf page "); 4116 xtDisplayPage(ip, bn, p); 4117 4118 /* unpin the leaf page */ 4119 XT_PUTPAGE(mp); 4120 } 4121 4122 /* 4123 * go back up to the parent page 4124 */ 4125 getParent: 4126 /* pop/restore parent entry for the current child page */ 4127 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) 4128 /* current page must have been root */ 4129 return; 4130 4131 /* 4132 * parent page scan completed 4133 */ 4134 if ((index = parent->index) == (lastindex = parent->lastindex)) { 4135 /* go back up to the parent page */ 4136 goto getParent; 4137 } 4138 4139 /* 4140 * parent page has entries remaining 4141 */ 4142 /* get back the parent page */ 4143 bn = parent->bn; 4144 /* v = parent->level; */ 4145 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4146 if (rc) 4147 return rc; 4148 4149 /* get next parent entry */ 4150 index++; 4151 4152 /* 4153 * internal page: go down to child page of current entry 4154 */ 4155 getChild: 4156 /* push/save current parent entry for the child page */ 4157 btsp->bn = pbn = bn; 4158 btsp->index = index; 4159 btsp->lastindex = lastindex; 4160 /* btsp->level = v; */ 4161 /* btsp->node = h; */ 4162 ++btsp; 4163 4164 /* get child page */ 4165 xad = &p->xad[index]; 4166 bn = addressXAD(xad); 4167 4168 /* 4169 * first access of each internal entry: 4170 */ 4171 /* release parent page */ 4172 XT_PUTPAGE(mp); 4173 4174 printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index, 4175 (ulong) bn); 4176 v++; 4177 h = index; 4178 4179 /* process the child page */ 4180 goto 
getPage; 4181} 4182 4183 4184/* 4185 * xtDisplayPage() 4186 * 4187 * function: display page 4188 */ 4189int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p) 4190{ 4191 int rc = 0; 4192 struct metapage *mp; 4193 xad_t *xad; 4194 s64 xaddr, xoff; 4195 int xlen, i, j; 4196 4197 if (p == NULL) { 4198 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4199 if (rc) 4200 return rc; 4201 } 4202 4203 /* display page control */ 4204 printf("bn:0x%lx flag:0x%x nextindex:%d\n", 4205 (ulong) bn, p->header.flag, 4206 le16_to_cpu(p->header.nextindex)); 4207 4208 /* display entries */ 4209 xad = &p->xad[XTENTRYSTART]; 4210 for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex); 4211 i++, xad++, j++) { 4212 xoff = offsetXAD(xad); 4213 xaddr = addressXAD(xad); 4214 xlen = lengthXAD(xad); 4215 printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff, 4216 (ulong) xaddr, xlen); 4217 4218 if (j == 4) { 4219 printf("\n"); 4220 j = 0; 4221 } 4222 } 4223 4224 printf("\n"); 4225} 4226#endif /* _JFS_DEBUG_XTREE */ 4227 4228 4229#ifdef _JFS_WIP 4230/* 4231 * xtGather() 4232 * 4233 * function: 4234 * traverse for allocation acquiring tlock at commit time 4235 * (vs at the time of update) logging backward top down 4236 * 4237 * note: 4238 * problem - establishing that all new allocation have been 4239 * processed both for append and random write in sparse file 4240 * at the current entry at the current subtree root page 4241 * 4242 */ 4243int xtGather(t) 4244btree_t *t; 4245{ 4246 int rc = 0; 4247 xtpage_t *p; 4248 u64 bn; 4249 int index; 4250 btentry_t *e; 4251 struct btstack btstack; 4252 struct btsf *parent; 4253 4254 /* clear stack */ 4255 BT_CLR(&btstack); 4256 4257 /* 4258 * start with root 4259 * 4260 * root resides in the inode 4261 */ 4262 bn = 0; 4263 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4264 if (rc) 4265 return rc; 4266 4267 /* new root is NOT pointed by a new entry 4268 if (p->header.flag & NEW) 4269 allocate new page lock; 4270 write a NEWPAGE log; 4271 */ 4272 4273 dopage: 4274 /* 
	 * first access of each page:
	 */
	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;

	if (p->header.flag & BT_LEAF) {
		/*
		 * first access of each leaf page
		 */
		/* process leaf page entries backward */
		for (; index >= XTENTRYSTART; index--) {
			e = &p->xad[index];
			/*
			 * if newpage, log NEWPAGE.
			 *
			 if (e->flag & XAD_NEW) {
			 nfound =+ entry->length;
			 update current page lock for the entry;
			 newpage(entry);
			 *
			 * if moved, log move.
			 *
			 } else if (e->flag & XAD_MOVED) {
			 reset flag;
			 update current page lock for the entry;
			 }
			 */
		}

		/* unpin the leaf page */
		XT_PUTPAGE(mp);

		/*
		 * go back up to the parent page
		 */
	      getParent:
		/* restore parent entry for the current child page */
		if ((parent = BT_POP(&btstack)) == NULL)
			/* current page must have been root */
			return 0;

		if ((index = parent->index) == XTENTRYSTART) {
			/*
			 * parent page scan completed
			 */
			/* go back up to the parent page */
			goto getParent;
		} else {
			/*
			 * parent page has entries remaining
			 */
			/* get back the parent page */
			bn = parent->bn;
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return EIO;

			/* first subroot page which
			 * covers all new allocated blocks
			 * itself not new/modified.
			 * (if modified from split of descendent,
			 * go down path of split page)

			 if (nfound == nnew &&
			 !(p->header.flag & (NEW | MOD)))
			 exit scan;
			 */

			/* process parent page entries backward */
			index--;
		}
	} else {
		/*
		 * first access of each internal page
		 */
	}

	/*
	 * internal page: go down to child page of current entry
	 */

	/* save current parent entry for the child page */
	BT_PUSH(&btstack, bn, index);

	/* get current entry for the child page */
	e = &p->xad[index];

	/*
	 * first access of each internal entry:
	 */
	/*
	 * if new entry, log btree_tnewentry.
	 *
	 if (e->flag & XAD_NEW)
	 update parent page lock for the entry;
	 */

	/* release parent page */
	XT_PUTPAGE(mp);

	/* get child page */
	bn = e->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/*
	 * first access of each non-root page:
	 */
	/*
	 * if new, log btree_newpage.
	 *
	 if (p->header.flag & NEW)
	 allocate new page lock;
	 write a NEWPAGE log (next, prev);
	 */

	/* process the child page */
	goto dopage;

      out:
	return 0;
}
#endif				/* _JFS_WIP */


#ifdef CONFIG_JFS_STATISTICS
/*
 * jfs_xtstat_read()
 *
 * function: /proc read handler reporting the xtree counters
 *	(searches / fast searches / splits) accumulated in xtStat.
 *
 * Follows the 2.4-era proc read_proc protocol: format into the
 * page-sized 'buffer', honor the caller's 'offset'/'length' window via
 * *start, and set *eof when the whole report fits.
 */
int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Xtree statistics\n"
		       "====================\n"
		       "searches = %d\n"
		       "fast searches = %d\n"
		       "splits = %d\n",
		       xtStat.search,
		       xtStat.fastSearch,
		       xtStat.split);

	/* window the formatted text to the caller's [offset, offset+length) */
	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;	/* entire report delivered */

	/* offset past end of report: nothing left to return */
	if (len < 0)
		len = 0;

	return len;
}
#endif