1/* 2 * Copyright (c) International Business Machines Corp., 2000-2002 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19/* 20 * jfs_dtree.c: directory B+-tree manager 21 * 22 * B+-tree with variable length key directory: 23 * 24 * each directory page is structured as an array of 32-byte 25 * directory entry slots initialized as a freelist 26 * to avoid search/compaction of free space at insertion. 27 * when an entry is inserted, a number of slots are allocated 28 * from the freelist as required to store variable length data 29 * of the entry; when the entry is deleted, slots of the entry 30 * are returned to freelist. 31 * 32 * leaf entry stores full name as key and file serial number 33 * (aka inode number) as data. 34 * internal/router entry stores sufffix compressed name 35 * as key and simple extent descriptor as data. 36 * 37 * each directory page maintains a sorted entry index table 38 * which stores the start slot index of sorted entries 39 * to allow binary search on the table. 40 * 41 * directory starts as a root/leaf page in on-disk inode 42 * inline data area. 43 * when it becomes full, it starts a leaf of a external extent 44 * of length of 1 block. each time the first leaf becomes full, 45 * it is extended rather than split (its size is doubled), 46 * until its length becoms 4 KBytes, from then the extent is split 47 * with new 4 Kbyte extent when it becomes full 48 * to reduce external fragmentation of small directories. 49 * 50 * blah, blah, blah, for linear scan of directory in pieces by 51 * readdir(). 52 * 53 * 54 * case-insensitive directory file system 55 * 56 * names are stored in case-sensitive way in leaf entry. 57 * but stored, searched and compared in case-insensitive (uppercase) order 58 * (i.e., both search key and entry key are folded for search/compare): 59 * (note that case-sensitive order is BROKEN in storage, e.g., 60 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 61 * 62 * entries which folds to the same key makes up a equivalent class 63 * whose members are stored as contiguous cluster (may cross page boundary) 64 * but whose order is arbitrary and acts as duplicate, e.g., 65 * abc, Abc, aBc, abC) 66 * 67 * once match is found at leaf, requires scan forward/backward 68 * either for, in case-insensitive search, duplicate 69 * or for, in case-sensitive search, for exact match 70 * 71 * router entry must be created/stored in case-insensitive way 72 * in internal entry: 73 * (right most key of left page and left most key of right page 74 * are folded, and its suffix compression is propagated as router 75 * key in parent) 76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 77 * should be made the router key for the split) 78 * 79 * case-insensitive search: 80 * 81 * fold search key; 82 * 83 * case-insensitive search of B-tree: 84 * for internal entry, router key is already folded; 85 * for leaf entry, fold the entry key before comparison. 86 * 87 * if (leaf entry case-insensitive match found) 88 * if (next entry satisfies case-insensitive match) 89 * return EDUPLICATE; 90 * if (prev entry satisfies case-insensitive match) 91 * return EDUPLICATE; 92 * return match; 93 * else 94 * return no match; 95 * 96 * serialization: 97 * target directory inode lock is being held on entry/exit 98 * of all main directory service routines. 99 * 100 * log based recovery: 101 */ 102 103#include <linux/fs.h> 104#include "jfs_incore.h" 105#include "jfs_superblock.h" 106#include "jfs_filsys.h" 107#include "jfs_metapage.h" 108#include "jfs_dmap.h" 109#include "jfs_unicode.h" 110#include "jfs_debug.h" 111 112/* dtree split parameter */ 113struct dtsplit { 114 struct metapage *mp; 115 s16 index; 116 s16 nslot; 117 struct component_name *key; 118 ddata_t *data; 119 struct pxdlist *pxdlist; 120}; 121 122#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 123 124/* get page buffer for specified block address */ 125#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 126{\ 127 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ 128 if (!(RC))\ 129 {\ 130 if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ 131 ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ 132 {\ 133 jERROR(1,("DT_GETPAGE: dtree page corrupt\n"));\ 134 BT_PUTPAGE(MP);\ 135 updateSuper((IP)->i_sb, FM_DIRTY);\ 136 MP = NULL;\ 137 RC = EIO;\ 138 }\ 139 }\ 140} 141 142/* for consistency */ 143#define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 144 145#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 146 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 147 148/* 149 * forward references 150 */ 151static int dtSplitUp(tid_t tid, struct inode *ip, 152 struct dtsplit * split, struct btstack * btstack); 153 154static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 155 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 156 157static int dtExtendPage(tid_t tid, struct inode *ip, 158 struct dtsplit * split, struct btstack * btstack); 159 160static int dtSplitRoot(tid_t tid, struct inode *ip, 161 struct dtsplit * split, struct metapage ** rmpp); 162 163static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 164 dtpage_t * fp, struct btstack * btstack); 165 166static int dtSearchNode(struct inode *ip, 167 s64 lmxaddr, pxd_t * kpxd, struct btstack * btstack); 168 169static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 170 171static int dtReadFirst(struct inode *ip, struct btstack * btstack); 172 173static int dtReadNext(struct inode *ip, 174 loff_t * offset, struct btstack * btstack); 175 176static int dtCompare(struct component_name * key, dtpage_t * p, int si); 177 178static int ciCompare(struct component_name * key, dtpage_t * p, int si, 179 int flag); 180 181static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 182 int flag); 183 184static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 185 int ri, struct component_name * key, int flag); 186 187static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 188 ddata_t * data, struct dt_lock **); 189 190static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 191 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 192 int do_index); 193 194static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 195 196static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 197 198static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 199 200#define ciToUpper(c) UniStrupr((c)->name) 201 202/* 203 * find_index() 204 * 205 * Returns dtree page containing directory table entry for specified 206 * index and pointer to its entry. 207 * 208 * mp must be released by caller. 209 */ 210static struct dir_table_slot *find_index(struct inode *ip, u32 index, 211 struct metapage ** mp) 212{ 213 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 214 s64 blkno; 215 s64 offset; 216 int page_offset; 217 struct dir_table_slot *slot; 218 static int maxWarnings = 10; 219 220 if (index < 2) { 221 if (maxWarnings) { 222 jERROR(1, ("find_entry called with index = %d\n", 223 index)); 224 maxWarnings--; 225 } 226 return 0; 227 } 228 229 if (index >= jfs_ip->next_index) { 230 jFYI(1, ("find_entry called with index >= next_index\n")); 231 return 0; 232 } 233 234 if (jfs_ip->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 235 /* 236 * Inline directory table 237 */ 238 *mp = 0; 239 slot = &jfs_ip->i_dirtable[index - 2]; 240 } else { 241 offset = (index - 2) * sizeof(struct dir_table_slot); 242 page_offset = offset & (PSIZE - 1); 243 blkno = ((offset + 1) >> L2PSIZE) << 244 JFS_SBI(ip->i_sb)->l2nbperpage; 245 246 if (*mp && ((*mp)->index != blkno)) { 247 release_metapage(*mp); 248 *mp = 0; 249 } 250 if (*mp == 0) 251 *mp = read_metapage(ip, blkno, PSIZE, 0); 252 if (*mp == 0) { 253 jERROR(1, 254 ("free_index: error reading directory table\n")); 255 return 0; 256 } 257 258 slot = 259 (struct dir_table_slot *) ((char *) (*mp)->data + 260 page_offset); 261 } 262 return slot; 263} 264 265static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 266 u32 index) 267{ 268 struct tlock *tlck; 269 struct linelock *llck; 270 struct lv *lv; 271 272 tlck = txLock(tid, ip, mp, tlckDATA); 273 llck = (struct linelock *) tlck->lock; 274 275 if (llck->index >= llck->maxcnt) 276 llck = txLinelock(llck); 277 lv = &llck->lv[llck->index]; 278 279 /* 280 * Linelock slot size is twice the size of directory table 281 * slot size. 512 entries per page. 282 */ 283 lv->offset = ((index - 2) & 511) >> 1; 284 lv->length = 1; 285 llck->index++; 286} 287 288/* 289 * add_index() 290 * 291 * Adds an entry to the directory index table. This is used to provide 292 * each directory entry with a persistent index in which to resume 293 * directory traversals 294 */ 295static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 296{ 297 struct super_block *sb = ip->i_sb; 298 struct jfs_sb_info *sbi = JFS_SBI(sb); 299 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 300 u64 blkno; 301 struct dir_table_slot *dirtab_slot; 302 u32 index; 303 struct linelock *llck; 304 struct lv *lv; 305 struct metapage *mp; 306 s64 offset; 307 uint page_offset; 308 int rc; 309 struct tlock *tlck; 310 s64 xaddr; 311 312 ASSERT(DO_INDEX(ip)); 313 314 if (jfs_ip->next_index < 2) { 315 jERROR(1, ("next_index = %d. Please fix this!\n", 316 jfs_ip->next_index)); 317 jfs_ip->next_index = 2; 318 } 319 320 index = jfs_ip->next_index++; 321 322 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 323 /* 324 * i_size reflects size of index table, or 8 bytes per entry. 325 */ 326 ip->i_size = (loff_t) (index - 1) << 3; 327 328 /* 329 * dir table fits inline within inode 330 */ 331 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 332 dirtab_slot->flag = DIR_INDEX_VALID; 333 dirtab_slot->slot = slot; 334 DTSaddress(dirtab_slot, bn); 335 336 set_cflag(COMMIT_Dirtable, ip); 337 338 return index; 339 } 340 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 341 /* 342 * It's time to move the inline table to an external 343 * page and begin to build the xtree 344 */ 345 346 /* 347 * Save the table, we're going to overwrite it with the 348 * xtree root 349 */ 350 struct dir_table_slot temp_table[12]; 351 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 352 353 /* 354 * Initialize empty x-tree 355 */ 356 xtInitRoot(tid, ip); 357 358 /* 359 * Allocate the first block & add it to the xtree 360 */ 361 xaddr = 0; 362 if ((rc = 363 xtInsert(tid, ip, 0, 0, sbi->nbperpage, 364 &xaddr, 0))) { 365 jFYI(1, ("add_index: xtInsert failed!\n")); 366 return -1; 367 } 368 ip->i_size = PSIZE; 369 ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage); 370 371 if ((mp = get_metapage(ip, 0, ip->i_blksize, 0)) == 0) { 372 jERROR(1, ("add_index: get_metapage failed!\n")); 373 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 374 return -1; 375 } 376 tlck = txLock(tid, ip, mp, tlckDATA); 377 llck = (struct linelock *) & tlck->lock; 378 ASSERT(llck->index == 0); 379 lv = &llck->lv[0]; 380 381 lv->offset = 0; 382 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 383 llck->index++; 384 385 memcpy(mp->data, temp_table, sizeof(temp_table)); 386 387 mark_metapage_dirty(mp); 388 release_metapage(mp); 389 390 /* 391 * Logging is now directed by xtree tlocks 392 */ 393 clear_cflag(COMMIT_Dirtable, ip); 394 } 395 396 offset = (index - 2) * sizeof(struct dir_table_slot); 397 page_offset = offset & (PSIZE - 1); 398 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 399 if (page_offset == 0) { 400 /* 401 * This will be the beginning of a new page 402 */ 403 xaddr = 0; 404 if ((rc = 405 xtInsert(tid, ip, 0, blkno, sbi->nbperpage, 406 &xaddr, 0))) { 407 jFYI(1, ("add_index: xtInsert failed!\n")); 408 jfs_ip->next_index--; 409 return -1; 410 } 411 ip->i_size += PSIZE; 412 ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage); 413 414 if ((mp = get_metapage(ip, blkno, PSIZE, 0))) 415 memset(mp->data, 0, PSIZE); /* Just looks better */ 416 else 417 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 418 } else 419 mp = read_metapage(ip, blkno, PSIZE, 0); 420 421 if (mp == 0) { 422 jERROR(1, ("add_index: get/read_metapage failed!\n")); 423 return -1; 424 } 425 426 lock_index(tid, ip, mp, index); 427 428 dirtab_slot = 429 (struct dir_table_slot *) ((char *) mp->data + page_offset); 430 dirtab_slot->flag = DIR_INDEX_VALID; 431 dirtab_slot->slot = slot; 432 DTSaddress(dirtab_slot, bn); 433 434 mark_metapage_dirty(mp); 435 release_metapage(mp); 436 437 return index; 438} 439 440/* 441 * free_index() 442 * 443 * Marks an entry to the directory index table as free. 444 */ 445static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 446{ 447 struct dir_table_slot *dirtab_slot; 448 struct metapage *mp = 0; 449 450 dirtab_slot = find_index(ip, index, &mp); 451 452 if (dirtab_slot == 0) 453 return; 454 455 dirtab_slot->flag = DIR_INDEX_FREE; 456 dirtab_slot->slot = dirtab_slot->addr1 = 0; 457 dirtab_slot->addr2 = cpu_to_le32(next); 458 459 if (mp) { 460 lock_index(tid, ip, mp, index); 461 mark_metapage_dirty(mp); 462 release_metapage(mp); 463 } else 464 set_cflag(COMMIT_Dirtable, ip); 465} 466 467/* 468 * modify_index() 469 * 470 * Changes an entry in the directory index table 471 */ 472static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 473 int slot, struct metapage ** mp) 474{ 475 struct dir_table_slot *dirtab_slot; 476 477 dirtab_slot = find_index(ip, index, mp); 478 479 if (dirtab_slot == 0) 480 return; 481 482 DTSaddress(dirtab_slot, bn); 483 dirtab_slot->slot = slot; 484 485 if (*mp) { 486 lock_index(tid, ip, *mp, index); 487 mark_metapage_dirty(*mp); 488 } else 489 set_cflag(COMMIT_Dirtable, ip); 490} 491 492/* 493 * read_index() 494 * 495 * reads a directory table slot 496 */ 497static int read_index(struct inode *ip, u32 index, 498 struct dir_table_slot * dirtab_slot) 499{ 500 struct metapage *mp = 0; 501 struct dir_table_slot *slot; 502 503 slot = find_index(ip, index, &mp); 504 if (slot == 0) { 505 return -EIO; 506 } 507 508 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 509 510 if (mp) 511 release_metapage(mp); 512 513 return 0; 514} 515 516/* 517 * dtSearch() 518 * 519 * function: 520 * Search for the entry with specified key 521 * 522 * parameter: 523 * 524 * return: 0 - search result on stack, leaf page pinned; 525 * errno - I/O error 526 */ 527int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 528 struct btstack * btstack, int flag) 529{ 530 int rc = 0; 531 int cmp = 1; /* init for empty page */ 532 s64 bn; 533 struct metapage *mp; 534 dtpage_t *p; 535 s8 *stbl; 536 int base, index, lim; 537 struct btframe *btsp; 538 pxd_t *pxd; 539 int psize = 288; /* initial in-line directory */ 540 ino_t inumber; 541 struct component_name ciKey; 542 struct super_block *sb = ip->i_sb; 543 544 ciKey.name = 545 (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 546 GFP_NOFS); 547 if (ciKey.name == 0) { 548 rc = ENOMEM; 549 goto dtSearch_Exit2; 550 } 551 552 553 /* uppercase search key for c-i directory */ 554 UniStrcpy(ciKey.name, key->name); 555 ciKey.namlen = key->namlen; 556 557 /* only uppercase if case-insensitive support is on */ 558 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 559 ciToUpper(&ciKey); 560 } 561 BT_CLR(btstack); /* reset stack */ 562 563 /* init level count for max pages to split */ 564 btstack->nsplit = 1; 565 566 /* 567 * search down tree from root: 568 * 569 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 570 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 571 * 572 * if entry with search key K is not found 573 * internal page search find the entry with largest key Ki 574 * less than K which point to the child page to search; 575 * leaf page search find the entry with smallest key Kj 576 * greater than K so that the returned index is the position of 577 * the entry to be shifted right for insertion of new entry. 578 * for empty tree, search key is greater than any key of the tree. 579 * 580 * by convention, root bn = 0. 581 */ 582 for (bn = 0;;) { 583 /* get/pin the page to search */ 584 DT_GETPAGE(ip, bn, mp, psize, p, rc); 585 if (rc) 586 goto dtSearch_Exit1; 587 588 /* get sorted entry table of the page */ 589 stbl = DT_GETSTBL(p); 590 591 /* 592 * binary search with search key K on the current page. 593 */ 594 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 595 index = base + (lim >> 1); 596 597 if (p->header.flag & BT_LEAF) { 598 /* uppercase leaf name to compare */ 599 cmp = 600 ciCompare(&ciKey, p, stbl[index], 601 JFS_SBI(sb)->mntflag); 602 } else { 603 /* router key is in uppercase */ 604 605 cmp = dtCompare(&ciKey, p, stbl[index]); 606 607 608 } 609 if (cmp == 0) { 610 /* 611 * search hit 612 */ 613 /* search hit - leaf page: 614 * return the entry found 615 */ 616 if (p->header.flag & BT_LEAF) { 617 inumber = le32_to_cpu( 618 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 619 620 /* 621 * search for JFS_LOOKUP 622 */ 623 if (flag == JFS_LOOKUP) { 624 *data = inumber; 625 rc = 0; 626 goto out; 627 } 628 629 /* 630 * search for JFS_CREATE 631 */ 632 if (flag == JFS_CREATE) { 633 *data = inumber; 634 rc = EEXIST; 635 goto out; 636 } 637 638 /* 639 * search for JFS_REMOVE or JFS_RENAME 640 */ 641 if ((flag == JFS_REMOVE || 642 flag == JFS_RENAME) && 643 *data != inumber) { 644 rc = ESTALE; 645 goto out; 646 } 647 648 /* 649 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 650 */ 651 /* save search result */ 652 *data = inumber; 653 btsp = btstack->top; 654 btsp->bn = bn; 655 btsp->index = index; 656 btsp->mp = mp; 657 658 rc = 0; 659 goto dtSearch_Exit1; 660 } 661 662 /* search hit - internal page: 663 * descend/search its child page 664 */ 665 goto getChild; 666 } 667 668 if (cmp > 0) { 669 base = index + 1; 670 --lim; 671 } 672 } 673 674 /* 675 * search miss 676 * 677 * base is the smallest index with key (Kj) greater than 678 * search key (K) and may be zero or (maxindex + 1) index. 679 */ 680 /* 681 * search miss - leaf page 682 * 683 * return location of entry (base) where new entry with 684 * search key K is to be inserted. 685 */ 686 if (p->header.flag & BT_LEAF) { 687 /* 688 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 689 */ 690 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 691 flag == JFS_RENAME) { 692 rc = ENOENT; 693 goto out; 694 } 695 696 /* 697 * search for JFS_CREATE|JFS_FINDDIR: 698 * 699 * save search result 700 */ 701 *data = 0; 702 btsp = btstack->top; 703 btsp->bn = bn; 704 btsp->index = base; 705 btsp->mp = mp; 706 707 rc = 0; 708 goto dtSearch_Exit1; 709 } 710 711 /* 712 * search miss - internal page 713 * 714 * if base is non-zero, decrement base by one to get the parent 715 * entry of the child page to search. 716 */ 717 index = base ? base - 1 : base; 718 719 /* 720 * go down to child page 721 */ 722 getChild: 723 /* update max. number of pages to split */ 724 if (btstack->nsplit >= 8) { 725 /* Something's corrupted, mark filesytem dirty so 726 * chkdsk will fix it. 727 */ 728 jERROR(1, ("stack overrun in dtSearch!\n")); 729 updateSuper(sb, FM_DIRTY); 730 rc = EIO; 731 goto out; 732 } 733 btstack->nsplit++; 734 735 /* push (bn, index) of the parent page/entry */ 736 BT_PUSH(btstack, bn, index); 737 738 /* get the child page block number */ 739 pxd = (pxd_t *) & p->slot[stbl[index]]; 740 bn = addressPXD(pxd); 741 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 742 743 /* unpin the parent page */ 744 DT_PUTPAGE(mp); 745 } 746 747 out: 748 DT_PUTPAGE(mp); 749 750 dtSearch_Exit1: 751 752 kfree(ciKey.name); 753 754 dtSearch_Exit2: 755 756 return rc; 757} 758 759 760/* 761 * dtInsert() 762 * 763 * function: insert an entry to directory tree 764 * 765 * parameter: 766 * 767 * return: 0 - success; 768 * errno - failure; 769 */ 770int dtInsert(tid_t tid, struct inode *ip, 771 struct component_name * name, ino_t * fsn, struct btstack * btstack) 772{ 773 int rc = 0; 774 struct metapage *mp; /* meta-page buffer */ 775 dtpage_t *p; /* base B+-tree index page */ 776 s64 bn; 777 int index; 778 struct dtsplit split; /* split information */ 779 ddata_t data; 780 struct dt_lock *dtlck; 781 int n; 782 struct tlock *tlck; 783 struct lv *lv; 784 785 /* 786 * retrieve search result 787 * 788 * dtSearch() returns (leaf page pinned, index at which to insert). 789 * n.b. dtSearch() may return index of (maxindex + 1) of 790 * the full page. 791 */ 792 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 793 794 /* 795 * insert entry for new key 796 */ 797 if (DO_INDEX(ip)) { 798 if (JFS_IP(ip)->next_index == DIREND) { 799 DT_PUTPAGE(mp); 800 return EMLINK; 801 } 802 n = NDTLEAF(name->namlen); 803 data.leaf.tid = tid; 804 data.leaf.ip = ip; 805 } else { 806 n = NDTLEAF_LEGACY(name->namlen); 807 data.leaf.ip = 0; /* signifies legacy directory format */ 808 } 809 data.leaf.ino = cpu_to_le32(*fsn); 810 811 /* 812 * leaf page does not have enough room for new entry: 813 * 814 * extend/split the leaf page; 815 * 816 * dtSplitUp() will insert the entry and unpin the leaf page. 817 */ 818 if (n > p->header.freecnt) { 819 split.mp = mp; 820 split.index = index; 821 split.nslot = n; 822 split.key = name; 823 split.data = &data; 824 rc = dtSplitUp(tid, ip, &split, btstack); 825 return rc; 826 } 827 828 /* 829 * leaf page does have enough room for new entry: 830 * 831 * insert the new data entry into the leaf page; 832 */ 833 BT_MARK_DIRTY(mp, ip); 834 /* 835 * acquire a transaction lock on the leaf page 836 */ 837 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 838 dtlck = (struct dt_lock *) & tlck->lock; 839 ASSERT(dtlck->index == 0); 840 lv = & dtlck->lv[0]; 841 842 /* linelock header */ 843 lv->offset = 0; 844 lv->length = 1; 845 dtlck->index++; 846 847 dtInsertEntry(p, index, name, &data, &dtlck); 848 849 /* linelock stbl of non-root leaf page */ 850 if (!(p->header.flag & BT_ROOT)) { 851 if (dtlck->index >= dtlck->maxcnt) 852 dtlck = (struct dt_lock *) txLinelock(dtlck); 853 lv = & dtlck->lv[dtlck->index]; 854 n = index >> L2DTSLOTSIZE; 855 lv->offset = p->header.stblindex + n; 856 lv->length = 857 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 858 dtlck->index++; 859 } 860 861 /* unpin the leaf page */ 862 DT_PUTPAGE(mp); 863 864 return 0; 865} 866 867 868/* 869 * dtSplitUp() 870 * 871 * function: propagate insertion bottom up; 872 * 873 * parameter: 874 * 875 * return: 0 - success; 876 * errno - failure; 877 * leaf page unpinned; 878 */ 879static int dtSplitUp(tid_t tid, 880 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 881{ 882 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 883 int rc = 0; 884 struct metapage *smp; 885 dtpage_t *sp; /* split page */ 886 struct metapage *rmp; 887 dtpage_t *rp; /* new right page split from sp */ 888 pxd_t rpxd; /* new right page extent descriptor */ 889 struct metapage *lmp; 890 dtpage_t *lp; /* left child page */ 891 int skip; /* index of entry of insertion */ 892 struct btframe *parent; /* parent page entry on traverse stack */ 893 s64 xaddr, nxaddr; 894 int xlen, xsize; 895 struct pxdlist pxdlist; 896 pxd_t *pxd; 897 struct component_name key = { 0, 0 }; 898 ddata_t *data = split->data; 899 int n; 900 struct dt_lock *dtlck; 901 struct tlock *tlck; 902 struct lv *lv; 903 904 /* get split page */ 905 smp = split->mp; 906 sp = DT_PAGE(ip, smp); 907 908 key.name = 909 (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), 910 GFP_NOFS); 911 if (key.name == 0) { 912 DT_PUTPAGE(smp); 913 rc = ENOMEM; 914 goto dtSplitUp_Exit; 915 } 916 917 /* 918 * split leaf page 919 * 920 * The split routines insert the new entry, and 921 * acquire txLock as appropriate. 922 */ 923 /* 924 * split root leaf page: 925 */ 926 if (sp->header.flag & BT_ROOT) { 927 /* 928 * allocate a single extent child page 929 */ 930 xlen = 1; 931 n = sbi->bsize >> L2DTSLOTSIZE; 932 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 933 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 934 if (n <= split->nslot) 935 xlen++; 936 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) 937 goto freeKeyName; 938 939 pxdlist.maxnpxd = 1; 940 pxdlist.npxd = 0; 941 pxd = &pxdlist.pxd[0]; 942 PXDaddress(pxd, xaddr); 943 PXDlength(pxd, xlen); 944 split->pxdlist = &pxdlist; 945 rc = dtSplitRoot(tid, ip, split, &rmp); 946 947 DT_PUTPAGE(rmp); 948 DT_PUTPAGE(smp); 949 950 goto freeKeyName; 951 } 952 953 /* 954 * extend first leaf page 955 * 956 * extend the 1st extent if less than buffer page size 957 * (dtExtendPage() reurns leaf page unpinned) 958 */ 959 pxd = &sp->header.self; 960 xlen = lengthPXD(pxd); 961 xsize = xlen << sbi->l2bsize; 962 if (xsize < PSIZE) { 963 xaddr = addressPXD(pxd); 964 n = xsize >> L2DTSLOTSIZE; 965 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 966 if ((n + sp->header.freecnt) <= split->nslot) 967 n = xlen + (xlen << 1); 968 else 969 n = xlen; 970 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 971 (s64) n, &nxaddr))) 972 goto extendOut; 973 974 pxdlist.maxnpxd = 1; 975 pxdlist.npxd = 0; 976 pxd = &pxdlist.pxd[0]; 977 PXDaddress(pxd, nxaddr) 978 PXDlength(pxd, xlen + n); 979 split->pxdlist = &pxdlist; 980 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 981 nxaddr = addressPXD(pxd); 982 if (xaddr != nxaddr) { 983 /* free relocated extent */ 984 xlen = lengthPXD(pxd); 985 dbFree(ip, nxaddr, (s64) xlen); 986 } else { 987 /* free extended delta */ 988 xlen = lengthPXD(pxd) - n; 989 xaddr = addressPXD(pxd) + xlen; 990 dbFree(ip, xaddr, (s64) n); 991 } 992 } 993 994 extendOut: 995 DT_PUTPAGE(smp); 996 goto freeKeyName; 997 } 998 999 /* 1000 * split leaf page <sp> into <sp> and a new right page <rp>. 1001 * 1002 * return <rp> pinned and its extent descriptor <rpxd> 1003 */ 1004 /* 1005 * allocate new directory page extent and 1006 * new index page(s) to cover page split(s) 1007 * 1008 * allocation hint: ? 1009 */ 1010 n = btstack->nsplit; 1011 pxdlist.maxnpxd = pxdlist.npxd = 0; 1012 xlen = sbi->nbperpage; 1013 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1014 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1015 PXDaddress(pxd, xaddr); 1016 PXDlength(pxd, xlen); 1017 pxdlist.maxnpxd++; 1018 continue; 1019 } 1020 1021 DT_PUTPAGE(smp); 1022 1023 /* undo allocation */ 1024 goto splitOut; 1025 } 1026 1027 split->pxdlist = &pxdlist; 1028 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1029 DT_PUTPAGE(smp); 1030 1031 /* undo allocation */ 1032 goto splitOut; 1033 } 1034 1035 /* 1036 * propagate up the router entry for the leaf page just split 1037 * 1038 * insert a router entry for the new page into the parent page, 1039 * propagate the insert/split up the tree by walking back the stack 1040 * of (bn of parent page, index of child page entry in parent page) 1041 * that were traversed during the search for the page that split. 1042 * 1043 * the propagation of insert/split up the tree stops if the root 1044 * splits or the page inserted into doesn't have to split to hold 1045 * the new entry. 1046 * 1047 * the parent entry for the split page remains the same, and 1048 * a new entry is inserted at its right with the first key and 1049 * block number of the new right page. 1050 * 1051 * There are a maximum of 4 pages pinned at any time: 1052 * two children, left parent and right parent (when the parent splits). 1053 * keep the child pages pinned while working on the parent. 1054 * make sure that all pins are released at exit. 1055 */ 1056 while ((parent = BT_POP(btstack)) != NULL) { 1057 /* parent page specified by stack frame <parent> */ 1058 1059 /* keep current child pages (<lp>, <rp>) pinned */ 1060 lmp = smp; 1061 lp = sp; 1062 1063 /* 1064 * insert router entry in parent for new right child page <rp> 1065 */ 1066 /* get the parent page <sp> */ 1067 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1068 if (rc) { 1069 DT_PUTPAGE(lmp); 1070 DT_PUTPAGE(rmp); 1071 goto splitOut; 1072 } 1073 1074 /* 1075 * The new key entry goes ONE AFTER the index of parent entry, 1076 * because the split was to the right. 1077 */ 1078 skip = parent->index + 1; 1079 1080 /* 1081 * compute the key for the router entry 1082 * 1083 * key suffix compression: 1084 * for internal pages that have leaf pages as children, 1085 * retain only what's needed to distinguish between 1086 * the new entry and the entry on the page to its left. 1087 * If the keys compare equal, retain the entire key. 1088 * 1089 * note that compression is performed only at computing 1090 * router key at the lowest internal level. 1091 * further compression of the key between pairs of higher 1092 * level internal pages loses too much information and 1093 * the search may fail. 1094 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1095 * results in two adjacent parent entries (a)(xx). 1096 * if split occurs between these two entries, and 1097 * if compression is applied, the router key of parent entry 1098 * of right page (x) will divert search for x into right 1099 * subtree and miss x in the left subtree.) 1100 * 1101 * the entire key must be retained for the next-to-leftmost 1102 * internal key at any level of the tree, or search may fail 1103 * (e.g., ?) 1104 */ 1105 switch (rp->header.flag & BT_TYPE) { 1106 case BT_LEAF: 1107 /* 1108 * compute the length of prefix for suffix compression 1109 * between last entry of left page and first entry 1110 * of right page 1111 */ 1112 if ((sp->header.flag & BT_ROOT && skip > 1) || 1113 sp->header.prev != 0 || skip > 1) { 1114 /* compute uppercase router prefix key */ 1115 ciGetLeafPrefixKey(lp, 1116 lp->header.nextindex - 1, 1117 rp, 0, &key, sbi->mntflag); 1118 } else { 1119 /* next to leftmost entry of 1120 lowest internal level */ 1121 1122 /* compute uppercase router key */ 1123 dtGetKey(rp, 0, &key, sbi->mntflag); 1124 key.name[key.namlen] = 0; 1125 1126 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1127 ciToUpper(&key); 1128 } 1129 1130 n = NDTINTERNAL(key.namlen); 1131 break; 1132 1133 case BT_INTERNAL: 1134 dtGetKey(rp, 0, &key, sbi->mntflag); 1135 n = NDTINTERNAL(key.namlen); 1136 break; 1137 1138 default: 1139 jERROR(2, ("dtSplitUp(): UFO!\n")); 1140 break; 1141 } 1142 1143 /* unpin left child page */ 1144 DT_PUTPAGE(lmp); 1145 1146 /* 1147 * compute the data for the router entry 1148 */ 1149 data->xd = rpxd; /* child page xd */ 1150 1151 /* 1152 * parent page is full - split the parent page 1153 */ 1154 if (n > sp->header.freecnt) { 1155 /* init for parent page split */ 1156 split->mp = smp; 1157 split->index = skip; /* index at insert */ 1158 split->nslot = n; 1159 split->key = &key; 1160 /* split->data = data; */ 1161 1162 /* unpin right child page */ 1163 DT_PUTPAGE(rmp); 1164 1165 /* The split routines insert the new entry, 1166 * acquire txLock as appropriate. 1167 * return <rp> pinned and its block number <rbn>. 1168 */ 1169 rc = (sp->header.flag & BT_ROOT) ? 1170 dtSplitRoot(tid, ip, split, &rmp) : 1171 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1172 if (rc) { 1173 DT_PUTPAGE(smp); 1174 goto splitOut; 1175 } 1176 1177 /* smp and rmp are pinned */ 1178 } 1179 /* 1180 * parent page is not full - insert router entry in parent page 1181 */ 1182 else { 1183 BT_MARK_DIRTY(smp, ip); 1184 /* 1185 * acquire a transaction lock on the parent page 1186 */ 1187 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1188 dtlck = (struct dt_lock *) & tlck->lock; 1189 ASSERT(dtlck->index == 0); 1190 lv = & dtlck->lv[0]; 1191 1192 /* linelock header */ 1193 lv->offset = 0; 1194 lv->length = 1; 1195 dtlck->index++; 1196 1197 /* linelock stbl of non-root parent page */ 1198 if (!(sp->header.flag & BT_ROOT)) { 1199 lv++; 1200 n = skip >> L2DTSLOTSIZE; 1201 lv->offset = sp->header.stblindex + n; 1202 lv->length = 1203 ((sp->header.nextindex - 1204 1) >> L2DTSLOTSIZE) - n + 1; 1205 dtlck->index++; 1206 } 1207 1208 dtInsertEntry(sp, skip, &key, data, &dtlck); 1209 1210 /* exit propagate up */ 1211 break; 1212 } 1213 } 1214 1215 /* unpin current split and its right page */ 1216 DT_PUTPAGE(smp); 1217 DT_PUTPAGE(rmp); 1218 1219 /* 1220 * free remaining extents allocated for split 1221 */ 1222 splitOut: 1223 n = pxdlist.npxd; 1224 pxd = &pxdlist.pxd[n]; 1225 for (; n < pxdlist.maxnpxd; n++, pxd++) 1226 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1227 1228 freeKeyName: 1229 kfree(key.name); 1230 1231 dtSplitUp_Exit: 1232 1233 return rc; 1234} 1235 1236 1237/* 1238 * dtSplitPage() 1239 * 1240 * function: Split a non-root page of a btree. 1241 * 1242 * parameter: 1243 * 1244 * return: 0 - success; 1245 * errno - failure; 1246 * return split and new page pinned; 1247 */ 1248static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1249 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1250{ 1251 struct super_block *sb = ip->i_sb; 1252 int rc = 0; 1253 struct metapage *smp; 1254 dtpage_t *sp; 1255 struct metapage *rmp; 1256 dtpage_t *rp; /* new right page allocated */ 1257 s64 rbn; /* new right page block number */ 1258 struct metapage *mp; 1259 dtpage_t *p; 1260 s64 nextbn; 1261 struct pxdlist *pxdlist; 1262 pxd_t *pxd; 1263 int skip, nextindex, half, left, nxt, off, si; 1264 struct ldtentry *ldtentry; 1265 struct idtentry *idtentry; 1266 u8 *stbl; 1267 struct dtslot *f; 1268 int fsi, stblsize; 1269 int n; 1270 struct dt_lock *sdtlck, *rdtlck; 1271 struct tlock *tlck; 1272 struct dt_lock *dtlck; 1273 struct lv *slv, *rlv, *lv; 1274 1275 /* get split page */ 1276 smp = split->mp; 1277 sp = DT_PAGE(ip, smp); 1278 1279 /* 1280 * allocate the new right page for the split 1281 */ 1282 pxdlist = split->pxdlist; 1283 pxd = &pxdlist->pxd[pxdlist->npxd]; 1284 pxdlist->npxd++; 1285 rbn = addressPXD(pxd); 1286 rmp = get_metapage(ip, rbn, PSIZE, 1); 1287 if (rmp == NULL) 1288 return EIO; 1289 1290 jEVENT(0, 1291 ("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p\n", ip, smp, rmp)); 1292 1293 BT_MARK_DIRTY(rmp, ip); 1294 /* 1295 * acquire a transaction lock on the new right page 1296 */ 1297 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1298 rdtlck = (struct dt_lock *) & tlck->lock; 1299 1300 rp = (dtpage_t *) rmp->data; 1301 *rpp = rp; 1302 rp->header.self = *pxd; 1303 1304 BT_MARK_DIRTY(smp, ip); 1305 /* 1306 * acquire a transaction lock on the split page 1307 * 1308 * action: 1309 */ 1310 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1311 sdtlck = (struct dt_lock *) & tlck->lock; 1312 1313 /* linelock header of split page */ 1314 ASSERT(sdtlck->index == 0); 1315 slv = & sdtlck->lv[0]; 1316 slv->offset = 0; 1317 slv->length = 1; 1318 sdtlck->index++; 1319 1320 /* 1321 * initialize/update sibling pointers between sp and rp 1322 */ 1323 nextbn = le64_to_cpu(sp->header.next); 1324 rp->header.next = cpu_to_le64(nextbn); 1325 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1326 sp->header.next = cpu_to_le64(rbn); 1327 1328 /* 1329 * initialize new right page 1330 */ 1331 rp->header.flag = sp->header.flag; 1332 1333 /* compute sorted entry table at start of extent data area */ 1334 rp->header.nextindex = 0; 1335 rp->header.stblindex = 1; 1336 1337 n = PSIZE >> L2DTSLOTSIZE; 1338 rp->header.maxslot = n; 1339 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1340 1341 /* init freelist */ 1342 fsi = rp->header.stblindex + stblsize; 1343 rp->header.freelist = fsi; 1344 rp->header.freecnt = rp->header.maxslot - fsi; 1345 1346 /* 1347 * sequential append at tail: append without split 1348 * 1349 * If splitting the last page on a level because of appending 1350 * a entry to it (skip is maxentry), it's likely that the access is 1351 * sequential. Adding an empty page on the side of the level is less 1352 * work and can push the fill factor much higher than normal. 1353 * If we're wrong it's no big deal, we'll just do the split the right 1354 * way next time. 1355 * (It may look like it's equally easy to do a similar hack for 1356 * reverse sorted data, that is, split the tree left, 1357 * but it's not. Be my guest.) 1358 */ 1359 if (nextbn == 0 && split->index == sp->header.nextindex) { 1360 /* linelock header + stbl (first slot) of new page */ 1361 rlv = & rdtlck->lv[rdtlck->index]; 1362 rlv->offset = 0; 1363 rlv->length = 2; 1364 rdtlck->index++; 1365 1366 /* 1367 * initialize freelist of new right page 1368 */ 1369 f = &rp->slot[fsi]; 1370 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1371 f->next = fsi; 1372 f->next = -1; 1373 1374 /* insert entry at the first entry of the new right page */ 1375 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1376 1377 goto out; 1378 } 1379 1380 /* 1381 * non-sequential insert (at possibly middle page) 1382 */ 1383 1384 /* 1385 * update prev pointer of previous right sibling page; 1386 */ 1387 if (nextbn != 0) { 1388 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1389 if (rc) 1390 return rc; 1391 1392 BT_MARK_DIRTY(mp, ip); 1393 /* 1394 * acquire a transaction lock on the next page 1395 */ 1396 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1397 jEVENT(0, 1398 ("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p\n", 1399 tlck, ip, mp)); 1400 dtlck = (struct dt_lock *) & tlck->lock; 1401 1402 /* linelock header of previous right sibling page */ 1403 lv = & dtlck->lv[dtlck->index]; 1404 lv->offset = 0; 1405 lv->length = 1; 1406 dtlck->index++; 1407 1408 p->header.prev = cpu_to_le64(rbn); 1409 1410 DT_PUTPAGE(mp); 1411 } 1412 1413 /* 1414 * split the data between the split and right pages. 1415 */ 1416 skip = split->index; 1417 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1418 left = 0; 1419 1420 /* 1421 * compute fill factor for split pages 1422 * 1423 * <nxt> traces the next entry to move to rp 1424 * <off> traces the next entry to stay in sp 1425 */ 1426 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1427 nextindex = sp->header.nextindex; 1428 for (nxt = off = 0; nxt < nextindex; ++off) { 1429 if (off == skip) 1430 /* check for fill factor with new entry size */ 1431 n = split->nslot; 1432 else { 1433 si = stbl[nxt]; 1434 switch (sp->header.flag & BT_TYPE) { 1435 case BT_LEAF: 1436 ldtentry = (struct ldtentry *) & sp->slot[si]; 1437 if (DO_INDEX(ip)) 1438 n = NDTLEAF(ldtentry->namlen); 1439 else 1440 n = NDTLEAF_LEGACY(ldtentry-> 1441 namlen); 1442 break; 1443 1444 case BT_INTERNAL: 1445 idtentry = (struct idtentry *) & sp->slot[si]; 1446 n = NDTINTERNAL(idtentry->namlen); 1447 break; 1448 1449 default: 1450 break; 1451 } 1452 1453 ++nxt; /* advance to next entry to move in sp */ 1454 } 1455 1456 left += n; 1457 if (left >= half) 1458 break; 1459 } 1460 1461 /* <nxt> poins to the 1st entry to move */ 1462 1463 /* 1464 * move entries to right page 1465 * 1466 * dtMoveEntry() initializes rp and reserves entry for insertion 1467 * 1468 * split page moved out entries are linelocked; 1469 * new/right page moved in entries are linelocked; 1470 */ 1471 /* linelock header + stbl of new right page */ 1472 rlv = & rdtlck->lv[rdtlck->index]; 1473 rlv->offset = 0; 1474 rlv->length = 5; 1475 rdtlck->index++; 1476 1477 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1478 1479 sp->header.nextindex = nxt; 1480 1481 /* 1482 * finalize freelist of new right page 1483 */ 1484 fsi = rp->header.freelist; 1485 f = &rp->slot[fsi]; 1486 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1487 f->next = fsi; 1488 f->next = -1; 1489 1490 /* 1491 * Update directory index table for entries now in right page 1492 */ 1493 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1494 mp = 0; 1495 stbl = DT_GETSTBL(rp); 1496 for (n = 0; n < rp->header.nextindex; n++) { 1497 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1498 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1499 rbn, n, &mp); 1500 } 1501 if (mp) 1502 release_metapage(mp); 1503 } 1504 1505 /* 1506 * the skipped index was on the left page, 1507 */ 1508 if (skip <= off) { 1509 /* insert the new entry in the split page */ 1510 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1511 1512 /* linelock stbl of split page */ 1513 if (sdtlck->index >= sdtlck->maxcnt) 1514 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1515 slv = & sdtlck->lv[sdtlck->index]; 1516 n = skip >> L2DTSLOTSIZE; 1517 slv->offset = sp->header.stblindex + n; 1518 slv->length = 1519 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1520 sdtlck->index++; 1521 } 1522 /* 1523 * the skipped index was on the right page, 1524 */ 1525 else { 1526 /* adjust the skip index to reflect the new position */ 1527 skip -= nxt; 1528 1529 /* insert the new entry in the right page */ 1530 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1531 } 1532 1533 out: 1534 *rmpp = rmp; 1535 *rpxdp = *pxd; 1536 1537 ip->i_blocks += LBLK2PBLK(sb, lengthPXD(pxd)); 1538 1539 jEVENT(0, ("dtSplitPage: ip:0x%p sp:0x%p rp:0x%p\n", ip, sp, rp)); 1540 return 0; 1541} 1542 1543 1544/* 1545 * dtExtendPage() 1546 * 1547 * function: extend 1st/only directory leaf page 1548 * 1549 * parameter: 1550 * 1551 * return: 0 - success; 1552 * errno - failure; 1553 * return extended page pinned; 1554 */ 1555static int dtExtendPage(tid_t tid, 1556 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1557{ 1558 struct super_block *sb = ip->i_sb; 1559 int rc; 1560 struct metapage *smp, *pmp, *mp; 1561 dtpage_t *sp, *pp; 1562 struct pxdlist *pxdlist; 1563 pxd_t *pxd, *tpxd; 1564 int xlen, xsize; 1565 int newstblindex, newstblsize; 1566 int oldstblindex, oldstblsize; 1567 int fsi, last; 1568 struct dtslot *f; 1569 struct btframe *parent; 1570 int n; 1571 struct dt_lock *dtlck; 1572 s64 xaddr, txaddr; 1573 struct tlock *tlck; 1574 struct pxd_lock *pxdlock; 1575 struct lv *lv; 1576 uint type; 1577 struct ldtentry *ldtentry; 1578 u8 *stbl; 1579 1580 /* get page to extend */ 1581 smp = split->mp; 1582 sp = DT_PAGE(ip, smp); 1583 1584 /* get parent/root page */ 1585 parent = BT_POP(btstack); 1586 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1587 if (rc) 1588 return (rc); 1589 1590 /* 1591 * extend the extent 1592 */ 1593 pxdlist = split->pxdlist; 1594 pxd = &pxdlist->pxd[pxdlist->npxd]; 1595 pxdlist->npxd++; 1596 1597 xaddr = addressPXD(pxd); 1598 tpxd = &sp->header.self; 1599 txaddr = addressPXD(tpxd); 1600 /* in-place extension */ 1601 if (xaddr == txaddr) { 1602 type = tlckEXTEND; 1603 } 1604 /* relocation */ 1605 else { 1606 type = tlckNEW; 1607 1608 /* save moved extent descriptor for later free */ 1609 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1610 pxdlock = (struct pxd_lock *) & tlck->lock; 1611 pxdlock->flag = mlckFREEPXD; 1612 pxdlock->pxd = sp->header.self; 1613 pxdlock->index = 1; 1614 1615 /* 1616 * Update directory index table to reflect new page address 1617 */ 1618 if (DO_INDEX(ip)) { 1619 mp = 0; 1620 stbl = DT_GETSTBL(sp); 1621 for (n = 0; n < sp->header.nextindex; n++) { 1622 ldtentry = 1623 (struct ldtentry *) & sp->slot[stbl[n]]; 1624 modify_index(tid, ip, 1625 le32_to_cpu(ldtentry->index), 1626 xaddr, n, &mp); 1627 } 1628 if (mp) 1629 release_metapage(mp); 1630 } 1631 } 1632 1633 /* 1634 * extend the page 1635 */ 1636 sp->header.self = *pxd; 1637 1638 jEVENT(0, 1639 ("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p\n", ip, smp, sp)); 1640 1641 BT_MARK_DIRTY(smp, ip); 1642 /* 1643 * acquire a transaction lock on the extended/leaf page 1644 */ 1645 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1646 dtlck = (struct dt_lock *) & tlck->lock; 1647 lv = & dtlck->lv[0]; 1648 1649 /* update buffer extent descriptor of extended page */ 1650 xlen = lengthPXD(pxd); 1651 xsize = xlen << JFS_SBI(sb)->l2bsize; 1652#ifdef _STILL_TO_PORT 1653 bmSetXD(smp, xaddr, xsize); 1654#endif /* _STILL_TO_PORT */ 1655 1656 /* 1657 * copy old stbl to new stbl at start of extended area 1658 */ 1659 oldstblindex = sp->header.stblindex; 1660 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1661 newstblindex = sp->header.maxslot; 1662 n = xsize >> L2DTSLOTSIZE; 1663 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1664 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1665 sp->header.nextindex); 1666 1667 /* 1668 * in-line extension: linelock old area of extended page 1669 */ 1670 if (type == tlckEXTEND) { 1671 /* linelock header */ 1672 lv->offset = 0; 1673 lv->length = 1; 1674 dtlck->index++; 1675 lv++; 1676 1677 /* linelock new stbl of extended page */ 1678 lv->offset = newstblindex; 1679 lv->length = newstblsize; 1680 } 1681 /* 1682 * relocation: linelock whole relocated area 1683 */ 1684 else { 1685 lv->offset = 0; 1686 lv->length = sp->header.maxslot + newstblsize; 1687 } 1688 1689 dtlck->index++; 1690 1691 sp->header.maxslot = n; 1692 sp->header.stblindex = newstblindex; 1693 /* sp->header.nextindex remains the same */ 1694 1695 /* 1696 * add old stbl region at head of freelist 1697 */ 1698 fsi = oldstblindex; 1699 f = &sp->slot[fsi]; 1700 last = sp->header.freelist; 1701 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1702 f->next = last; 1703 last = fsi; 1704 } 1705 sp->header.freelist = last; 1706 sp->header.freecnt += oldstblsize; 1707 1708 /* 1709 * append free region of newly extended area at tail of freelist 1710 */ 1711 /* init free region of newly extended area */ 1712 fsi = n = newstblindex + newstblsize; 1713 f = &sp->slot[fsi]; 1714 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1715 f->next = fsi; 1716 f->next = -1; 1717 1718 /* append new free region at tail of old freelist */ 1719 fsi = sp->header.freelist; 1720 if (fsi == -1) 1721 sp->header.freelist = n; 1722 else { 1723 do { 1724 f = &sp->slot[fsi]; 1725 fsi = f->next; 1726 } while (fsi != -1); 1727 1728 f->next = n; 1729 } 1730 1731 sp->header.freecnt += sp->header.maxslot - n; 1732 1733 /* 1734 * insert the new entry 1735 */ 1736 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1737 1738 BT_MARK_DIRTY(pmp, ip); 1739 /* 1740 * linelock any freeslots residing in old extent 1741 */ 1742 if (type == tlckEXTEND) { 1743 n = sp->header.maxslot >> 2; 1744 if (sp->header.freelist < n) 1745 dtLinelockFreelist(sp, n, &dtlck); 1746 } 1747 1748 /* 1749 * update parent entry on the parent/root page 1750 */ 1751 /* 1752 * acquire a transaction lock on the parent/root page 1753 */ 1754 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1755 dtlck = (struct dt_lock *) & tlck->lock; 1756 lv = & dtlck->lv[dtlck->index]; 1757 1758 /* linelock parent entry - 1st slot */ 1759 lv->offset = 1; 1760 lv->length = 1; 1761 dtlck->index++; 1762 1763 /* update the parent pxd for page extension */ 1764 tpxd = (pxd_t *) & pp->slot[1]; 1765 *tpxd = *pxd; 1766 1767 /* Since the directory might have an EA and/or ACL associated with it 1768 * we need to make sure we take that into account when setting the 1769 * i_nblocks 1770 */ 1771 ip->i_blocks = LBLK2PBLK(ip->i_sb, xlen + 1772 ((JFS_IP(ip)->ea.flag & DXD_EXTENT) ? 1773 lengthDXD(&JFS_IP(ip)->ea) : 0) + 1774 ((JFS_IP(ip)->acl.flag & DXD_EXTENT) ? 1775 lengthDXD(&JFS_IP(ip)->acl) : 0)); 1776 1777 jEVENT(0, 1778 ("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p\n", ip, smp, sp)); 1779 1780 1781 DT_PUTPAGE(pmp); 1782 return 0; 1783} 1784 1785 1786/* 1787 * dtSplitRoot() 1788 * 1789 * function: 1790 * split the full root page into 1791 * original/root/split page and new right page 1792 * i.e., root remains fixed in tree anchor (inode) and 1793 * the root is copied to a single new right child page 1794 * since root page << non-root page, and 1795 * the split root page contains a single entry for the 1796 * new right child page. 1797 * 1798 * parameter: 1799 * 1800 * return: 0 - success; 1801 * errno - failure; 1802 * return new page pinned; 1803 */ 1804static int dtSplitRoot(tid_t tid, 1805 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1806{ 1807 struct super_block *sb = ip->i_sb; 1808 struct metapage *smp; 1809 dtroot_t *sp; 1810 struct metapage *rmp; 1811 dtpage_t *rp; 1812 s64 rbn; 1813 int xlen; 1814 int xsize; 1815 struct dtslot *f; 1816 s8 *stbl; 1817 int fsi, stblsize, n; 1818 struct idtentry *s; 1819 pxd_t *ppxd; 1820 struct pxdlist *pxdlist; 1821 pxd_t *pxd; 1822 struct dt_lock *dtlck; 1823 struct tlock *tlck; 1824 struct lv *lv; 1825 1826 /* get split root page */ 1827 smp = split->mp; 1828 sp = &JFS_IP(ip)->i_dtroot; 1829 1830 /* 1831 * allocate/initialize a single (right) child page 1832 * 1833 * N.B. at first split, a one (or two) block to fit new entry 1834 * is allocated; at subsequent split, a full page is allocated; 1835 */ 1836 pxdlist = split->pxdlist; 1837 pxd = &pxdlist->pxd[pxdlist->npxd]; 1838 pxdlist->npxd++; 1839 rbn = addressPXD(pxd); 1840 xlen = lengthPXD(pxd); 1841 xsize = xlen << JFS_SBI(sb)->l2bsize; 1842 rmp = get_metapage(ip, rbn, xsize, 1); 1843 rp = rmp->data; 1844 1845 BT_MARK_DIRTY(rmp, ip); 1846 /* 1847 * acquire a transaction lock on the new right page 1848 */ 1849 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1850 dtlck = (struct dt_lock *) & tlck->lock; 1851 1852 rp->header.flag = 1853 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1854 rp->header.self = *pxd; 1855 1856 /* initialize sibling pointers */ 1857 rp->header.next = 0; 1858 rp->header.prev = 0; 1859 1860 /* 1861 * move in-line root page into new right page extent 1862 */ 1863 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1864 ASSERT(dtlck->index == 0); 1865 lv = & dtlck->lv[0]; 1866 lv->offset = 0; 1867 lv->length = 10; /* 1 + 8 + 1 */ 1868 dtlck->index++; 1869 1870 n = xsize >> L2DTSLOTSIZE; 1871 rp->header.maxslot = n; 1872 stblsize = (n + 31) >> L2DTSLOTSIZE; 1873 1874 /* copy old stbl to new stbl at start of extended area */ 1875 rp->header.stblindex = DTROOTMAXSLOT; 1876 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1877 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1878 rp->header.nextindex = sp->header.nextindex; 1879 1880 /* copy old data area to start of new data area */ 1881 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1882 1883 /* 1884 * append free region of newly extended area at tail of freelist 1885 */ 1886 /* init free region of newly extended area */ 1887 fsi = n = DTROOTMAXSLOT + stblsize; 1888 f = &rp->slot[fsi]; 1889 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1890 f->next = fsi; 1891 f->next = -1; 1892 1893 /* append new free region at tail of old freelist */ 1894 fsi = sp->header.freelist; 1895 if (fsi == -1) 1896 rp->header.freelist = n; 1897 else { 1898 rp->header.freelist = fsi; 1899 1900 do { 1901 f = &rp->slot[fsi]; 1902 fsi = f->next; 1903 } while (fsi != -1); 1904 1905 f->next = n; 1906 } 1907 1908 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1909 1910 /* 1911 * Update directory index table for entries now in right page 1912 */ 1913 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1914 struct metapage *mp = 0; 1915 struct ldtentry *ldtentry; 1916 1917 stbl = DT_GETSTBL(rp); 1918 for (n = 0; n < rp->header.nextindex; n++) { 1919 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1920 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1921 rbn, n, &mp); 1922 } 1923 if (mp) 1924 release_metapage(mp); 1925 } 1926 /* 1927 * insert the new entry into the new right/child page 1928 * (skip index in the new right page will not change) 1929 */ 1930 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 1931 1932 /* 1933 * reset parent/root page 1934 * 1935 * set the 1st entry offset to 0, which force the left-most key 1936 * at any level of the tree to be less than any search key. 1937 * 1938 * The btree comparison code guarantees that the left-most key on any 1939 * level of the tree is never used, so it doesn't need to be filled in. 1940 */ 1941 BT_MARK_DIRTY(smp, ip); 1942 /* 1943 * acquire a transaction lock on the root page (in-memory inode) 1944 */ 1945 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 1946 dtlck = (struct dt_lock *) & tlck->lock; 1947 1948 /* linelock root */ 1949 ASSERT(dtlck->index == 0); 1950 lv = & dtlck->lv[0]; 1951 lv->offset = 0; 1952 lv->length = DTROOTMAXSLOT; 1953 dtlck->index++; 1954 1955 /* update page header of root */ 1956 if (sp->header.flag & BT_LEAF) { 1957 sp->header.flag &= ~BT_LEAF; 1958 sp->header.flag |= BT_INTERNAL; 1959 } 1960 1961 /* init the first entry */ 1962 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 1963 ppxd = (pxd_t *) s; 1964 *ppxd = *pxd; 1965 s->next = -1; 1966 s->namlen = 0; 1967 1968 stbl = sp->header.stbl; 1969 stbl[0] = DTENTRYSTART; 1970 sp->header.nextindex = 1; 1971 1972 /* init freelist */ 1973 fsi = DTENTRYSTART + 1; 1974 f = &sp->slot[fsi]; 1975 1976 /* init free region of remaining area */ 1977 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 1978 f->next = fsi; 1979 f->next = -1; 1980 1981 sp->header.freelist = DTENTRYSTART + 1; 1982 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 1983 1984 *rmpp = rmp; 1985 1986 ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); 1987 return 0; 1988} 1989 1990 1991/* 1992 * dtDelete() 1993 * 1994 * function: delete the entry(s) referenced by a key. 1995 * 1996 * parameter: 1997 * 1998 * return: 1999 */ 2000int dtDelete(tid_t tid, 2001 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2002{ 2003 int rc = 0; 2004 s64 bn; 2005 struct metapage *mp, *imp; 2006 dtpage_t *p; 2007 int index; 2008 struct btstack btstack; 2009 struct dt_lock *dtlck; 2010 struct tlock *tlck; 2011 struct lv *lv; 2012 int i; 2013 struct ldtentry *ldtentry; 2014 u8 *stbl; 2015 u32 table_index, next_index; 2016 struct metapage *nmp; 2017 dtpage_t *np; 2018 2019 /* 2020 * search for the entry to delete: 2021 * 2022 * dtSearch() returns (leaf page pinned, index at which to delete). 2023 */ 2024 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2025 return rc; 2026 2027 /* retrieve search result */ 2028 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2029 2030 /* 2031 * We need to find put the index of the next entry into the 2032 * directory index table in order to resume a readdir from this 2033 * entry. 2034 */ 2035 if (DO_INDEX(ip)) { 2036 stbl = DT_GETSTBL(p); 2037 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2038 table_index = le32_to_cpu(ldtentry->index); 2039 if (index == (p->header.nextindex - 1)) { 2040 /* 2041 * Last entry in this leaf page 2042 */ 2043 if ((p->header.flag & BT_ROOT) 2044 || (p->header.next == 0)) 2045 next_index = -1; 2046 else { 2047 /* Read next leaf page */ 2048 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2049 nmp, PSIZE, np, rc); 2050 if (rc) 2051 next_index = -1; 2052 else { 2053 stbl = DT_GETSTBL(np); 2054 ldtentry = 2055 (struct ldtentry *) & np-> 2056 slot[stbl[0]]; 2057 next_index = 2058 le32_to_cpu(ldtentry->index); 2059 DT_PUTPAGE(nmp); 2060 } 2061 } 2062 } else { 2063 ldtentry = 2064 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2065 next_index = le32_to_cpu(ldtentry->index); 2066 } 2067 free_index(tid, ip, table_index, next_index); 2068 } 2069 /* 2070 * the leaf page becomes empty, delete the page 2071 */ 2072 if (p->header.nextindex == 1) { 2073 /* delete empty page */ 2074 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2075 } 2076 /* 2077 * the leaf page has other entries remaining: 2078 * 2079 * delete the entry from the leaf page. 2080 */ 2081 else { 2082 BT_MARK_DIRTY(mp, ip); 2083 /* 2084 * acquire a transaction lock on the leaf page 2085 */ 2086 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2087 dtlck = (struct dt_lock *) & tlck->lock; 2088 2089 /* 2090 * Do not assume that dtlck->index will be zero. During a 2091 * rename within a directory, this transaction may have 2092 * modified this page already when adding the new entry. 2093 */ 2094 2095 /* linelock header */ 2096 if (dtlck->index >= dtlck->maxcnt) 2097 dtlck = (struct dt_lock *) txLinelock(dtlck); 2098 lv = & dtlck->lv[dtlck->index]; 2099 lv->offset = 0; 2100 lv->length = 1; 2101 dtlck->index++; 2102 2103 /* linelock stbl of non-root leaf page */ 2104 if (!(p->header.flag & BT_ROOT)) { 2105 if (dtlck->index >= dtlck->maxcnt) 2106 dtlck = (struct dt_lock *) txLinelock(dtlck); 2107 lv = & dtlck->lv[dtlck->index]; 2108 i = index >> L2DTSLOTSIZE; 2109 lv->offset = p->header.stblindex + i; 2110 lv->length = 2111 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2112 i + 1; 2113 dtlck->index++; 2114 } 2115 2116 /* free the leaf entry */ 2117 dtDeleteEntry(p, index, &dtlck); 2118 2119 /* 2120 * Update directory index table for entries moved in stbl 2121 */ 2122 if (DO_INDEX(ip) && index < p->header.nextindex) { 2123 imp = 0; 2124 stbl = DT_GETSTBL(p); 2125 for (i = index; i < p->header.nextindex; i++) { 2126 ldtentry = 2127 (struct ldtentry *) & p->slot[stbl[i]]; 2128 modify_index(tid, ip, 2129 le32_to_cpu(ldtentry->index), 2130 bn, i, &imp); 2131 } 2132 if (imp) 2133 release_metapage(imp); 2134 } 2135 2136 DT_PUTPAGE(mp); 2137 } 2138 2139 return rc; 2140} 2141 2142 2143/* 2144 * dtDeleteUp() 2145 * 2146 * function: 2147 * free empty pages as propagating deletion up the tree 2148 * 2149 * parameter: 2150 * 2151 * return: 2152 */ 2153static int dtDeleteUp(tid_t tid, struct inode *ip, 2154 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2155{ 2156 int rc = 0; 2157 struct metapage *mp; 2158 dtpage_t *p; 2159 int index, nextindex; 2160 int xlen; 2161 struct btframe *parent; 2162 struct dt_lock *dtlck; 2163 struct tlock *tlck; 2164 struct lv *lv; 2165 struct pxd_lock *pxdlock; 2166 int i; 2167 2168 /* 2169 * keep the root leaf page which has become empty 2170 */ 2171 if (BT_IS_ROOT(fmp)) { 2172 /* 2173 * reset the root 2174 * 2175 * dtInitRoot() acquires txlock on the root 2176 */ 2177 dtInitRoot(tid, ip, PARENT(ip)); 2178 2179 DT_PUTPAGE(fmp); 2180 2181 return 0; 2182 } 2183 2184 /* 2185 * free the non-root leaf page 2186 */ 2187 /* 2188 * acquire a transaction lock on the page 2189 * 2190 * write FREEXTENT|NOREDOPAGE log record 2191 * N.B. linelock is overlaid as freed extent descriptor, and 2192 * the buffer page is freed; 2193 */ 2194 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2195 pxdlock = (struct pxd_lock *) & tlck->lock; 2196 pxdlock->flag = mlckFREEPXD; 2197 pxdlock->pxd = fp->header.self; 2198 pxdlock->index = 1; 2199 2200 /* update sibling pointers */ 2201 if ((rc = dtRelink(tid, ip, fp))) 2202 return rc; 2203 2204 xlen = lengthPXD(&fp->header.self); 2205 ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen); 2206 2207 /* free/invalidate its buffer page */ 2208 discard_metapage(fmp); 2209 2210 /* 2211 * propagate page deletion up the directory tree 2212 * 2213 * If the delete from the parent page makes it empty, 2214 * continue all the way up the tree. 2215 * stop if the root page is reached (which is never deleted) or 2216 * if the entry deletion does not empty the page. 2217 */ 2218 while ((parent = BT_POP(btstack)) != NULL) { 2219 /* pin the parent page <sp> */ 2220 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2221 if (rc) 2222 return rc; 2223 2224 /* 2225 * free the extent of the child page deleted 2226 */ 2227 index = parent->index; 2228 2229 /* 2230 * delete the entry for the child page from parent 2231 */ 2232 nextindex = p->header.nextindex; 2233 2234 /* 2235 * the parent has the single entry being deleted: 2236 * 2237 * free the parent page which has become empty. 2238 */ 2239 if (nextindex == 1) { 2240 /* 2241 * keep the root internal page which has become empty 2242 */ 2243 if (p->header.flag & BT_ROOT) { 2244 /* 2245 * reset the root 2246 * 2247 * dtInitRoot() acquires txlock on the root 2248 */ 2249 dtInitRoot(tid, ip, PARENT(ip)); 2250 2251 DT_PUTPAGE(mp); 2252 2253 return 0; 2254 } 2255 /* 2256 * free the parent page 2257 */ 2258 else { 2259 /* 2260 * acquire a transaction lock on the page 2261 * 2262 * write FREEXTENT|NOREDOPAGE log record 2263 */ 2264 tlck = 2265 txMaplock(tid, ip, 2266 tlckDTREE | tlckFREE); 2267 pxdlock = (struct pxd_lock *) & tlck->lock; 2268 pxdlock->flag = mlckFREEPXD; 2269 pxdlock->pxd = p->header.self; 2270 pxdlock->index = 1; 2271 2272 /* update sibling pointers */ 2273 if ((rc = dtRelink(tid, ip, p))) 2274 return rc; 2275 2276 xlen = lengthPXD(&p->header.self); 2277 ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen); 2278 2279 /* free/invalidate its buffer page */ 2280 discard_metapage(mp); 2281 2282 /* propagate up */ 2283 continue; 2284 } 2285 } 2286 2287 /* 2288 * the parent has other entries remaining: 2289 * 2290 * delete the router entry from the parent page. 2291 */ 2292 BT_MARK_DIRTY(mp, ip); 2293 /* 2294 * acquire a transaction lock on the page 2295 * 2296 * action: router entry deletion 2297 */ 2298 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2299 dtlck = (struct dt_lock *) & tlck->lock; 2300 2301 /* linelock header */ 2302 if (dtlck->index >= dtlck->maxcnt) 2303 dtlck = (struct dt_lock *) txLinelock(dtlck); 2304 lv = & dtlck->lv[dtlck->index]; 2305 lv->offset = 0; 2306 lv->length = 1; 2307 dtlck->index++; 2308 2309 /* linelock stbl of non-root leaf page */ 2310 if (!(p->header.flag & BT_ROOT)) { 2311 if (dtlck->index < dtlck->maxcnt) 2312 lv++; 2313 else { 2314 dtlck = (struct dt_lock *) txLinelock(dtlck); 2315 lv = & dtlck->lv[0]; 2316 } 2317 i = index >> L2DTSLOTSIZE; 2318 lv->offset = p->header.stblindex + i; 2319 lv->length = 2320 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2321 i + 1; 2322 dtlck->index++; 2323 } 2324 2325 /* free the router entry */ 2326 dtDeleteEntry(p, index, &dtlck); 2327 2328 /* reset key of new leftmost entry of level (for consistency) */ 2329 if (index == 0 && 2330 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2331 dtTruncateEntry(p, 0, &dtlck); 2332 2333 /* unpin the parent page */ 2334 DT_PUTPAGE(mp); 2335 2336 /* exit propagation up */ 2337 break; 2338 } 2339 2340 return 0; 2341} 2342 2343 2344/* 2345 * NAME: dtRelocate() 2346 * 2347 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2348 * This function is mainly used by defragfs utility. 2349 */ 2350int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2351 s64 nxaddr) 2352{ 2353 int rc = 0; 2354 struct metapage *mp, *pmp, *lmp, *rmp; 2355 dtpage_t *p, *pp, *rp = 0, *lp= 0; 2356 s64 bn; 2357 int index; 2358 struct btstack btstack; 2359 pxd_t *pxd; 2360 s64 oxaddr, nextbn, prevbn; 2361 int xlen, xsize; 2362 struct tlock *tlck; 2363 struct dt_lock *dtlck; 2364 struct pxd_lock *pxdlock; 2365 s8 *stbl; 2366 struct lv *lv; 2367 2368 oxaddr = addressPXD(opxd); 2369 xlen = lengthPXD(opxd); 2370 2371 jEVENT(0, ("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d\n", 2372 (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr, 2373 xlen)); 2374 2375 /* 2376 * 1. get the internal parent dtpage covering 2377 * router entry for the tartget page to be relocated; 2378 */ 2379 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2380 if (rc) 2381 return rc; 2382 2383 /* retrieve search result */ 2384 DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2385 jEVENT(0, ("dtRelocate: parent router entry validated.\n")); 2386 2387 /* 2388 * 2. relocate the target dtpage 2389 */ 2390 /* read in the target page from src extent */ 2391 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2392 if (rc) { 2393 /* release the pinned parent page */ 2394 DT_PUTPAGE(pmp); 2395 return rc; 2396 } 2397 2398 /* 2399 * read in sibling pages if any to update sibling pointers; 2400 */ 2401 rmp = NULL; 2402 if (p->header.next) { 2403 nextbn = le64_to_cpu(p->header.next); 2404 DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2405 if (rc) { 2406 DT_PUTPAGE(mp); 2407 DT_PUTPAGE(pmp); 2408 return (rc); 2409 } 2410 } 2411 2412 lmp = NULL; 2413 if (p->header.prev) { 2414 prevbn = le64_to_cpu(p->header.prev); 2415 DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2416 if (rc) { 2417 DT_PUTPAGE(mp); 2418 DT_PUTPAGE(pmp); 2419 if (rmp) 2420 DT_PUTPAGE(rmp); 2421 return (rc); 2422 } 2423 } 2424 2425 /* at this point, all xtpages to be updated are in memory */ 2426 2427 /* 2428 * update sibling pointers of sibling dtpages if any; 2429 */ 2430 if (lmp) { 2431 tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK); 2432 dtlck = (struct dt_lock *) & tlck->lock; 2433 /* linelock header */ 2434 ASSERT(dtlck->index == 0); 2435 lv = & dtlck->lv[0]; 2436 lv->offset = 0; 2437 lv->length = 1; 2438 dtlck->index++; 2439 2440 lp->header.next = cpu_to_le64(nxaddr); 2441 DT_PUTPAGE(lmp); 2442 } 2443 2444 if (rmp) { 2445 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK); 2446 dtlck = (struct dt_lock *) & tlck->lock; 2447 /* linelock header */ 2448 ASSERT(dtlck->index == 0); 2449 lv = & dtlck->lv[0]; 2450 lv->offset = 0; 2451 lv->length = 1; 2452 dtlck->index++; 2453 2454 rp->header.prev = cpu_to_le64(nxaddr); 2455 DT_PUTPAGE(rmp); 2456 } 2457 2458 /* 2459 * update the target dtpage to be relocated 2460 * 2461 * write LOG_REDOPAGE of LOG_NEW type for dst page 2462 * for the whole target page (logredo() will apply 2463 * after image and update bmap for allocation of the 2464 * dst extent), and update bmap for allocation of 2465 * the dst extent; 2466 */ 2467 tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW); 2468 dtlck = (struct dt_lock *) & tlck->lock; 2469 /* linelock header */ 2470 ASSERT(dtlck->index == 0); 2471 lv = & dtlck->lv[0]; 2472 2473 /* update the self address in the dtpage header */ 2474 pxd = &p->header.self; 2475 PXDaddress(pxd, nxaddr); 2476 2477 /* the dst page is the same as the src page, i.e., 2478 * linelock for afterimage of the whole page; 2479 */ 2480 lv->offset = 0; 2481 lv->length = p->header.maxslot; 2482 dtlck->index++; 2483 2484 /* update the buffer extent descriptor of the dtpage */ 2485 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2486#ifdef _STILL_TO_PORT 2487 bmSetXD(mp, nxaddr, xsize); 2488#endif /* _STILL_TO_PORT */ 2489 /* unpin the relocated page */ 2490 DT_PUTPAGE(mp); 2491 jEVENT(0, ("dtRelocate: target dtpage relocated.\n")); 2492 2493 /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec 2494 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec 2495 * will also force a bmap update ). 2496 */ 2497 2498 /* 2499 * 3. acquire maplock for the source extent to be freed; 2500 */ 2501 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2502 * for the source dtpage (logredo() will init NoRedoPage 2503 * filter and will also update bmap for free of the source 2504 * dtpage), and upadte bmap for free of the source dtpage; 2505 */ 2506 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2507 pxdlock = (struct pxd_lock *) & tlck->lock; 2508 pxdlock->flag = mlckFREEPXD; 2509 PXDaddress(&pxdlock->pxd, oxaddr); 2510 PXDlength(&pxdlock->pxd, xlen); 2511 pxdlock->index = 1; 2512 2513 /* 2514 * 4. update the parent router entry for relocation; 2515 * 2516 * acquire tlck for the parent entry covering the target dtpage; 2517 * write LOG_REDOPAGE to apply after image only; 2518 */ 2519 jEVENT(0, ("dtRelocate: update parent router entry.\n")); 2520 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 2521 dtlck = (struct dt_lock *) & tlck->lock; 2522 lv = & dtlck->lv[dtlck->index]; 2523 2524 /* update the PXD with the new address */ 2525 stbl = DT_GETSTBL(pp); 2526 pxd = (pxd_t *) & pp->slot[stbl[index]]; 2527 PXDaddress(pxd, nxaddr); 2528 lv->offset = stbl[index]; 2529 lv->length = 1; 2530 dtlck->index++; 2531 2532 /* unpin the parent dtpage */ 2533 DT_PUTPAGE(pmp); 2534 2535 return rc; 2536} 2537 2538 2539/* 2540 * NAME: dtSearchNode() 2541 * 2542 * FUNCTION: Search for an dtpage containing a specified address 2543 * This function is mainly used by defragfs utility. 2544 * 2545 * NOTE: Search result on stack, the found page is pinned at exit. 2546 * The result page must be an internal dtpage. 2547 * lmxaddr give the address of the left most page of the 2548 * dtree level, in which the required dtpage resides. 2549 */ 2550static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, 2551 struct btstack * btstack) 2552{ 2553 int rc = 0; 2554 s64 bn; 2555 struct metapage *mp; 2556 dtpage_t *p; 2557 int psize = 288; /* initial in-line directory */ 2558 s8 *stbl; 2559 int i; 2560 pxd_t *pxd; 2561 struct btframe *btsp; 2562 2563 BT_CLR(btstack); /* reset stack */ 2564 2565 /* 2566 * descend tree to the level with specified leftmost page 2567 * 2568 * by convention, root bn = 0. 2569 */ 2570 for (bn = 0;;) { 2571 /* get/pin the page to search */ 2572 DT_GETPAGE(ip, bn, mp, psize, p, rc); 2573 if (rc) 2574 return rc; 2575 2576 /* does the xaddr of leftmost page of the levevl 2577 * matches levevl search key ? 2578 */ 2579 if (p->header.flag & BT_ROOT) { 2580 if (lmxaddr == 0) 2581 break; 2582 } else if (addressPXD(&p->header.self) == lmxaddr) 2583 break; 2584 2585 /* 2586 * descend down to leftmost child page 2587 */ 2588 if (p->header.flag & BT_LEAF) 2589 return ESTALE; 2590 2591 /* get the leftmost entry */ 2592 stbl = DT_GETSTBL(p); 2593 pxd = (pxd_t *) & p->slot[stbl[0]]; 2594 2595 /* get the child page block address */ 2596 bn = addressPXD(pxd); 2597 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 2598 /* unpin the parent page */ 2599 DT_PUTPAGE(mp); 2600 } 2601 2602 /* 2603 * search each page at the current levevl 2604 */ 2605 loop: 2606 stbl = DT_GETSTBL(p); 2607 for (i = 0; i < p->header.nextindex; i++) { 2608 pxd = (pxd_t *) & p->slot[stbl[i]]; 2609 2610 /* found the specified router entry */ 2611 if (addressPXD(pxd) == addressPXD(kpxd) && 2612 lengthPXD(pxd) == lengthPXD(kpxd)) { 2613 btsp = btstack->top; 2614 btsp->bn = bn; 2615 btsp->index = i; 2616 btsp->mp = mp; 2617 2618 return 0; 2619 } 2620 } 2621 2622 /* get the right sibling page if any */ 2623 if (p->header.next) 2624 bn = le64_to_cpu(p->header.next); 2625 else { 2626 DT_PUTPAGE(mp); 2627 return ESTALE; 2628 } 2629 2630 /* unpin current page */ 2631 DT_PUTPAGE(mp); 2632 2633 /* get the right sibling page */ 2634 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2635 if (rc) 2636 return rc; 2637 2638 goto loop; 2639} 2640 2641 2642/* 2643 * dtRelink() 2644 * 2645 * function: 2646 * link around a freed page. 2647 * 2648 * parameter: 2649 * fp: page to be freed 2650 * 2651 * return: 2652 */ 2653static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2654{ 2655 int rc; 2656 struct metapage *mp; 2657 s64 nextbn, prevbn; 2658 struct tlock *tlck; 2659 struct dt_lock *dtlck; 2660 struct lv *lv; 2661 2662 nextbn = le64_to_cpu(p->header.next); 2663 prevbn = le64_to_cpu(p->header.prev); 2664 2665 /* update prev pointer of the next page */ 2666 if (nextbn != 0) { 2667 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2668 if (rc) 2669 return rc; 2670 2671 BT_MARK_DIRTY(mp, ip); 2672 /* 2673 * acquire a transaction lock on the next page 2674 * 2675 * action: update prev pointer; 2676 */ 2677 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2678 jEVENT(0, 2679 ("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p\n", 2680 tlck, ip, mp)); 2681 dtlck = (struct dt_lock *) & tlck->lock; 2682 2683 /* linelock header */ 2684 if (dtlck->index >= dtlck->maxcnt) 2685 dtlck = (struct dt_lock *) txLinelock(dtlck); 2686 lv = & dtlck->lv[dtlck->index]; 2687 lv->offset = 0; 2688 lv->length = 1; 2689 dtlck->index++; 2690 2691 p->header.prev = cpu_to_le64(prevbn); 2692 DT_PUTPAGE(mp); 2693 } 2694 2695 /* update next pointer of the previous page */ 2696 if (prevbn != 0) { 2697 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2698 if (rc) 2699 return rc; 2700 2701 BT_MARK_DIRTY(mp, ip); 2702 /* 2703 * acquire a transaction lock on the prev page 2704 * 2705 * action: update next pointer; 2706 */ 2707 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2708 jEVENT(0, 2709 ("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p\n", 2710 tlck, ip, mp)); 2711 dtlck = (struct dt_lock *) & tlck->lock; 2712 2713 /* linelock header */ 2714 if (dtlck->index >= dtlck->maxcnt) 2715 dtlck = (struct dt_lock *) txLinelock(dtlck); 2716 lv = & dtlck->lv[dtlck->index]; 2717 lv->offset = 0; 2718 lv->length = 1; 2719 dtlck->index++; 2720 2721 p->header.next = cpu_to_le64(nextbn); 2722 DT_PUTPAGE(mp); 2723 } 2724 2725 return 0; 2726} 2727 2728 2729/* 2730 * dtInitRoot() 2731 * 2732 * initialize directory root (inline in inode) 2733 */ 2734void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2735{ 2736 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2737 dtroot_t *p; 2738 int fsi; 2739 struct dtslot *f; 2740 struct tlock *tlck; 2741 struct dt_lock *dtlck; 2742 struct lv *lv; 2743 u16 xflag_save; 2744 2745 /* 2746 * If this was previously an non-empty directory, we need to remove 2747 * the old directory table. 2748 */ 2749 if (DO_INDEX(ip)) { 2750 if (jfs_ip->next_index > (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 2751 struct tblock *tblk = tid_to_tblock(tid); 2752 /* 2753 * We're playing games with the tid's xflag. If 2754 * we're removing a regular file, the file's xtree 2755 * is committed with COMMIT_PMAP, but we always 2756 * commit the directories xtree with COMMIT_PWMAP. 2757 */ 2758 xflag_save = tblk->xflag; 2759 tblk->xflag = 0; 2760 /* 2761 * xtTruncate isn't guaranteed to fully truncate 2762 * the xtree. The caller needs to check i_size 2763 * after committing the transaction to see if 2764 * additional truncation is needed. The 2765 * COMMIT_Stale flag tells caller that we 2766 * initiated the truncation. 2767 */ 2768 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2769 set_cflag(COMMIT_Stale, ip); 2770 2771 tblk->xflag = xflag_save; 2772 /* 2773 * Tells jfs_metapage code that the metadata pages 2774 * for the index table are no longer useful, and 2775 * remove them from page cache. 2776 */ 2777 invalidate_inode_metapages(ip); 2778 } else 2779 ip->i_size = 1; 2780 2781 jfs_ip->next_index = 2; 2782 } else 2783 ip->i_size = IDATASIZE; 2784 2785 /* 2786 * acquire a transaction lock on the root 2787 * 2788 * action: directory initialization; 2789 */ 2790 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2791 tlckDTREE | tlckENTRY | tlckBTROOT); 2792 dtlck = (struct dt_lock *) & tlck->lock; 2793 2794 /* linelock root */ 2795 ASSERT(dtlck->index == 0); 2796 lv = & dtlck->lv[0]; 2797 lv->offset = 0; 2798 lv->length = DTROOTMAXSLOT; 2799 dtlck->index++; 2800 2801 p = &jfs_ip->i_dtroot; 2802 2803 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2804 2805 p->header.nextindex = 0; 2806 2807 /* init freelist */ 2808 fsi = 1; 2809 f = &p->slot[fsi]; 2810 2811 /* init data area of root */ 2812 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2813 f->next = fsi; 2814 f->next = -1; 2815 2816 p->header.freelist = 1; 2817 p->header.freecnt = 8; 2818 2819 /* init '..' entry */ 2820 p->header.idotdot = cpu_to_le32(idotdot); 2821 2822 2823 return; 2824} 2825 2826/* 2827 * add_missing_indices() 2828 * 2829 * function: Fix dtree page in which one or more entries has an invalid index. 2830 * fsck.jfs should really fix this, but it currently does not. 2831 * Called from jfs_readdir when bad index is detected. 2832 */ 2833static void add_missing_indices(struct inode *inode, s64 bn) 2834{ 2835 struct ldtentry *d; 2836 struct dt_lock *dtlck; 2837 int i; 2838 uint index; 2839 struct lv *lv; 2840 struct metapage *mp; 2841 dtpage_t *p; 2842 int rc; 2843 s8 *stbl; 2844 tid_t tid; 2845 struct tlock *tlck; 2846 2847 tid = txBegin(inode->i_sb, 0); 2848 2849 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2850 2851 if (rc) { 2852 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2853 goto end; 2854 } 2855 BT_MARK_DIRTY(mp, inode); 2856 2857 ASSERT(p->header.flag & BT_LEAF); 2858 2859 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2860 dtlck = (struct dt_lock *) &tlck->lock; 2861 2862 stbl = DT_GETSTBL(p); 2863 for (i = 0; i < p->header.nextindex; i++) { 2864 d = (struct ldtentry *) &p->slot[stbl[i]]; 2865 index = le32_to_cpu(d->index); 2866 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2867 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2868 if (dtlck->index >= dtlck->maxcnt) 2869 dtlck = (struct dt_lock *) txLinelock(dtlck); 2870 lv = dtlck->lv; 2871 lv->offset = stbl[i]; 2872 lv->length = 1; 2873 dtlck->index++; 2874 } 2875 } 2876 2877 DT_PUTPAGE(mp); 2878 (void) txCommit(tid, 1, &inode, 0); 2879end: 2880 txEnd(tid); 2881} 2882 2883/* 2884 * Buffer to hold directory entry info while traversing a dtree page 2885 * before being fed to the filldir function 2886 */ 2887struct jfs_dirent { 2888 loff_t position; 2889 int ino; 2890 u16 name_len; 2891 char name[0]; 2892}; 2893 2894/* 2895 * function to determine next variable-sized jfs_dirent in buffer 2896 */ 2897inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2898{ 2899 return (struct jfs_dirent *) 2900 ((char *)dirent + 2901 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2902 sizeof (loff_t) - 1) & 2903 ~(sizeof (loff_t) - 1))); 2904} 2905 2906/* 2907 * jfs_readdir() 2908 * 2909 * function: read directory entries sequentially 2910 * from the specified entry offset 2911 * 2912 * parameter: 2913 * 2914 * return: offset = (pn, index) of start entry 2915 * of next jfs_readdir()/dtRead() 2916 */ 2917int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 2918{ 2919 struct inode *ip = filp->f_dentry->d_inode; 2920 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 2921 int rc = 0; 2922 loff_t dtpos; /* legacy OS/2 style position */ 2923 struct dtoffset { 2924 s16 pn; 2925 s16 index; 2926 s32 unused; 2927 } *dtoffset = (struct dtoffset *) &dtpos; 2928 s64 bn; 2929 struct metapage *mp; 2930 dtpage_t *p; 2931 int index; 2932 s8 *stbl; 2933 struct btstack btstack; 2934 int i, next; 2935 struct ldtentry *d; 2936 struct dtslot *t; 2937 int d_namleft, len, outlen; 2938 unsigned long dirent_buf; 2939 char *name_ptr; 2940 int dtlhdrdatalen; 2941 u32 dir_index; 2942 int do_index = 0; 2943 uint loop_count = 0; 2944 struct jfs_dirent *jfs_dirent; 2945 int jfs_dirents; 2946 int overflow, fix_page, page_fixed = 0; 2947 static int unique_pos = 2; /* If we can't fix broken index */ 2948 2949 if (filp->f_pos == DIREND) 2950 return 0; 2951 2952 if (DO_INDEX(ip)) { 2953 /* 2954 * persistent index is stored in directory entries. 2955 * Special cases: 0 = . 2956 * 1 = .. 2957 * -1 = End of directory 2958 */ 2959 do_index = 1; 2960 dtlhdrdatalen = DTLHDRDATALEN; 2961 2962 dir_index = (u32) filp->f_pos; 2963 2964 if (dir_index > 1) { 2965 struct dir_table_slot dirtab_slot; 2966 2967 if (dtEmpty(ip) || 2968 (dir_index >= JFS_IP(ip)->next_index)) { 2969 /* Stale position. Directory has shrunk */ 2970 filp->f_pos = DIREND; 2971 return 0; 2972 } 2973 repeat: 2974 rc = read_index(ip, dir_index, &dirtab_slot); 2975 if (rc) { 2976 filp->f_pos = DIREND; 2977 return rc; 2978 } 2979 if (dirtab_slot.flag == DIR_INDEX_FREE) { 2980 if (loop_count++ > JFS_IP(ip)->next_index) { 2981 jERROR(1, ("jfs_readdir detected " 2982 "infinite loop!\n")); 2983 filp->f_pos = DIREND; 2984 return 0; 2985 } 2986 dir_index = le32_to_cpu(dirtab_slot.addr2); 2987 if (dir_index == -1) { 2988 filp->f_pos = DIREND; 2989 return 0; 2990 } 2991 goto repeat; 2992 } 2993 bn = addressDTS(&dirtab_slot); 2994 index = dirtab_slot.slot; 2995 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2996 if (rc) { 2997 filp->f_pos = DIREND; 2998 return 0; 2999 } 3000 if (p->header.flag & BT_INTERNAL) { 3001 jERROR(1,("jfs_readdir: bad index table\n")); 3002 DT_PUTPAGE(mp); 3003 filp->f_pos = -1; 3004 return 0; 3005 } 3006 } else { 3007 if (dir_index == 0) { 3008 /* 3009 * self "." 3010 */ 3011 filp->f_pos = 0; 3012 if (filldir(dirent, ".", 1, 0, ip->i_ino, 3013 DT_DIR)) 3014 return 0; 3015 } 3016 /* 3017 * parent ".." 3018 */ 3019 filp->f_pos = 1; 3020 if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) 3021 return 0; 3022 3023 /* 3024 * Find first entry of left-most leaf 3025 */ 3026 if (dtEmpty(ip)) { 3027 filp->f_pos = DIREND; 3028 return 0; 3029 } 3030 3031 if ((rc = dtReadFirst(ip, &btstack))) 3032 return -rc; 3033 3034 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3035 } 3036 } else { 3037 /* 3038 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3039 * 3040 * pn = index = 0: First entry "." 3041 * pn = 0; index = 1: Second entry ".." 3042 * pn > 0: Real entries, pn=1 -> leftmost page 3043 * pn = index = -1: No more entries 3044 */ 3045 dtlhdrdatalen = DTLHDRDATALEN_LEGACY; 3046 3047 dtpos = filp->f_pos; 3048 if (dtpos == 0) { 3049 /* build "." entry */ 3050 3051 if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, 3052 DT_DIR)) 3053 return 0; 3054 dtoffset->index = 1; 3055 filp->f_pos = dtpos; 3056 } 3057 3058 if (dtoffset->pn == 0) { 3059 if (dtoffset->index == 1) { 3060 /* build ".." entry */ 3061 3062 if (filldir(dirent, "..", 2, filp->f_pos, 3063 PARENT(ip), DT_DIR)) 3064 return 0; 3065 } else { 3066 jERROR(1, 3067 ("jfs_readdir called with invalid offset!\n")); 3068 } 3069 dtoffset->pn = 1; 3070 dtoffset->index = 0; 3071 filp->f_pos = dtpos; 3072 } 3073 3074 if (dtEmpty(ip)) { 3075 filp->f_pos = DIREND; 3076 return 0; 3077 } 3078 3079 if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { 3080 jERROR(1, 3081 ("jfs_readdir: unexpected rc = %d from dtReadNext\n", 3082 rc)); 3083 filp->f_pos = DIREND; 3084 return 0; 3085 } 3086 /* get start leaf page and index */ 3087 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3088 3089 /* offset beyond directory eof ? */ 3090 if (bn < 0) { 3091 filp->f_pos = DIREND; 3092 return 0; 3093 } 3094 } 3095 3096 dirent_buf = __get_free_page(GFP_KERNEL); 3097 if (dirent_buf == 0) { 3098 DT_PUTPAGE(mp); 3099 jERROR(1, ("jfs_readdir: __get_free_page failed!\n")); 3100 filp->f_pos = DIREND; 3101 return -ENOMEM; 3102 } 3103 3104 while (1) { 3105 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3106 jfs_dirents = 0; 3107 overflow = fix_page = 0; 3108 3109 stbl = DT_GETSTBL(p); 3110 3111 for (i = index; i < p->header.nextindex; i++) { 3112 d = (struct ldtentry *) & p->slot[stbl[i]]; 3113 3114 if (((long) jfs_dirent + d->namlen + 1) > 3115 (dirent_buf + PSIZE)) { 3116 /* DBCS codepages could overrun dirent_buf */ 3117 index = i; 3118 overflow = 1; 3119 break; 3120 } 3121 3122 d_namleft = d->namlen; 3123 name_ptr = jfs_dirent->name; 3124 jfs_dirent->ino = le32_to_cpu(d->inumber); 3125 3126 if (do_index) { 3127 len = min(d_namleft, DTLHDRDATALEN); 3128 jfs_dirent->position = le32_to_cpu(d->index); 3129 /* 3130 * d->index should always be valid, but it 3131 * isn't. fsck.jfs doesn't create the 3132 * directory index for the lost+found 3133 * directory. Rather than let it go, 3134 * we can try to fix it. 3135 */ 3136 if ((jfs_dirent->position < 2) || 3137 (jfs_dirent->position >= 3138 JFS_IP(ip)->next_index)) { 3139 if (!page_fixed && !isReadOnly(ip)) { 3140 fix_page = 1; 3141 /* 3142 * setting overflow and setting 3143 * index to i will cause the 3144 * same page to be processed 3145 * again starting here 3146 */ 3147 overflow = 1; 3148 index = i; 3149 break; 3150 } 3151 jfs_dirent->position = unique_pos++; 3152 } 3153 } else { 3154 jfs_dirent->position = dtpos; 3155 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 3156 } 3157 3158 /* copy the name of head/only segment */ 3159 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 3160 codepage); 3161 jfs_dirent->name_len = outlen; 3162 3163 /* copy name in the additional segment(s) */ 3164 next = d->next; 3165 while (next >= 0) { 3166 t = (struct dtslot *) & p->slot[next]; 3167 name_ptr += outlen; 3168 d_namleft -= len; 3169 /* Sanity Check */ 3170 if (d_namleft == 0) { 3171 jERROR(1,("JFS:Dtree error: " 3172 "ino = %ld, bn=%Ld, index = %d\n", 3173 (long)ip->i_ino, (long long)bn, i)); 3174 updateSuper(ip->i_sb, FM_DIRTY); 3175 goto skip_one; 3176 } 3177 len = min(d_namleft, DTSLOTDATALEN); 3178 outlen = jfs_strfromUCS_le(name_ptr, t->name, 3179 len, codepage); 3180 jfs_dirent->name_len += outlen; 3181 3182 next = t->next; 3183 } 3184 3185 jfs_dirents++; 3186 jfs_dirent = next_jfs_dirent(jfs_dirent); 3187skip_one: 3188 if (!do_index) 3189 dtoffset->index++; 3190 } 3191 3192 if (!overflow) { 3193 /* Point to next leaf page */ 3194 if (p->header.flag & BT_ROOT) 3195 bn = 0; 3196 else { 3197 bn = le64_to_cpu(p->header.next); 3198 index = 0; 3199 /* update offset (pn:index) for new page */ 3200 if (!do_index) { 3201 dtoffset->pn++; 3202 dtoffset->index = 0; 3203 } 3204 } 3205 page_fixed = 0; 3206 } 3207 3208 /* unpin previous leaf page */ 3209 DT_PUTPAGE(mp); 3210 3211 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3212 while (jfs_dirents--) { 3213 filp->f_pos = jfs_dirent->position; 3214 if (filldir(dirent, jfs_dirent->name, 3215 jfs_dirent->name_len, filp->f_pos, 3216 jfs_dirent->ino, DT_UNKNOWN)) 3217 goto out; 3218 jfs_dirent = next_jfs_dirent(jfs_dirent); 3219 } 3220 3221 if (fix_page) { 3222 add_missing_indices(ip, bn); 3223 page_fixed = 1; 3224 } 3225 3226 if (!overflow && (bn == 0)) { 3227 filp->f_pos = DIREND; 3228 break; 3229 } 3230 3231 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3232 if (rc) { 3233 free_page(dirent_buf); 3234 return -rc; 3235 } 3236 } 3237 3238 out: 3239 free_page(dirent_buf); 3240 3241 return rc; 3242} 3243 3244 3245/* 3246 * dtReadFirst() 3247 * 3248 * function: get the leftmost page of the directory 3249 */ 3250static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3251{ 3252 int rc = 0; 3253 s64 bn; 3254 int psize = 288; /* initial in-line directory */ 3255 struct metapage *mp; 3256 dtpage_t *p; 3257 s8 *stbl; 3258 struct btframe *btsp; 3259 pxd_t *xd; 3260 3261 BT_CLR(btstack); /* reset stack */ 3262 3263 /* 3264 * descend leftmost path of the tree 3265 * 3266 * by convention, root bn = 0. 3267 */ 3268 for (bn = 0;;) { 3269 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3270 if (rc) 3271 return rc; 3272 3273 /* 3274 * leftmost leaf page 3275 */ 3276 if (p->header.flag & BT_LEAF) { 3277 /* return leftmost entry */ 3278 btsp = btstack->top; 3279 btsp->bn = bn; 3280 btsp->index = 0; 3281 btsp->mp = mp; 3282 3283 return 0; 3284 } 3285 3286 /* 3287 * descend down to leftmost child page 3288 */ 3289 /* push (bn, index) of the parent page/entry */ 3290 BT_PUSH(btstack, bn, 0); 3291 3292 /* get the leftmost entry */ 3293 stbl = DT_GETSTBL(p); 3294 xd = (pxd_t *) & p->slot[stbl[0]]; 3295 3296 /* get the child page block address */ 3297 bn = addressPXD(xd); 3298 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3299 3300 /* unpin the parent page */ 3301 DT_PUTPAGE(mp); 3302 } 3303} 3304 3305 3306/* 3307 * dtReadNext() 3308 * 3309 * function: get the page of the specified offset (pn:index) 3310 * 3311 * return: if (offset > eof), bn = -1; 3312 * 3313 * note: if index > nextindex of the target leaf page, 3314 * start with 1st entry of next leaf page; 3315 */ 3316static int dtReadNext(struct inode *ip, loff_t * offset, 3317 struct btstack * btstack) 3318{ 3319 int rc = 0; 3320 struct dtoffset { 3321 s16 pn; 3322 s16 index; 3323 s32 unused; 3324 } *dtoffset = (struct dtoffset *) offset; 3325 s64 bn; 3326 struct metapage *mp; 3327 dtpage_t *p; 3328 int index; 3329 int pn; 3330 s8 *stbl; 3331 struct btframe *btsp, *parent; 3332 pxd_t *xd; 3333 3334 /* 3335 * get leftmost leaf page pinned 3336 */ 3337 if ((rc = dtReadFirst(ip, btstack))) 3338 return rc; 3339 3340 /* get leaf page */ 3341 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3342 3343 /* get the start offset (pn:index) */ 3344 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3345 index = dtoffset->index; 3346 3347 /* start at leftmost page ? */ 3348 if (pn == 0) { 3349 /* offset beyond eof ? */ 3350 if (index < p->header.nextindex) 3351 goto out; 3352 3353 if (p->header.flag & BT_ROOT) { 3354 bn = -1; 3355 goto out; 3356 } 3357 3358 /* start with 1st entry of next leaf page */ 3359 dtoffset->pn++; 3360 dtoffset->index = index = 0; 3361 goto a; 3362 } 3363 3364 /* start at non-leftmost page: scan parent pages for large pn */ 3365 if (p->header.flag & BT_ROOT) { 3366 bn = -1; 3367 goto out; 3368 } 3369 3370 /* start after next leaf page ? */ 3371 if (pn > 1) 3372 goto b; 3373 3374 /* get leaf page pn = 1 */ 3375 a: 3376 bn = le64_to_cpu(p->header.next); 3377 3378 /* unpin leaf page */ 3379 DT_PUTPAGE(mp); 3380 3381 /* offset beyond eof ? */ 3382 if (bn == 0) { 3383 bn = -1; 3384 goto out; 3385 } 3386 3387 goto c; 3388 3389 /* 3390 * scan last internal page level to get target leaf page 3391 */ 3392 b: 3393 /* unpin leftmost leaf page */ 3394 DT_PUTPAGE(mp); 3395 3396 /* get left most parent page */ 3397 btsp = btstack->top; 3398 parent = btsp - 1; 3399 bn = parent->bn; 3400 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3401 if (rc) 3402 return rc; 3403 3404 /* scan parent pages at last internal page level */ 3405 while (pn >= p->header.nextindex) { 3406 pn -= p->header.nextindex; 3407 3408 /* get next parent page address */ 3409 bn = le64_to_cpu(p->header.next); 3410 3411 /* unpin current parent page */ 3412 DT_PUTPAGE(mp); 3413 3414 /* offset beyond eof ? */ 3415 if (bn == 0) { 3416 bn = -1; 3417 goto out; 3418 } 3419 3420 /* get next parent page */ 3421 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3422 if (rc) 3423 return rc; 3424 3425 /* update parent page stack frame */ 3426 parent->bn = bn; 3427 } 3428 3429 /* get leaf page address */ 3430 stbl = DT_GETSTBL(p); 3431 xd = (pxd_t *) & p->slot[stbl[pn]]; 3432 bn = addressPXD(xd); 3433 3434 /* unpin parent page */ 3435 DT_PUTPAGE(mp); 3436 3437 /* 3438 * get target leaf page 3439 */ 3440 c: 3441 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3442 if (rc) 3443 return rc; 3444 3445 /* 3446 * leaf page has been completed: 3447 * start with 1st entry of next leaf page 3448 */ 3449 if (index >= p->header.nextindex) { 3450 bn = le64_to_cpu(p->header.next); 3451 3452 /* unpin leaf page */ 3453 DT_PUTPAGE(mp); 3454 3455 /* offset beyond eof ? */ 3456 if (bn == 0) { 3457 bn = -1; 3458 goto out; 3459 } 3460 3461 /* get next leaf page */ 3462 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3463 if (rc) 3464 return rc; 3465 3466 /* start with 1st entry of next leaf page */ 3467 dtoffset->pn++; 3468 dtoffset->index = 0; 3469 } 3470 3471 out: 3472 /* return target leaf page pinned */ 3473 btsp = btstack->top; 3474 btsp->bn = bn; 3475 btsp->index = dtoffset->index; 3476 btsp->mp = mp; 3477 3478 return 0; 3479} 3480 3481 3482/* 3483 * dtCompare() 3484 * 3485 * function: compare search key with an internal entry 3486 * 3487 * return: 3488 * < 0 if k is < record 3489 * = 0 if k is = record 3490 * > 0 if k is > record 3491 */ 3492static int dtCompare(struct component_name * key, /* search key */ 3493 dtpage_t * p, /* directory page */ 3494 int si) 3495{ /* entry slot index */ 3496 wchar_t *kname, *name; 3497 int klen, namlen, len, rc; 3498 struct idtentry *ih; 3499 struct dtslot *t; 3500 3501 /* 3502 * force the left-most key on internal pages, at any level of 3503 * the tree, to be less than any search key. 3504 * this obviates having to update the leftmost key on an internal 3505 * page when the user inserts a new key in the tree smaller than 3506 * anything that has been stored. 3507 * 3508 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3509 * at any internal page at any level of the tree, 3510 * it descends to child of the entry anyway - 3511 * ? make the entry as min size dummy entry) 3512 * 3513 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3514 * return (1); 3515 */ 3516 3517 kname = key->name; 3518 klen = key->namlen; 3519 3520 ih = (struct idtentry *) & p->slot[si]; 3521 si = ih->next; 3522 name = ih->name; 3523 namlen = ih->namlen; 3524 len = min(namlen, DTIHDRDATALEN); 3525 3526 /* compare with head/only segment */ 3527 len = min(klen, len); 3528 if ((rc = UniStrncmp_le(kname, name, len))) 3529 return rc; 3530 3531 klen -= len; 3532 namlen -= len; 3533 3534 /* compare with additional segment(s) */ 3535 kname += len; 3536 while (klen > 0 && namlen > 0) { 3537 /* compare with next name segment */ 3538 t = (struct dtslot *) & p->slot[si]; 3539 len = min(namlen, DTSLOTDATALEN); 3540 len = min(klen, len); 3541 name = t->name; 3542 if ((rc = UniStrncmp_le(kname, name, len))) 3543 return rc; 3544 3545 klen -= len; 3546 namlen -= len; 3547 kname += len; 3548 si = t->next; 3549 } 3550 3551 return (klen - namlen); 3552} 3553 3554 3555 3556 3557/* 3558 * ciCompare() 3559 * 3560 * function: compare search key with an (leaf/internal) entry 3561 * 3562 * return: 3563 * < 0 if k is < record 3564 * = 0 if k is = record 3565 * > 0 if k is > record 3566 */ 3567static int ciCompare(struct component_name * key, /* search key */ 3568 dtpage_t * p, /* directory page */ 3569 int si, /* entry slot index */ 3570 int flag) 3571{ 3572 wchar_t *kname, *name, x; 3573 int klen, namlen, len, rc; 3574 struct ldtentry *lh; 3575 struct idtentry *ih; 3576 struct dtslot *t; 3577 int i; 3578 3579 /* 3580 * force the left-most key on internal pages, at any level of 3581 * the tree, to be less than any search key. 3582 * this obviates having to update the leftmost key on an internal 3583 * page when the user inserts a new key in the tree smaller than 3584 * anything that has been stored. 3585 * 3586 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3587 * at any internal page at any level of the tree, 3588 * it descends to child of the entry anyway - 3589 * ? make the entry as min size dummy entry) 3590 * 3591 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3592 * return (1); 3593 */ 3594 3595 kname = key->name; 3596 klen = key->namlen; 3597 3598 /* 3599 * leaf page entry 3600 */ 3601 if (p->header.flag & BT_LEAF) { 3602 lh = (struct ldtentry *) & p->slot[si]; 3603 si = lh->next; 3604 name = lh->name; 3605 namlen = lh->namlen; 3606 if (flag & JFS_DIR_INDEX) 3607 len = min(namlen, DTLHDRDATALEN); 3608 else 3609 len = min(namlen, DTLHDRDATALEN_LEGACY); 3610 } 3611 /* 3612 * internal page entry 3613 */ 3614 else { 3615 ih = (struct idtentry *) & p->slot[si]; 3616 si = ih->next; 3617 name = ih->name; 3618 namlen = ih->namlen; 3619 len = min(namlen, DTIHDRDATALEN); 3620 } 3621 3622 /* compare with head/only segment */ 3623 len = min(klen, len); 3624 for (i = 0; i < len; i++, kname++, name++) { 3625 /* only uppercase if case-insensitive support is on */ 3626 if ((flag & JFS_OS2) == JFS_OS2) 3627 x = UniToupper(le16_to_cpu(*name)); 3628 else 3629 x = le16_to_cpu(*name); 3630 if ((rc = *kname - x)) 3631 return rc; 3632 } 3633 3634 klen -= len; 3635 namlen -= len; 3636 3637 /* compare with additional segment(s) */ 3638 while (klen > 0 && namlen > 0) { 3639 /* compare with next name segment */ 3640 t = (struct dtslot *) & p->slot[si]; 3641 len = min(namlen, DTSLOTDATALEN); 3642 len = min(klen, len); 3643 name = t->name; 3644 for (i = 0; i < len; i++, kname++, name++) { 3645 /* only uppercase if case-insensitive support is on */ 3646 if ((flag & JFS_OS2) == JFS_OS2) 3647 x = UniToupper(le16_to_cpu(*name)); 3648 else 3649 x = le16_to_cpu(*name); 3650 3651 if ((rc = *kname - x)) 3652 return rc; 3653 } 3654 3655 klen -= len; 3656 namlen -= len; 3657 si = t->next; 3658 } 3659 3660 return (klen - namlen); 3661} 3662 3663 3664/* 3665 * ciGetLeafPrefixKey() 3666 * 3667 * function: compute prefix of suffix compression 3668 * from two adjacent leaf entries 3669 * across page boundary 3670 * 3671 * return: 3672 * Number of prefix bytes needed to distinguish b from a. 3673 */ 3674static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3675 int ri, struct component_name * key, int flag) 3676{ 3677 int klen, namlen; 3678 wchar_t *pl, *pr, *kname; 3679 wchar_t lname[JFS_NAME_MAX + 1]; 3680 struct component_name lkey = { 0, lname }; 3681 wchar_t rname[JFS_NAME_MAX + 1]; 3682 struct component_name rkey = { 0, rname }; 3683 3684 /* get left and right key */ 3685 dtGetKey(lp, li, &lkey, flag); 3686 lkey.name[lkey.namlen] = 0; 3687 3688 if ((flag & JFS_OS2) == JFS_OS2) 3689 ciToUpper(&lkey); 3690 3691 dtGetKey(rp, ri, &rkey, flag); 3692 rkey.name[rkey.namlen] = 0; 3693 3694 3695 if ((flag & JFS_OS2) == JFS_OS2) 3696 ciToUpper(&rkey); 3697 3698 /* compute prefix */ 3699 klen = 0; 3700 kname = key->name; 3701 namlen = min(lkey.namlen, rkey.namlen); 3702 for (pl = lkey.name, pr = rkey.name; 3703 namlen; pl++, pr++, namlen--, klen++, kname++) { 3704 *kname = *pr; 3705 if (*pl != *pr) { 3706 key->namlen = klen + 1; 3707 return; 3708 } 3709 } 3710 3711 /* l->namlen <= r->namlen since l <= r */ 3712 if (lkey.namlen < rkey.namlen) { 3713 *kname = *pr; 3714 key->namlen = klen + 1; 3715 } else /* l->namelen == r->namelen */ 3716 key->namlen = klen; 3717 3718 return; 3719} 3720 3721 3722 3723/* 3724 * dtGetKey() 3725 * 3726 * function: get key of the entry 3727 */ 3728static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3729 struct component_name * key, int flag) 3730{ 3731 int si; 3732 s8 *stbl; 3733 struct ldtentry *lh; 3734 struct idtentry *ih; 3735 struct dtslot *t; 3736 int namlen, len; 3737 wchar_t *name, *kname; 3738 3739 /* get entry */ 3740 stbl = DT_GETSTBL(p); 3741 si = stbl[i]; 3742 if (p->header.flag & BT_LEAF) { 3743 lh = (struct ldtentry *) & p->slot[si]; 3744 si = lh->next; 3745 namlen = lh->namlen; 3746 name = lh->name; 3747 if (flag & JFS_DIR_INDEX) 3748 len = min(namlen, DTLHDRDATALEN); 3749 else 3750 len = min(namlen, DTLHDRDATALEN_LEGACY); 3751 } else { 3752 ih = (struct idtentry *) & p->slot[si]; 3753 si = ih->next; 3754 namlen = ih->namlen; 3755 name = ih->name; 3756 len = min(namlen, DTIHDRDATALEN); 3757 } 3758 3759 key->namlen = namlen; 3760 kname = key->name; 3761 3762 /* 3763 * move head/only segment 3764 */ 3765 UniStrncpy_le(kname, name, len); 3766 3767 /* 3768 * move additional segment(s) 3769 */ 3770 while (si >= 0) { 3771 /* get next segment */ 3772 t = &p->slot[si]; 3773 kname += len; 3774 namlen -= len; 3775 len = min(namlen, DTSLOTDATALEN); 3776 UniStrncpy_le(kname, t->name, len); 3777 3778 si = t->next; 3779 } 3780} 3781 3782 3783/* 3784 * dtInsertEntry() 3785 * 3786 * function: allocate free slot(s) and 3787 * write a leaf/internal entry 3788 * 3789 * return: entry slot index 3790 */ 3791static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3792 ddata_t * data, struct dt_lock ** dtlock) 3793{ 3794 struct dtslot *h, *t; 3795 struct ldtentry *lh = 0; 3796 struct idtentry *ih = 0; 3797 int hsi, fsi, klen, len, nextindex; 3798 wchar_t *kname, *name; 3799 s8 *stbl; 3800 pxd_t *xd; 3801 struct dt_lock *dtlck = *dtlock; 3802 struct lv *lv; 3803 int xsi, n; 3804 s64 bn = 0; 3805 struct metapage *mp = 0; 3806 3807 klen = key->namlen; 3808 kname = key->name; 3809 3810 /* allocate a free slot */ 3811 hsi = fsi = p->header.freelist; 3812 h = &p->slot[fsi]; 3813 p->header.freelist = h->next; 3814 --p->header.freecnt; 3815 3816 /* open new linelock */ 3817 if (dtlck->index >= dtlck->maxcnt) 3818 dtlck = (struct dt_lock *) txLinelock(dtlck); 3819 3820 lv = & dtlck->lv[dtlck->index]; 3821 lv->offset = hsi; 3822 3823 /* write head/only segment */ 3824 if (p->header.flag & BT_LEAF) { 3825 lh = (struct ldtentry *) h; 3826 lh->next = h->next; 3827 lh->inumber = data->leaf.ino; /* little-endian */ 3828 lh->namlen = klen; 3829 name = lh->name; 3830 if (data->leaf.ip) { 3831 len = min(klen, DTLHDRDATALEN); 3832 if (!(p->header.flag & BT_ROOT)) 3833 bn = addressPXD(&p->header.self); 3834 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3835 data->leaf.ip, 3836 bn, index)); 3837 } else 3838 len = min(klen, DTLHDRDATALEN_LEGACY); 3839 } else { 3840 ih = (struct idtentry *) h; 3841 ih->next = h->next; 3842 xd = (pxd_t *) ih; 3843 *xd = data->xd; 3844 ih->namlen = klen; 3845 name = ih->name; 3846 len = min(klen, DTIHDRDATALEN); 3847 } 3848 3849 UniStrncpy_le(name, kname, len); 3850 3851 n = 1; 3852 xsi = hsi; 3853 3854 /* write additional segment(s) */ 3855 t = h; 3856 klen -= len; 3857 while (klen) { 3858 /* get free slot */ 3859 fsi = p->header.freelist; 3860 t = &p->slot[fsi]; 3861 p->header.freelist = t->next; 3862 --p->header.freecnt; 3863 3864 /* is next slot contiguous ? */ 3865 if (fsi != xsi + 1) { 3866 /* close current linelock */ 3867 lv->length = n; 3868 dtlck->index++; 3869 3870 /* open new linelock */ 3871 if (dtlck->index < dtlck->maxcnt) 3872 lv++; 3873 else { 3874 dtlck = (struct dt_lock *) txLinelock(dtlck); 3875 lv = & dtlck->lv[0]; 3876 } 3877 3878 lv->offset = fsi; 3879 n = 0; 3880 } 3881 3882 kname += len; 3883 len = min(klen, DTSLOTDATALEN); 3884 UniStrncpy_le(t->name, kname, len); 3885 3886 n++; 3887 xsi = fsi; 3888 klen -= len; 3889 } 3890 3891 /* close current linelock */ 3892 lv->length = n; 3893 dtlck->index++; 3894 3895 *dtlock = dtlck; 3896 3897 /* terminate last/only segment */ 3898 if (h == t) { 3899 /* single segment entry */ 3900 if (p->header.flag & BT_LEAF) 3901 lh->next = -1; 3902 else 3903 ih->next = -1; 3904 } else 3905 /* multi-segment entry */ 3906 t->next = -1; 3907 3908 /* if insert into middle, shift right succeeding entries in stbl */ 3909 stbl = DT_GETSTBL(p); 3910 nextindex = p->header.nextindex; 3911 if (index < nextindex) { 3912 memmove(stbl + index + 1, stbl + index, nextindex - index); 3913 3914 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 3915 /* 3916 * Need to update slot number for entries that moved 3917 * in the stbl 3918 */ 3919 mp = 0; 3920 for (n = index + 1; n <= nextindex; n++) { 3921 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 3922 modify_index(data->leaf.tid, data->leaf.ip, 3923 le32_to_cpu(lh->index), bn, n, 3924 &mp); 3925 } 3926 if (mp) 3927 release_metapage(mp); 3928 } 3929 } 3930 3931 stbl[index] = hsi; 3932 3933 /* advance next available entry index of stbl */ 3934 ++p->header.nextindex; 3935} 3936 3937 3938/* 3939 * dtMoveEntry() 3940 * 3941 * function: move entries from split/left page to new/right page 3942 * 3943 * nextindex of dst page and freelist/freecnt of both pages 3944 * are updated. 3945 */ 3946static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 3947 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 3948 int do_index) 3949{ 3950 int ssi, next; /* src slot index */ 3951 int di; /* dst entry index */ 3952 int dsi; /* dst slot index */ 3953 s8 *sstbl, *dstbl; /* sorted entry table */ 3954 int snamlen, len; 3955 struct ldtentry *slh, *dlh = 0; 3956 struct idtentry *sih, *dih = 0; 3957 struct dtslot *h, *s, *d; 3958 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 3959 struct lv *slv, *dlv; 3960 int xssi, ns, nd; 3961 int sfsi; 3962 3963 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 3964 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 3965 3966 dsi = dp->header.freelist; /* first (whole page) free slot */ 3967 sfsi = sp->header.freelist; 3968 3969 /* linelock destination entry slot */ 3970 dlv = & ddtlck->lv[ddtlck->index]; 3971 dlv->offset = dsi; 3972 3973 /* linelock source entry slot */ 3974 slv = & sdtlck->lv[sdtlck->index]; 3975 slv->offset = sstbl[si]; 3976 xssi = slv->offset - 1; 3977 3978 /* 3979 * move entries 3980 */ 3981 ns = nd = 0; 3982 for (di = 0; si < sp->header.nextindex; si++, di++) { 3983 ssi = sstbl[si]; 3984 dstbl[di] = dsi; 3985 3986 /* is next slot contiguous ? */ 3987 if (ssi != xssi + 1) { 3988 /* close current linelock */ 3989 slv->length = ns; 3990 sdtlck->index++; 3991 3992 /* open new linelock */ 3993 if (sdtlck->index < sdtlck->maxcnt) 3994 slv++; 3995 else { 3996 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 3997 slv = & sdtlck->lv[0]; 3998 } 3999 4000 slv->offset = ssi; 4001 ns = 0; 4002 } 4003 4004 /* 4005 * move head/only segment of an entry 4006 */ 4007 /* get dst slot */ 4008 h = d = &dp->slot[dsi]; 4009 4010 /* get src slot and move */ 4011 s = &sp->slot[ssi]; 4012 if (sp->header.flag & BT_LEAF) { 4013 /* get source entry */ 4014 slh = (struct ldtentry *) s; 4015 dlh = (struct ldtentry *) h; 4016 snamlen = slh->namlen; 4017 4018 if (do_index) { 4019 len = min(snamlen, DTLHDRDATALEN); 4020 dlh->index = slh->index; /* little-endian */ 4021 } else 4022 len = min(snamlen, DTLHDRDATALEN_LEGACY); 4023 4024 memcpy(dlh, slh, 6 + len * 2); 4025 4026 next = slh->next; 4027 4028 /* update dst head/only segment next field */ 4029 dsi++; 4030 dlh->next = dsi; 4031 } else { 4032 sih = (struct idtentry *) s; 4033 snamlen = sih->namlen; 4034 4035 len = min(snamlen, DTIHDRDATALEN); 4036 dih = (struct idtentry *) h; 4037 memcpy(dih, sih, 10 + len * 2); 4038 next = sih->next; 4039 4040 dsi++; 4041 dih->next = dsi; 4042 } 4043 4044 /* free src head/only segment */ 4045 s->next = sfsi; 4046 s->cnt = 1; 4047 sfsi = ssi; 4048 4049 ns++; 4050 nd++; 4051 xssi = ssi; 4052 4053 /* 4054 * move additional segment(s) of the entry 4055 */ 4056 snamlen -= len; 4057 while ((ssi = next) >= 0) { 4058 /* is next slot contiguous ? */ 4059 if (ssi != xssi + 1) { 4060 /* close current linelock */ 4061 slv->length = ns; 4062 sdtlck->index++; 4063 4064 /* open new linelock */ 4065 if (sdtlck->index < sdtlck->maxcnt) 4066 slv++; 4067 else { 4068 sdtlck = 4069 (struct dt_lock *) 4070 txLinelock(sdtlck); 4071 slv = & sdtlck->lv[0]; 4072 } 4073 4074 slv->offset = ssi; 4075 ns = 0; 4076 } 4077 4078 /* get next source segment */ 4079 s = &sp->slot[ssi]; 4080 4081 /* get next destination free slot */ 4082 d++; 4083 4084 len = min(snamlen, DTSLOTDATALEN); 4085 UniStrncpy(d->name, s->name, len); 4086 4087 ns++; 4088 nd++; 4089 xssi = ssi; 4090 4091 dsi++; 4092 d->next = dsi; 4093 4094 /* free source segment */ 4095 next = s->next; 4096 s->next = sfsi; 4097 s->cnt = 1; 4098 sfsi = ssi; 4099 4100 snamlen -= len; 4101 } /* end while */ 4102 4103 /* terminate dst last/only segment */ 4104 if (h == d) { 4105 /* single segment entry */ 4106 if (dp->header.flag & BT_LEAF) 4107 dlh->next = -1; 4108 else 4109 dih->next = -1; 4110 } else 4111 /* multi-segment entry */ 4112 d->next = -1; 4113 } /* end for */ 4114 4115 /* close current linelock */ 4116 slv->length = ns; 4117 sdtlck->index++; 4118 *sdtlock = sdtlck; 4119 4120 dlv->length = nd; 4121 ddtlck->index++; 4122 *ddtlock = ddtlck; 4123 4124 /* update source header */ 4125 sp->header.freelist = sfsi; 4126 sp->header.freecnt += nd; 4127 4128 /* update destination header */ 4129 dp->header.nextindex = di; 4130 4131 dp->header.freelist = dsi; 4132 dp->header.freecnt -= nd; 4133} 4134 4135 4136/* 4137 * dtDeleteEntry() 4138 * 4139 * function: free a (leaf/internal) entry 4140 * 4141 * log freelist header, stbl, and each segment slot of entry 4142 * (even though last/only segment next field is modified, 4143 * physical image logging requires all segment slots of 4144 * the entry logged to avoid applying previous updates 4145 * to the same slots) 4146 */ 4147static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 4148{ 4149 int fsi; /* free entry slot index */ 4150 s8 *stbl; 4151 struct dtslot *t; 4152 int si, freecnt; 4153 struct dt_lock *dtlck = *dtlock; 4154 struct lv *lv; 4155 int xsi, n; 4156 4157 /* get free entry slot index */ 4158 stbl = DT_GETSTBL(p); 4159 fsi = stbl[fi]; 4160 4161 /* open new linelock */ 4162 if (dtlck->index >= dtlck->maxcnt) 4163 dtlck = (struct dt_lock *) txLinelock(dtlck); 4164 lv = & dtlck->lv[dtlck->index]; 4165 4166 lv->offset = fsi; 4167 4168 /* get the head/only segment */ 4169 t = &p->slot[fsi]; 4170 if (p->header.flag & BT_LEAF) 4171 si = ((struct ldtentry *) t)->next; 4172 else 4173 si = ((struct idtentry *) t)->next; 4174 t->next = si; 4175 t->cnt = 1; 4176 4177 n = freecnt = 1; 4178 xsi = fsi; 4179 4180 /* find the last/only segment */ 4181 while (si >= 0) { 4182 /* is next slot contiguous ? */ 4183 if (si != xsi + 1) { 4184 /* close current linelock */ 4185 lv->length = n; 4186 dtlck->index++; 4187 4188 /* open new linelock */ 4189 if (dtlck->index < dtlck->maxcnt) 4190 lv++; 4191 else { 4192 dtlck = (struct dt_lock *) txLinelock(dtlck); 4193 lv = & dtlck->lv[0]; 4194 } 4195 4196 lv->offset = si; 4197 n = 0; 4198 } 4199 4200 n++; 4201 xsi = si; 4202 freecnt++; 4203 4204 t = &p->slot[si]; 4205 t->cnt = 1; 4206 si = t->next; 4207 } 4208 4209 /* close current linelock */ 4210 lv->length = n; 4211 dtlck->index++; 4212 4213 *dtlock = dtlck; 4214 4215 /* update freelist */ 4216 t->next = p->header.freelist; 4217 p->header.freelist = fsi; 4218 p->header.freecnt += freecnt; 4219 4220 /* if delete from middle, 4221 * shift left the succedding entries in the stbl 4222 */ 4223 si = p->header.nextindex; 4224 if (fi < si - 1) 4225 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4226 4227 p->header.nextindex--; 4228} 4229 4230 4231/* 4232 * dtTruncateEntry() 4233 * 4234 * function: truncate a (leaf/internal) entry 4235 * 4236 * log freelist header, stbl, and each segment slot of entry 4237 * (even though last/only segment next field is modified, 4238 * physical image logging requires all segment slots of 4239 * the entry logged to avoid applying previous updates 4240 * to the same slots) 4241 */ 4242static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4243{ 4244 int tsi; /* truncate entry slot index */ 4245 s8 *stbl; 4246 struct dtslot *t; 4247 int si, freecnt; 4248 struct dt_lock *dtlck = *dtlock; 4249 struct lv *lv; 4250 int fsi, xsi, n; 4251 4252 /* get free entry slot index */ 4253 stbl = DT_GETSTBL(p); 4254 tsi = stbl[ti]; 4255 4256 /* open new linelock */ 4257 if (dtlck->index >= dtlck->maxcnt) 4258 dtlck = (struct dt_lock *) txLinelock(dtlck); 4259 lv = & dtlck->lv[dtlck->index]; 4260 4261 lv->offset = tsi; 4262 4263 /* get the head/only segment */ 4264 t = &p->slot[tsi]; 4265 ASSERT(p->header.flag & BT_INTERNAL); 4266 ((struct idtentry *) t)->namlen = 0; 4267 si = ((struct idtentry *) t)->next; 4268 ((struct idtentry *) t)->next = -1; 4269 4270 n = 1; 4271 freecnt = 0; 4272 fsi = si; 4273 xsi = tsi; 4274 4275 /* find the last/only segment */ 4276 while (si >= 0) { 4277 /* is next slot contiguous ? */ 4278 if (si != xsi + 1) { 4279 /* close current linelock */ 4280 lv->length = n; 4281 dtlck->index++; 4282 4283 /* open new linelock */ 4284 if (dtlck->index < dtlck->maxcnt) 4285 lv++; 4286 else { 4287 dtlck = (struct dt_lock *) txLinelock(dtlck); 4288 lv = & dtlck->lv[0]; 4289 } 4290 4291 lv->offset = si; 4292 n = 0; 4293 } 4294 4295 n++; 4296 xsi = si; 4297 freecnt++; 4298 4299 t = &p->slot[si]; 4300 t->cnt = 1; 4301 si = t->next; 4302 } 4303 4304 /* close current linelock */ 4305 lv->length = n; 4306 dtlck->index++; 4307 4308 *dtlock = dtlck; 4309 4310 /* update freelist */ 4311 if (freecnt == 0) 4312 return; 4313 t->next = p->header.freelist; 4314 p->header.freelist = fsi; 4315 p->header.freecnt += freecnt; 4316} 4317 4318 4319/* 4320 * dtLinelockFreelist() 4321 */ 4322static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4323 int m, /* max slot index */ 4324 struct dt_lock ** dtlock) 4325{ 4326 int fsi; /* free entry slot index */ 4327 struct dtslot *t; 4328 int si; 4329 struct dt_lock *dtlck = *dtlock; 4330 struct lv *lv; 4331 int xsi, n; 4332 4333 /* get free entry slot index */ 4334 fsi = p->header.freelist; 4335 4336 /* open new linelock */ 4337 if (dtlck->index >= dtlck->maxcnt) 4338 dtlck = (struct dt_lock *) txLinelock(dtlck); 4339 lv = & dtlck->lv[dtlck->index]; 4340 4341 lv->offset = fsi; 4342 4343 n = 1; 4344 xsi = fsi; 4345 4346 t = &p->slot[fsi]; 4347 si = t->next; 4348 4349 /* find the last/only segment */ 4350 while (si < m && si >= 0) { 4351 /* is next slot contiguous ? */ 4352 if (si != xsi + 1) { 4353 /* close current linelock */ 4354 lv->length = n; 4355 dtlck->index++; 4356 4357 /* open new linelock */ 4358 if (dtlck->index < dtlck->maxcnt) 4359 lv++; 4360 else { 4361 dtlck = (struct dt_lock *) txLinelock(dtlck); 4362 lv = & dtlck->lv[0]; 4363 } 4364 4365 lv->offset = si; 4366 n = 0; 4367 } 4368 4369 n++; 4370 xsi = si; 4371 4372 t = &p->slot[si]; 4373 si = t->next; 4374 } 4375 4376 /* close current linelock */ 4377 lv->length = n; 4378 dtlck->index++; 4379 4380 *dtlock = dtlck; 4381} 4382 4383 4384/* 4385 * NAME: dtModify 4386 * 4387 * FUNCTION: Modify the inode number part of a directory entry 4388 * 4389 * PARAMETERS: 4390 * tid - Transaction id 4391 * ip - Inode of parent directory 4392 * key - Name of entry to be modified 4393 * orig_ino - Original inode number expected in entry 4394 * new_ino - New inode number to put into entry 4395 * flag - JFS_RENAME 4396 * 4397 * RETURNS: 4398 * ESTALE - If entry found does not match orig_ino passed in 4399 * ENOENT - If no entry can be found to match key 4400 * 0 - If successfully modified entry 4401 */ 4402int dtModify(tid_t tid, struct inode *ip, 4403 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4404{ 4405 int rc; 4406 s64 bn; 4407 struct metapage *mp; 4408 dtpage_t *p; 4409 int index; 4410 struct btstack btstack; 4411 struct tlock *tlck; 4412 struct dt_lock *dtlck; 4413 struct lv *lv; 4414 s8 *stbl; 4415 int entry_si; /* entry slot index */ 4416 struct ldtentry *entry; 4417 4418 /* 4419 * search for the entry to modify: 4420 * 4421 * dtSearch() returns (leaf page pinned, index at which to modify). 4422 */ 4423 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4424 return rc; 4425 4426 /* retrieve search result */ 4427 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4428 4429 BT_MARK_DIRTY(mp, ip); 4430 /* 4431 * acquire a transaction lock on the leaf page of named entry 4432 */ 4433 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4434 dtlck = (struct dt_lock *) & tlck->lock; 4435 4436 /* get slot index of the entry */ 4437 stbl = DT_GETSTBL(p); 4438 entry_si = stbl[index]; 4439 4440 /* linelock entry */ 4441 ASSERT(dtlck->index == 0); 4442 lv = & dtlck->lv[0]; 4443 lv->offset = entry_si; 4444 lv->length = 1; 4445 dtlck->index++; 4446 4447 /* get the head/only segment */ 4448 entry = (struct ldtentry *) & p->slot[entry_si]; 4449 4450 /* substitute the inode number of the entry */ 4451 entry->inumber = cpu_to_le32(new_ino); 4452 4453 /* unpin the leaf page */ 4454 DT_PUTPAGE(mp); 4455 4456 return 0; 4457} 4458 4459#ifdef _JFS_DEBUG_DTREE 4460/* 4461 * dtDisplayTree() 4462 * 4463 * function: traverse forward 4464 */ 4465int dtDisplayTree(struct inode *ip) 4466{ 4467 int rc; 4468 struct metapage *mp; 4469 dtpage_t *p; 4470 s64 bn, pbn; 4471 int index, lastindex, v, h; 4472 pxd_t *xd; 4473 struct btstack btstack; 4474 struct btframe *btsp; 4475 struct btframe *parent; 4476 u8 *stbl; 4477 int psize = 256; 4478 4479 printk("display B+-tree.\n"); 4480 4481 /* clear stack */ 4482 btsp = btstack.stack; 4483 4484 /* 4485 * start with root 4486 * 4487 * root resides in the inode 4488 */ 4489 bn = 0; 4490 v = h = 0; 4491 4492 /* 4493 * first access of each page: 4494 */ 4495 newPage: 4496 DT_GETPAGE(ip, bn, mp, psize, p, rc); 4497 if (rc) 4498 return rc; 4499 4500 /* process entries forward from first index */ 4501 index = 0; 4502 lastindex = p->header.nextindex - 1; 4503 4504 if (p->header.flag & BT_INTERNAL) { 4505 /* 4506 * first access of each internal page 4507 */ 4508 printf("internal page "); 4509 dtDisplayPage(ip, bn, p); 4510 4511 goto getChild; 4512 } else { /* (p->header.flag & BT_LEAF) */ 4513 4514 /* 4515 * first access of each leaf page 4516 */ 4517 printf("leaf page "); 4518 dtDisplayPage(ip, bn, p); 4519 4520 /* 4521 * process leaf page entries 4522 * 4523 for ( ; index <= lastindex; index++) 4524 { 4525 } 4526 */ 4527 4528 /* unpin the leaf page */ 4529 DT_PUTPAGE(mp); 4530 } 4531 4532 /* 4533 * go back up to the parent page 4534 */ 4535 getParent: 4536 /* pop/restore parent entry for the current child page */ 4537 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) 4538 /* current page must have been root */ 4539 return; 4540 4541 /* 4542 * parent page scan completed 4543 */ 4544 if ((index = parent->index) == (lastindex = parent->lastindex)) { 4545 /* go back up to the parent page */ 4546 goto getParent; 4547 } 4548 4549 /* 4550 * parent page has entries remaining 4551 */ 4552 /* get back the parent page */ 4553 bn = parent->bn; 4554 /* v = parent->level; */ 4555 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4556 if (rc) 4557 return rc; 4558 4559 /* get next parent entry */ 4560 index++; 4561 4562 /* 4563 * internal page: go down to child page of current entry 4564 */ 4565 getChild: 4566 /* push/save current parent entry for the child page */ 4567 btsp->bn = pbn = bn; 4568 btsp->index = index; 4569 btsp->lastindex = lastindex; 4570 /* btsp->level = v; */ 4571 /* btsp->node = h; */ 4572 ++btsp; 4573 4574 /* get current entry for the child page */ 4575 stbl = DT_GETSTBL(p); 4576 xd = (pxd_t *) & p->slot[stbl[index]]; 4577 4578 /* 4579 * first access of each internal entry: 4580 */ 4581 4582 /* get child page */ 4583 bn = addressPXD(xd); 4584 psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize; 4585 4586 printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn); 4587 v++; 4588 h = index; 4589 4590 /* release parent page */ 4591 DT_PUTPAGE(mp); 4592 4593 /* process the child page */ 4594 goto newPage; 4595} 4596 4597 4598/* 4599 * dtDisplayPage() 4600 * 4601 * function: display page 4602 */ 4603int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p) 4604{ 4605 int rc; 4606 struct metapage *mp; 4607 struct ldtentry *lh; 4608 struct idtentry *ih; 4609 pxd_t *xd; 4610 int i, j; 4611 u8 *stbl; 4612 wchar_t name[JFS_NAME_MAX + 1]; 4613 struct component_name key = { 0, name }; 4614 int freepage = 0; 4615 4616 if (p == NULL) { 4617 freepage = 1; 4618 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4619 if (rc) 4620 return rc; 4621 } 4622 4623 /* display page control */ 4624 printk("bn:0x%Lx flag:0x%08x nextindex:%d\n", 4625 bn, p->header.flag, p->header.nextindex); 4626 4627 /* display entries */ 4628 stbl = DT_GETSTBL(p); 4629 for (i = 0, j = 1; i < p->header.nextindex; i++, j++) { 4630 dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag); 4631 key.name[key.namlen] = '\0'; 4632 if (p->header.flag & BT_LEAF) { 4633 lh = (struct ldtentry *) & p->slot[stbl[i]]; 4634 printf("\t[%d] %s:%d", i, key.name, 4635 le32_to_cpu(lh->inumber)); 4636 } else { 4637 ih = (struct idtentry *) & p->slot[stbl[i]]; 4638 xd = (pxd_t *) ih; 4639 bn = addressPXD(xd); 4640 printf("\t[%d] %s:0x%Lx", i, key.name, bn); 4641 } 4642 4643 if (j == 4) { 4644 printf("\n"); 4645 j = 0; 4646 } 4647 } 4648 4649 printf("\n"); 4650 4651 if (freepage) 4652 DT_PUTPAGE(mp); 4653 4654 return 0; 4655} 4656#endif /* _JFS_DEBUG_DTREE */ 4657