/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/* Now we have all buffers that must be used in balancing of the tree */
/* Further calculations can not cause schedule(), and thus the buffer */
/* tree will be stable until the balancing will be finished */
/* balance the tree according to the analysis made before, */
/* and using buffers obtained after all above. */

/**
 ** balance_leaf_when_delete
 ** balance_leaf
 ** do_balance
 **
 **/

#include <asm/uaccess.h>
#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/buffer_head.h>
#include <linux/kernel.h>

/*
 * Fill *bi so it describes the left neighbor L[0] of the node being
 * balanced: the buffer itself, its parent FL[0], and the child position
 * of L[0] inside that parent.
 */
static inline void buffer_info_init_left(struct tree_balance *tb,
					 struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = tb->L[0];
	bi->bi_parent = tb->FL[0];
	bi->bi_position = get_left_neighbor_position(tb, 0);
}

/*
 * Fill *bi so it describes the right neighbor R[0]: the buffer, its
 * parent FR[0], and the child position of R[0] inside that parent.
 */
static inline void buffer_info_init_right(struct tree_balance *tb,
					  struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = tb->R[0];
	bi->bi_parent = tb->FR[0];
	bi->bi_position = get_right_neighbor_position(tb, 0);
}

/*
 * Fill *bi so it describes S[0], the leaf at the end of tb->tb_path (the
 * node this balance operates on), with its parent and the position of
 * S[0] inside that parent taken from the path.
 */
static inline void buffer_info_init_tbS0(struct tree_balance *tb,
					 struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
	bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
}

/*
 * Fill *bi for a bare buffer head bh with no parent recorded in the
 * tree_balance (e.g. a freshly obtained node): parent and position are
 * cleared.
 */
static inline void buffer_info_init_bh(struct tree_balance *tb,
				       struct buffer_info *bi,
				       struct buffer_head *bh)
{
	bi->tb = tb;
	bi->bi_bh = bh;
	bi->bi_parent = NULL;
	bi->bi_position = 0;
}

/*
 * Mark bh dirty through the journal of the transaction this balance is
 * running under.  The 'flag' argument is accepted for call-site
 * uniformity but is not used here.
 */
inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
				       struct buffer_head *bh, int flag)
{
	journal_mark_dirty(tb->transaction_handle,
			   tb->transaction_handle->t_super, bh);
}

/* Internal nodes and the superblock are journalled the same way as leaves. */
#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty

/* summary:
 if deleting something (
tb->insert_size[0] < 0 )
   return(balance_leaf_when_delete()); (flag d handled here)
 else
   if lnum is larger than 0 we put items into the left node
   if rnum is larger than 0 we put items into the right node
   if snum1 is larger than 0 we put items into the new node s1
   if snum2 is larger than 0 we put items into the new node s2
Note that all *num* count new items being created.

It would be easier to read balance_leaf() if each of these summary
lines was a separate procedure rather than being inlined.  I think
that there are many passages here and in balance_leaf_when_delete() in
which two calls to one procedure can replace two passages, and it
might save cache space and improve software maintenance costs to do so.

Vladimir made the perceptive comment that we should offload most of
the decision making in this function into fix_nodes/check_balance, and
then create some sort of structure in tb that says what actions should
be performed by do_balance.

-Hans */

/* Balance leaf node in case of delete or cut: insert_size[0] < 0
 *
 * lnum, rnum can have values >= -1
 *	-1 means that the neighbor must be joined with S
 *	 0 means that nothing should be done with the neighbor
 *	>0 means to shift entirely or partly the specified number of items
 *	   to the neighbor
 *
 * Returns 0 normally; returns -1 when all three of L[0], S[0] and R[0]
 * are merged into R[0] (see that branch below).
 */
static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
{
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
	int item_pos = PATH_LAST_POSITION(tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	int n;
	struct item_head *ih;

	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
	RFALSE(tb->blknum[0] > 1,
	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
	       "PAP-12010: tree can not be empty");

	ih = B_N_PITEM_HEAD(tbS0, item_pos);
	buffer_info_init_tbS0(tb, &bi);

	/* Delete or truncate the item */

	switch (flag) {
	case M_DELETE:		/* delete item in S[0] */

		/* insert_size[0] is negative; its magnitude must equal the
		 * full on-disk size of the item being removed (header + body). */
		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
		       -tb->insert_size[0], ih);

		leaf_delete_items(&bi, 0, item_pos, 1, -1);

		/* If the 0-th item was deleted, the left delimiting key in
		 * the common parent CFL[0] must be updated. */
		if (!item_pos && tb->CFL[0]) {
			if (B_NR_ITEMS(tbS0)) {
				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
					    0);
			} else {
				/* S[0] became empty; take the key from its
				 * parent instead, but only when S[0] is the
				 * leftmost child. */
				if (!PATH_H_POSITION(tb->tb_path, 1))
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    PATH_H_PPARENT(tb->tb_path,
								   0), 0);
			}
		}

		RFALSE(!item_pos && !tb->CFL[0],
		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
		       tb->L[0]);

		break;

	case M_CUT:{		/* cut item in S[0] */
			if (is_direntry_le_ih(ih)) {

				/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
				/* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
				tb->insert_size[0] = -1;
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
				       tb->CFL[0]);

				/* Cutting the very first entry of the first
				 * item changes the leaf's smallest key, so
				 * propagate it to the delimiting key. */
				if (!item_pos && !pos_in_item && tb->CFL[0]) {
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    tbS0, 0);
				}
			} else {
				/* direct or indirect item: cut -insert_size[0]
				 * bytes from its tail */
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!ih_item_len(ih),
				       "PAP-12035: cut must leave non-zero dynamic length of item");
			}
			break;
		}

	default:
		print_cur_tb("12040");
		reiserfs_panic(tb->tb_sb, "PAP-12040",
			       "unexpected mode: %s(%d)",
			       (flag ==
				M_PASTE) ? "PASTE" : ((flag ==
						       M_INSERT) ? "INSERT" :
						      "UNKNOWN"), flag);
	}

	/* the rule is that no shifting occurs unless by shifting a node can be freed */
	n = B_NR_ITEMS(tbS0);
	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
					/* all contents of all the 3 buffers will be in L[0] */
					if (PATH_H_POSITION(tb->tb_path, 1) == 0
					    && 1 < B_NR_ITEMS(tb->FR[0]))
						replace_key(tb, tb->CFL[0],
							    tb->lkey[0],
							    tb->FR[0], 1);

					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
							-1, NULL);
					leaf_move_items(LEAF_FROM_R_TO_L, tb,
							B_NR_ITEMS(tb->R[0]),
							-1, NULL);

					reiserfs_invalidate_buffer(tb, tbS0);
					reiserfs_invalidate_buffer(tb,
								   tb->R[0]);

					return 0;
				}
				/* all contents of all the 3 buffers will be in R[0] */
				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
						NULL);
				leaf_move_items(LEAF_FROM_L_TO_R, tb,
						B_NR_ITEMS(tb->L[0]), -1, NULL);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0], tb->rkey[0],
					    tb->R[0], 0);

				reiserfs_invalidate_buffer(tb, tbS0);
				reiserfs_invalidate_buffer(tb, tb->L[0]);

				/* -1 distinguishes the everything-into-R[0]
				 * merge for the caller */
				return -1;
			}

			RFALSE(tb->rnum[0] != 0,
			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer(tb, tbS0);

			return 0;
		}
		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */

		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
		       (tb->lnum[0] + tb->rnum[0] > n + 1),
		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
		       tb->rnum[0], tb->lnum[0], n);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
		       (tb->lbytes != -1 || tb->rbytes != -1),
		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
		       tb->rbytes, tb->lbytes);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
		       (tb->lbytes < 1 || tb->rbytes != -1),
		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
		       tb->rbytes, tb->lbytes);

		/* S[0] is emptied into both neighbors and freed */
		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer(tb, tbS0);

		return 0;
	}

	if (tb->rnum[0] == -1) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer(tb, tbS0);
		return 0;
	}

	RFALSE(tb->rnum[0],
	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
	return 0;
}

static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (this is on little endian) */
			const char *body,	/* body of inserted item or bytes to paste */
			int flag,	/* i - insert, d - delete, c - cut, p - paste
					   (see comment to do_balance) */
			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
							   must be inserted into the next higher level.
This insertion 277 consists of a key or two keys and their corresponding 278 pointers */ 279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ 280 ) 281{ 282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] 284 of the affected item */ 285 struct buffer_info bi; 286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 287 int snum[2]; /* number of items that will be placed 288 into S_new (includes partially shifted 289 items) */ 290 int sbytes[2]; /* if an item is partially shifted into S_new then 291 if it is a directory item 292 it is the number of entries from the item that are shifted into S_new 293 else 294 it is the number of bytes from the item that are shifted into S_new 295 */ 296 int n, i; 297 int ret_val; 298 int pos_in_item; 299 int zeros_num; 300 301 PROC_INFO_INC(tb->tb_sb, balance_at[0]); 302 303 /* Make balance in case insert_size[0] < 0 */ 304 if (tb->insert_size[0] < 0) 305 return balance_leaf_when_delete(tb, flag); 306 307 zeros_num = 0; 308 if (flag == M_INSERT && !body) 309 zeros_num = ih_item_len(ih); 310 311 pos_in_item = tb->tb_path->pos_in_item; 312 /* for indirect item pos_in_item is measured in unformatted node 313 pointers. 
Recalculate to bytes */ 314 if (flag != M_INSERT 315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) 316 pos_in_item *= UNFM_P_SIZE; 317 318 if (tb->lnum[0] > 0) { 319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 320 if (item_pos < tb->lnum[0]) { 321 /* new item or it part falls to L[0], shift it too */ 322 n = B_NR_ITEMS(tb->L[0]); 323 324 switch (flag) { 325 case M_INSERT: /* insert item into L[0] */ 326 327 if (item_pos == tb->lnum[0] - 1 328 && tb->lbytes != -1) { 329 /* part of new item falls into L[0] */ 330 int new_item_len; 331 int version; 332 333 ret_val = 334 leaf_shift_left(tb, tb->lnum[0] - 1, 335 -1); 336 337 /* Calculate item length to insert to S[0] */ 338 new_item_len = 339 ih_item_len(ih) - tb->lbytes; 340 /* Calculate and check item length to insert to L[0] */ 341 put_ih_item_len(ih, 342 ih_item_len(ih) - 343 new_item_len); 344 345 RFALSE(ih_item_len(ih) <= 0, 346 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 347 ih_item_len(ih)); 348 349 /* Insert new item into L[0] */ 350 buffer_info_init_left(tb, &bi); 351 leaf_insert_into_buf(&bi, 352 n + item_pos - 353 ret_val, ih, body, 354 zeros_num > 355 ih_item_len(ih) ? 356 ih_item_len(ih) : 357 zeros_num); 358 359 version = ih_version(ih); 360 361 /* Calculate key component, item length and body to insert into S[0] */ 362 set_le_ih_k_offset(ih, 363 le_ih_k_offset(ih) + 364 (tb-> 365 lbytes << 366 (is_indirect_le_ih 367 (ih) ? 
tb->tb_sb-> 368 s_blocksize_bits - 369 UNFM_P_SHIFT : 370 0))); 371 372 put_ih_item_len(ih, new_item_len); 373 if (tb->lbytes > zeros_num) { 374 body += 375 (tb->lbytes - zeros_num); 376 zeros_num = 0; 377 } else 378 zeros_num -= tb->lbytes; 379 380 RFALSE(ih_item_len(ih) <= 0, 381 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 382 ih_item_len(ih)); 383 } else { 384 /* new item in whole falls into L[0] */ 385 /* Shift lnum[0]-1 items to L[0] */ 386 ret_val = 387 leaf_shift_left(tb, tb->lnum[0] - 1, 388 tb->lbytes); 389 /* Insert new item into L[0] */ 390 buffer_info_init_left(tb, &bi); 391 leaf_insert_into_buf(&bi, 392 n + item_pos - 393 ret_val, ih, body, 394 zeros_num); 395 tb->insert_size[0] = 0; 396 zeros_num = 0; 397 } 398 break; 399 400 case M_PASTE: /* append item in L[0] */ 401 402 if (item_pos == tb->lnum[0] - 1 403 && tb->lbytes != -1) { 404 /* we must shift the part of the appended item */ 405 if (is_direntry_le_ih 406 (B_N_PITEM_HEAD(tbS0, item_pos))) { 407 408 RFALSE(zeros_num, 409 "PAP-12090: invalid parameter in case of a directory"); 410 /* directory item */ 411 if (tb->lbytes > pos_in_item) { 412 /* new directory entry falls into L[0] */ 413 struct item_head 414 *pasted; 415 int l_pos_in_item = 416 pos_in_item; 417 418 /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 entries from given directory item */ 419 ret_val = 420 leaf_shift_left(tb, 421 tb-> 422 lnum 423 [0], 424 tb-> 425 lbytes 426 - 427 1); 428 if (ret_val 429 && !item_pos) { 430 pasted = 431 B_N_PITEM_HEAD 432 (tb->L[0], 433 B_NR_ITEMS 434 (tb-> 435 L[0]) - 436 1); 437 l_pos_in_item += 438 I_ENTRY_COUNT 439 (pasted) - 440 (tb-> 441 lbytes - 442 1); 443 } 444 445 /* Append given directory entry to directory item */ 446 buffer_info_init_left(tb, &bi); 447 leaf_paste_in_buffer 448 (&bi, 449 n + item_pos - 450 ret_val, 451 l_pos_in_item, 452 tb->insert_size[0], 453 body, zeros_num); 454 455 /* previous string prepared space for pasting new entry, following string pastes this entry */ 456 457 /* when we have merge directory item, pos_in_item has been changed too */ 458 459 /* paste new directory entry. 1 is entry number */ 460 leaf_paste_entries(&bi, 461 n + 462 item_pos 463 - 464 ret_val, 465 l_pos_in_item, 466 1, 467 (struct 468 reiserfs_de_head 469 *) 470 body, 471 body 472 + 473 DEH_SIZE, 474 tb-> 475 insert_size 476 [0] 477 ); 478 tb->insert_size[0] = 0; 479 } else { 480 /* new directory item doesn't fall into L[0] */ 481 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ 482 leaf_shift_left(tb, 483 tb-> 484 lnum[0], 485 tb-> 486 lbytes); 487 } 488 /* Calculate new position to append in item body */ 489 pos_in_item -= tb->lbytes; 490 } else { 491 /* regular object */ 492 RFALSE(tb->lbytes <= 0, 493 "PAP-12095: there is nothing to shift to L[0]. 
lbytes=%d", 494 tb->lbytes); 495 RFALSE(pos_in_item != 496 ih_item_len 497 (B_N_PITEM_HEAD 498 (tbS0, item_pos)), 499 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", 500 ih_item_len 501 (B_N_PITEM_HEAD 502 (tbS0, item_pos)), 503 pos_in_item); 504 505 if (tb->lbytes >= pos_in_item) { 506 /* appended item will be in L[0] in whole */ 507 int l_n; 508 509 /* this bytes number must be appended to the last item of L[h] */ 510 l_n = 511 tb->lbytes - 512 pos_in_item; 513 514 /* Calculate new insert_size[0] */ 515 tb->insert_size[0] -= 516 l_n; 517 518 RFALSE(tb-> 519 insert_size[0] <= 520 0, 521 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", 522 tb-> 523 insert_size[0]); 524 ret_val = 525 leaf_shift_left(tb, 526 tb-> 527 lnum 528 [0], 529 ih_item_len 530 (B_N_PITEM_HEAD 531 (tbS0, 532 item_pos))); 533 /* Append to body of item in L[0] */ 534 buffer_info_init_left(tb, &bi); 535 leaf_paste_in_buffer 536 (&bi, 537 n + item_pos - 538 ret_val, 539 ih_item_len 540 (B_N_PITEM_HEAD 541 (tb->L[0], 542 n + item_pos - 543 ret_val)), l_n, 544 body, 545 zeros_num > 546 l_n ? 
l_n : 547 zeros_num); 548 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ 549 { 550 int version; 551 int temp_l = 552 l_n; 553 554 RFALSE 555 (ih_item_len 556 (B_N_PITEM_HEAD 557 (tbS0, 558 0)), 559 "PAP-12106: item length must be 0"); 560 RFALSE 561 (comp_short_le_keys 562 (B_N_PKEY 563 (tbS0, 0), 564 B_N_PKEY 565 (tb->L[0], 566 n + 567 item_pos 568 - 569 ret_val)), 570 "PAP-12107: items must be of the same file"); 571 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { 572 temp_l = 573 l_n 574 << 575 (tb-> 576 tb_sb-> 577 s_blocksize_bits 578 - 579 UNFM_P_SHIFT); 580 } 581 /* update key of first item in S0 */ 582 version = 583 ih_version 584 (B_N_PITEM_HEAD 585 (tbS0, 0)); 586 set_le_key_k_offset 587 (version, 588 B_N_PKEY 589 (tbS0, 0), 590 le_key_k_offset 591 (version, 592 B_N_PKEY 593 (tbS0, 594 0)) + 595 temp_l); 596 /* update left delimiting key */ 597 set_le_key_k_offset 598 (version, 599 B_N_PDELIM_KEY 600 (tb-> 601 CFL[0], 602 tb-> 603 lkey[0]), 604 le_key_k_offset 605 (version, 606 B_N_PDELIM_KEY 607 (tb-> 608 CFL[0], 609 tb-> 610 lkey[0])) 611 + temp_l); 612 } 613 614 /* Calculate new body, position in item and insert_size[0] */ 615 if (l_n > zeros_num) { 616 body += 617 (l_n - 618 zeros_num); 619 zeros_num = 0; 620 } else 621 zeros_num -= 622 l_n; 623 pos_in_item = 0; 624 625 RFALSE 626 (comp_short_le_keys 627 (B_N_PKEY(tbS0, 0), 628 B_N_PKEY(tb->L[0], 629 B_NR_ITEMS 630 (tb-> 631 L[0]) - 632 1)) 633 || 634 !op_is_left_mergeable 635 (B_N_PKEY(tbS0, 0), 636 tbS0->b_size) 637 || 638 !op_is_left_mergeable 639 (B_N_PDELIM_KEY 640 (tb->CFL[0], 641 tb->lkey[0]), 642 tbS0->b_size), 643 "PAP-12120: item must be merge-able with left neighboring item"); 644 } else { /* only part of the appended item will be in L[0] */ 645 646 /* Calculate position in item for append in S[0] */ 647 pos_in_item -= 648 tb->lbytes; 649 650 RFALSE(pos_in_item <= 0, 651 "PAP-12125: no place for paste. 
pos_in_item=%d", 652 pos_in_item); 653 654 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 655 leaf_shift_left(tb, 656 tb-> 657 lnum[0], 658 tb-> 659 lbytes); 660 } 661 } 662 } else { /* appended item will be in L[0] in whole */ 663 664 struct item_head *pasted; 665 666 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ 667 /* then increment pos_in_item by the size of the last item in L[0] */ 668 pasted = 669 B_N_PITEM_HEAD(tb->L[0], 670 n - 1); 671 if (is_direntry_le_ih(pasted)) 672 pos_in_item += 673 ih_entry_count 674 (pasted); 675 else 676 pos_in_item += 677 ih_item_len(pasted); 678 } 679 680 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 681 ret_val = 682 leaf_shift_left(tb, tb->lnum[0], 683 tb->lbytes); 684 /* Append to body of item in L[0] */ 685 buffer_info_init_left(tb, &bi); 686 leaf_paste_in_buffer(&bi, 687 n + item_pos - 688 ret_val, 689 pos_in_item, 690 tb->insert_size[0], 691 body, zeros_num); 692 693 /* if appended item is directory, paste entry */ 694 pasted = 695 B_N_PITEM_HEAD(tb->L[0], 696 n + item_pos - 697 ret_val); 698 if (is_direntry_le_ih(pasted)) 699 leaf_paste_entries(&bi, 700 n + 701 item_pos - 702 ret_val, 703 pos_in_item, 704 1, 705 (struct 706 reiserfs_de_head 707 *)body, 708 body + 709 DEH_SIZE, 710 tb-> 711 insert_size 712 [0] 713 ); 714 /* if appended item is indirect item, put unformatted node into un list */ 715 if (is_indirect_le_ih(pasted)) 716 set_ih_free_space(pasted, 0); 717 tb->insert_size[0] = 0; 718 zeros_num = 0; 719 } 720 break; 721 default: /* cases d and t */ 722 reiserfs_panic(tb->tb_sb, "PAP-12130", 723 "lnum > 0: unexpected mode: " 724 " %s(%d)", 725 (flag == 726 M_DELETE) ? "DELETE" : ((flag == 727 M_CUT) 728 ? 
"CUT" 729 : 730 "UNKNOWN"), 731 flag); 732 } 733 } else { 734 /* new item doesn't fall into L[0] */ 735 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 736 } 737 } 738 739 /* tb->lnum[0] > 0 */ 740 /* Calculate new item position */ 741 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); 742 743 if (tb->rnum[0] > 0) { 744 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 745 n = B_NR_ITEMS(tbS0); 746 switch (flag) { 747 748 case M_INSERT: /* insert item */ 749 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ 750 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ 751 loff_t old_key_comp, old_len, 752 r_zeros_number; 753 const char *r_body; 754 int version; 755 loff_t offset; 756 757 leaf_shift_right(tb, tb->rnum[0] - 1, 758 -1); 759 760 version = ih_version(ih); 761 /* Remember key component and item length */ 762 old_key_comp = le_ih_k_offset(ih); 763 old_len = ih_item_len(ih); 764 765 /* Calculate key component and item length to insert into R[0] */ 766 offset = 767 le_ih_k_offset(ih) + 768 ((old_len - 769 tb-> 770 rbytes) << (is_indirect_le_ih(ih) 771 ? 
tb->tb_sb-> 772 s_blocksize_bits - 773 UNFM_P_SHIFT : 0)); 774 set_le_ih_k_offset(ih, offset); 775 put_ih_item_len(ih, tb->rbytes); 776 /* Insert part of the item into R[0] */ 777 buffer_info_init_right(tb, &bi); 778 if ((old_len - tb->rbytes) > zeros_num) { 779 r_zeros_number = 0; 780 r_body = 781 body + (old_len - 782 tb->rbytes) - 783 zeros_num; 784 } else { 785 r_body = body; 786 r_zeros_number = 787 zeros_num - (old_len - 788 tb->rbytes); 789 zeros_num -= r_zeros_number; 790 } 791 792 leaf_insert_into_buf(&bi, 0, ih, r_body, 793 r_zeros_number); 794 795 /* Replace right delimiting key by first key in R[0] */ 796 replace_key(tb, tb->CFR[0], tb->rkey[0], 797 tb->R[0], 0); 798 799 /* Calculate key component and item length to insert into S[0] */ 800 set_le_ih_k_offset(ih, old_key_comp); 801 put_ih_item_len(ih, 802 old_len - tb->rbytes); 803 804 tb->insert_size[0] -= tb->rbytes; 805 806 } else { /* whole new item falls into R[0] */ 807 808 /* Shift rnum[0]-1 items to R[0] */ 809 ret_val = 810 leaf_shift_right(tb, 811 tb->rnum[0] - 1, 812 tb->rbytes); 813 /* Insert new item into R[0] */ 814 buffer_info_init_right(tb, &bi); 815 leaf_insert_into_buf(&bi, 816 item_pos - n + 817 tb->rnum[0] - 1, 818 ih, body, 819 zeros_num); 820 821 if (item_pos - n + tb->rnum[0] - 1 == 0) { 822 replace_key(tb, tb->CFR[0], 823 tb->rkey[0], 824 tb->R[0], 0); 825 826 } 827 zeros_num = tb->insert_size[0] = 0; 828 } 829 } else { /* new item or part of it doesn't fall into R[0] */ 830 831 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 832 } 833 break; 834 835 case M_PASTE: /* append item */ 836 837 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ 838 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ 839 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ 840 int entry_count; 841 842 RFALSE(zeros_num, 843 "PAP-12145: invalid parameter in case of a directory"); 844 
entry_count = 845 I_ENTRY_COUNT(B_N_PITEM_HEAD 846 (tbS0, 847 item_pos)); 848 if (entry_count - tb->rbytes < 849 pos_in_item) 850 /* new directory entry falls into R[0] */ 851 { 852 int paste_entry_position; 853 854 RFALSE(tb->rbytes - 1 >= 855 entry_count 856 || !tb-> 857 insert_size[0], 858 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", 859 tb->rbytes, 860 entry_count); 861 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ 862 leaf_shift_right(tb, 863 tb-> 864 rnum 865 [0], 866 tb-> 867 rbytes 868 - 1); 869 /* Paste given directory entry to directory item */ 870 paste_entry_position = 871 pos_in_item - 872 entry_count + 873 tb->rbytes - 1; 874 buffer_info_init_right(tb, &bi); 875 leaf_paste_in_buffer 876 (&bi, 0, 877 paste_entry_position, 878 tb->insert_size[0], 879 body, zeros_num); 880 /* paste entry */ 881 leaf_paste_entries(&bi, 882 0, 883 paste_entry_position, 884 1, 885 (struct 886 reiserfs_de_head 887 *) 888 body, 889 body 890 + 891 DEH_SIZE, 892 tb-> 893 insert_size 894 [0] 895 ); 896 897 if (paste_entry_position 898 == 0) { 899 /* change delimiting keys */ 900 replace_key(tb, 901 tb-> 902 CFR 903 [0], 904 tb-> 905 rkey 906 [0], 907 tb-> 908 R 909 [0], 910 0); 911 } 912 913 tb->insert_size[0] = 0; 914 pos_in_item++; 915 } else { /* new directory entry doesn't fall into R[0] */ 916 917 leaf_shift_right(tb, 918 tb-> 919 rnum 920 [0], 921 tb-> 922 rbytes); 923 } 924 } else { /* regular object */ 925 926 int n_shift, n_rem, 927 r_zeros_number; 928 const char *r_body; 929 930 /* Calculate number of bytes which must be shifted from appended item */ 931 if ((n_shift = 932 tb->rbytes - 933 tb->insert_size[0]) < 0) 934 n_shift = 0; 935 936 RFALSE(pos_in_item != 937 ih_item_len 938 (B_N_PITEM_HEAD 939 (tbS0, item_pos)), 940 "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", 941 pos_in_item, 942 ih_item_len 943 (B_N_PITEM_HEAD 944 (tbS0, item_pos))); 945 946 leaf_shift_right(tb, 947 tb->rnum[0], 948 n_shift); 949 /* Calculate number of bytes which must remain in body after appending to R[0] */ 950 if ((n_rem = 951 tb->insert_size[0] - 952 tb->rbytes) < 0) 953 n_rem = 0; 954 955 { 956 int version; 957 unsigned long temp_rem = 958 n_rem; 959 960 version = 961 ih_version 962 (B_N_PITEM_HEAD 963 (tb->R[0], 0)); 964 if (is_indirect_le_key 965 (version, 966 B_N_PKEY(tb->R[0], 967 0))) { 968 temp_rem = 969 n_rem << 970 (tb->tb_sb-> 971 s_blocksize_bits 972 - 973 UNFM_P_SHIFT); 974 } 975 set_le_key_k_offset 976 (version, 977 B_N_PKEY(tb->R[0], 978 0), 979 le_key_k_offset 980 (version, 981 B_N_PKEY(tb->R[0], 982 0)) + 983 temp_rem); 984 set_le_key_k_offset 985 (version, 986 B_N_PDELIM_KEY(tb-> 987 CFR 988 [0], 989 tb-> 990 rkey 991 [0]), 992 le_key_k_offset 993 (version, 994 B_N_PDELIM_KEY 995 (tb->CFR[0], 996 tb->rkey[0])) + 997 temp_rem); 998 } 999/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; 1000 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 1001 do_balance_mark_internal_dirty 1002 (tb, tb->CFR[0], 0); 1003 1004 /* Append part of body into R[0] */ 1005 buffer_info_init_right(tb, &bi); 1006 if (n_rem > zeros_num) { 1007 r_zeros_number = 0; 1008 r_body = 1009 body + n_rem - 1010 zeros_num; 1011 } else { 1012 r_body = body; 1013 r_zeros_number = 1014 zeros_num - n_rem; 1015 zeros_num -= 1016 r_zeros_number; 1017 } 1018 1019 leaf_paste_in_buffer(&bi, 0, 1020 n_shift, 1021 tb-> 1022 insert_size 1023 [0] - 1024 n_rem, 1025 r_body, 1026 r_zeros_number); 1027 1028 if (is_indirect_le_ih 1029 (B_N_PITEM_HEAD 1030 (tb->R[0], 0))) { 1031 set_ih_free_space 1032 (B_N_PITEM_HEAD 1033 (tb->R[0], 0), 0); 1034 } 1035 tb->insert_size[0] = n_rem; 1036 if (!n_rem) 1037 pos_in_item++; 1038 } 1039 } else { /* pasted item in whole falls into R[0] */ 1040 1041 struct item_head *pasted; 1042 1043 ret_val = 1044 
leaf_shift_right(tb, tb->rnum[0], 1045 tb->rbytes); 1046 /* append item in R[0] */ 1047 if (pos_in_item >= 0) { 1048 buffer_info_init_right(tb, &bi); 1049 leaf_paste_in_buffer(&bi, 1050 item_pos - 1051 n + 1052 tb-> 1053 rnum[0], 1054 pos_in_item, 1055 tb-> 1056 insert_size 1057 [0], body, 1058 zeros_num); 1059 } 1060 1061 /* paste new entry, if item is directory item */ 1062 pasted = 1063 B_N_PITEM_HEAD(tb->R[0], 1064 item_pos - n + 1065 tb->rnum[0]); 1066 if (is_direntry_le_ih(pasted) 1067 && pos_in_item >= 0) { 1068 leaf_paste_entries(&bi, 1069 item_pos - 1070 n + 1071 tb->rnum[0], 1072 pos_in_item, 1073 1, 1074 (struct 1075 reiserfs_de_head 1076 *)body, 1077 body + 1078 DEH_SIZE, 1079 tb-> 1080 insert_size 1081 [0] 1082 ); 1083 if (!pos_in_item) { 1084 1085 RFALSE(item_pos - n + 1086 tb->rnum[0], 1087 "PAP-12165: directory item must be first item of node when pasting is in 0th position"); 1088 1089 /* update delimiting keys */ 1090 replace_key(tb, 1091 tb->CFR[0], 1092 tb->rkey[0], 1093 tb->R[0], 1094 0); 1095 } 1096 } 1097 1098 if (is_indirect_le_ih(pasted)) 1099 set_ih_free_space(pasted, 0); 1100 zeros_num = tb->insert_size[0] = 0; 1101 } 1102 } else { /* new item doesn't fall into R[0] */ 1103 1104 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 1105 } 1106 break; 1107 default: /* cases d and t */ 1108 reiserfs_panic(tb->tb_sb, "PAP-12175", 1109 "rnum > 0: unexpected mode: %s(%d)", 1110 (flag == 1111 M_DELETE) ? "DELETE" : ((flag == 1112 M_CUT) ? "CUT" 1113 : "UNKNOWN"), 1114 flag); 1115 } 1116 1117 } 1118 1119 /* tb->rnum[0] > 0 */ 1120 RFALSE(tb->blknum[0] > 3, 1121 "PAP-12180: blknum can not be %d. It must be <= 3", 1122 tb->blknum[0]); 1123 RFALSE(tb->blknum[0] < 0, 1124 "PAP-12185: blknum can not be %d. 
It must be >= 0", 1125 tb->blknum[0]); 1126 1127 /* if while adding to a node we discover that it is possible to split 1128 it in two, and merge the left part into the left neighbor and the 1129 right part into the right neighbor, eliminating the node */ 1130 if (tb->blknum[0] == 0) { /* node S[0] is empty now */ 1131 1132 RFALSE(!tb->lnum[0] || !tb->rnum[0], 1133 "PAP-12190: lnum and rnum must not be zero"); 1134 /* if insertion was done before 0-th position in R[0], right 1135 delimiting key of the tb->L[0]'s and left delimiting key are 1136 not set correctly */ 1137 if (tb->CFL[0]) { 1138 if (!tb->CFR[0]) 1139 reiserfs_panic(tb->tb_sb, "vs-12195", 1140 "CFR not initialized"); 1141 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), 1142 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); 1143 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); 1144 } 1145 1146 reiserfs_invalidate_buffer(tb, tbS0); 1147 return 0; 1148 } 1149 1150 /* Fill new nodes that appear in place of S[0] */ 1151 1152 /* I am told that this copying is because we need an array to enable 1153 the looping code. -Hans */ 1154 snum[0] = tb->s1num, snum[1] = tb->s2num; 1155 sbytes[0] = tb->s1bytes; 1156 sbytes[1] = tb->s2bytes; 1157 for (i = tb->blknum[0] - 2; i >= 0; i--) { 1158 1159 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, 1160 snum[i]); 1161 1162 /* here we shift from S to S_new nodes */ 1163 1164 S_new[i] = get_FEB(tb); 1165 1166 /* initialized block type and tree level */ 1167 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); 1168 1169 n = B_NR_ITEMS(tbS0); 1170 1171 switch (flag) { 1172 case M_INSERT: /* insert item */ 1173 1174 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ 1175 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ 1176 int old_key_comp, old_len, 1177 r_zeros_number; 1178 const char *r_body; 1179 int version; 1180 1181 /* Move snum[i]-1 items from S[0] to S_new[i] */ 1182 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1183 snum[i] - 1, -1, 1184 S_new[i]); 1185 /* Remember key component and item length */ 1186 version = ih_version(ih); 1187 old_key_comp = le_ih_k_offset(ih); 1188 old_len = ih_item_len(ih); 1189 1190 /* Calculate key component and item length to insert into S_new[i] */ 1191 set_le_ih_k_offset(ih, 1192 le_ih_k_offset(ih) + 1193 ((old_len - 1194 sbytes[i]) << 1195 (is_indirect_le_ih 1196 (ih) ? 
tb->tb_sb-> 1197 s_blocksize_bits - 1198 UNFM_P_SHIFT : 1199 0))); 1200 1201 put_ih_item_len(ih, sbytes[i]); 1202 1203 /* Insert part of the item into S_new[i] before 0-th item */ 1204 buffer_info_init_bh(tb, &bi, S_new[i]); 1205 1206 if ((old_len - sbytes[i]) > zeros_num) { 1207 r_zeros_number = 0; 1208 r_body = 1209 body + (old_len - 1210 sbytes[i]) - 1211 zeros_num; 1212 } else { 1213 r_body = body; 1214 r_zeros_number = 1215 zeros_num - (old_len - 1216 sbytes[i]); 1217 zeros_num -= r_zeros_number; 1218 } 1219 1220 leaf_insert_into_buf(&bi, 0, ih, r_body, 1221 r_zeros_number); 1222 1223 /* Calculate key component and item length to insert into S[i] */ 1224 set_le_ih_k_offset(ih, old_key_comp); 1225 put_ih_item_len(ih, 1226 old_len - sbytes[i]); 1227 tb->insert_size[0] -= sbytes[i]; 1228 } else { /* whole new item falls into S_new[i] */ 1229 1230 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ 1231 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1232 snum[i] - 1, sbytes[i], 1233 S_new[i]); 1234 1235 /* Insert new item into S_new[i] */ 1236 buffer_info_init_bh(tb, &bi, S_new[i]); 1237 leaf_insert_into_buf(&bi, 1238 item_pos - n + 1239 snum[i] - 1, ih, 1240 body, zeros_num); 1241 1242 zeros_num = tb->insert_size[0] = 0; 1243 } 1244 } 1245 1246 else { /* new item or it part don't falls into S_new[i] */ 1247 1248 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1249 snum[i], sbytes[i], S_new[i]); 1250 } 1251 break; 1252 1253 case M_PASTE: /* append item */ 1254 1255 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ 1256 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ 1257 struct item_head *aux_ih; 1258 1259 RFALSE(ih, "PAP-12210: ih must be 0"); 1260 1261 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); 1262 if (is_direntry_le_ih(aux_ih)) { 1263 /* we append to directory item */ 1264 1265 int entry_count; 1266 1267 entry_count = 1268 ih_entry_count(aux_ih); 1269 1270 if (entry_count - 
sbytes[i] < 1271 pos_in_item 1272 && pos_in_item <= 1273 entry_count) { 1274 /* new directory entry falls into S_new[i] */ 1275 1276 RFALSE(!tb-> 1277 insert_size[0], 1278 "PAP-12215: insert_size is already 0"); 1279 RFALSE(sbytes[i] - 1 >= 1280 entry_count, 1281 "PAP-12220: there are no so much entries (%d), only %d", 1282 sbytes[i] - 1, 1283 entry_count); 1284 1285 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ 1286 leaf_move_items 1287 (LEAF_FROM_S_TO_SNEW, 1288 tb, snum[i], 1289 sbytes[i] - 1, 1290 S_new[i]); 1291 /* Paste given directory entry to directory item */ 1292 buffer_info_init_bh(tb, &bi, S_new[i]); 1293 leaf_paste_in_buffer 1294 (&bi, 0, 1295 pos_in_item - 1296 entry_count + 1297 sbytes[i] - 1, 1298 tb->insert_size[0], 1299 body, zeros_num); 1300 /* paste new directory entry */ 1301 leaf_paste_entries(&bi, 1302 0, 1303 pos_in_item 1304 - 1305 entry_count 1306 + 1307 sbytes 1308 [i] - 1309 1, 1, 1310 (struct 1311 reiserfs_de_head 1312 *) 1313 body, 1314 body 1315 + 1316 DEH_SIZE, 1317 tb-> 1318 insert_size 1319 [0] 1320 ); 1321 tb->insert_size[0] = 0; 1322 pos_in_item++; 1323 } else { /* new directory entry doesn't fall into S_new[i] */ 1324 leaf_move_items 1325 (LEAF_FROM_S_TO_SNEW, 1326 tb, snum[i], 1327 sbytes[i], 1328 S_new[i]); 1329 } 1330 } else { /* regular object */ 1331 1332 int n_shift, n_rem, 1333 r_zeros_number; 1334 const char *r_body; 1335 1336 RFALSE(pos_in_item != 1337 ih_item_len 1338 (B_N_PITEM_HEAD 1339 (tbS0, item_pos)) 1340 || tb->insert_size[0] <= 1341 0, 1342 "PAP-12225: item too short or insert_size <= 0"); 1343 1344 /* Calculate number of bytes which must be shifted from appended item */ 1345 n_shift = 1346 sbytes[i] - 1347 tb->insert_size[0]; 1348 if (n_shift < 0) 1349 n_shift = 0; 1350 leaf_move_items 1351 (LEAF_FROM_S_TO_SNEW, tb, 1352 snum[i], n_shift, 1353 S_new[i]); 1354 1355 /* Calculate number of bytes which must remain in body after append to S_new[i] */ 1356 
n_rem = 1357 tb->insert_size[0] - 1358 sbytes[i]; 1359 if (n_rem < 0) 1360 n_rem = 0; 1361 /* Append part of body into S_new[0] */ 1362 buffer_info_init_bh(tb, &bi, S_new[i]); 1363 if (n_rem > zeros_num) { 1364 r_zeros_number = 0; 1365 r_body = 1366 body + n_rem - 1367 zeros_num; 1368 } else { 1369 r_body = body; 1370 r_zeros_number = 1371 zeros_num - n_rem; 1372 zeros_num -= 1373 r_zeros_number; 1374 } 1375 1376 leaf_paste_in_buffer(&bi, 0, 1377 n_shift, 1378 tb-> 1379 insert_size 1380 [0] - 1381 n_rem, 1382 r_body, 1383 r_zeros_number); 1384 { 1385 struct item_head *tmp; 1386 1387 tmp = 1388 B_N_PITEM_HEAD(S_new 1389 [i], 1390 0); 1391 if (is_indirect_le_ih 1392 (tmp)) { 1393 set_ih_free_space 1394 (tmp, 0); 1395 set_le_ih_k_offset 1396 (tmp, 1397 le_ih_k_offset 1398 (tmp) + 1399 (n_rem << 1400 (tb-> 1401 tb_sb-> 1402 s_blocksize_bits 1403 - 1404 UNFM_P_SHIFT))); 1405 } else { 1406 set_le_ih_k_offset 1407 (tmp, 1408 le_ih_k_offset 1409 (tmp) + 1410 n_rem); 1411 } 1412 } 1413 1414 tb->insert_size[0] = n_rem; 1415 if (!n_rem) 1416 pos_in_item++; 1417 } 1418 } else 1419 /* item falls wholly into S_new[i] */ 1420 { 1421 int leaf_mi; 1422 struct item_head *pasted; 1423 1424#ifdef CONFIG_REISERFS_CHECK 1425 struct item_head *ih_check = 1426 B_N_PITEM_HEAD(tbS0, item_pos); 1427 1428 if (!is_direntry_le_ih(ih_check) 1429 && (pos_in_item != ih_item_len(ih_check) 1430 || tb->insert_size[0] <= 0)) 1431 reiserfs_panic(tb->tb_sb, 1432 "PAP-12235", 1433 "pos_in_item " 1434 "must be equal " 1435 "to ih_item_len"); 1436#endif /* CONFIG_REISERFS_CHECK */ 1437 1438 leaf_mi = 1439 leaf_move_items(LEAF_FROM_S_TO_SNEW, 1440 tb, snum[i], 1441 sbytes[i], 1442 S_new[i]); 1443 1444 RFALSE(leaf_mi, 1445 "PAP-12240: unexpected value returned by leaf_move_items (%d)", 1446 leaf_mi); 1447 1448 /* paste into item */ 1449 buffer_info_init_bh(tb, &bi, S_new[i]); 1450 leaf_paste_in_buffer(&bi, 1451 item_pos - n + 1452 snum[i], 1453 pos_in_item, 1454 tb->insert_size[0], 1455 body, zeros_num); 
1456 1457 pasted = 1458 B_N_PITEM_HEAD(S_new[i], 1459 item_pos - n + 1460 snum[i]); 1461 if (is_direntry_le_ih(pasted)) { 1462 leaf_paste_entries(&bi, 1463 item_pos - 1464 n + snum[i], 1465 pos_in_item, 1466 1, 1467 (struct 1468 reiserfs_de_head 1469 *)body, 1470 body + 1471 DEH_SIZE, 1472 tb-> 1473 insert_size 1474 [0] 1475 ); 1476 } 1477 1478 /* if we paste to indirect item update ih_free_space */ 1479 if (is_indirect_le_ih(pasted)) 1480 set_ih_free_space(pasted, 0); 1481 zeros_num = tb->insert_size[0] = 0; 1482 } 1483 } 1484 1485 else { /* pasted item doesn't fall into S_new[i] */ 1486 1487 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1488 snum[i], sbytes[i], S_new[i]); 1489 } 1490 break; 1491 default: /* cases d and t */ 1492 reiserfs_panic(tb->tb_sb, "PAP-12245", 1493 "blknum > 2: unexpected mode: %s(%d)", 1494 (flag == 1495 M_DELETE) ? "DELETE" : ((flag == 1496 M_CUT) ? "CUT" 1497 : "UNKNOWN"), 1498 flag); 1499 } 1500 1501 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); 1502 insert_ptr[i] = S_new[i]; 1503 1504 RFALSE(!buffer_journaled(S_new[i]) 1505 || buffer_journal_dirty(S_new[i]) 1506 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", 1507 i, S_new[i]); 1508 } 1509 1510 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1511 affected item which remains in S */ 1512 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ 1513 1514 switch (flag) { 1515 case M_INSERT: /* insert item into S[0] */ 1516 buffer_info_init_tbS0(tb, &bi); 1517 leaf_insert_into_buf(&bi, item_pos, ih, body, 1518 zeros_num); 1519 1520 /* If we insert the first key change the delimiting key */ 1521 if (item_pos == 0) { 1522 if (tb->CFL[0]) /* can be 0 in reiserfsck */ 1523 replace_key(tb, tb->CFL[0], tb->lkey[0], 1524 tbS0, 0); 1525 1526 } 1527 break; 1528 1529 case M_PASTE:{ /* append item in S[0] */ 1530 struct item_head *pasted; 1531 1532 pasted = B_N_PITEM_HEAD(tbS0, 
item_pos); 1533 /* when directory, may be new entry already pasted */ 1534 if (is_direntry_le_ih(pasted)) { 1535 if (pos_in_item >= 0 && 1536 pos_in_item <= 1537 ih_entry_count(pasted)) { 1538 1539 RFALSE(!tb->insert_size[0], 1540 "PAP-12260: insert_size is 0 already"); 1541 1542 /* prepare space */ 1543 buffer_info_init_tbS0(tb, &bi); 1544 leaf_paste_in_buffer(&bi, 1545 item_pos, 1546 pos_in_item, 1547 tb-> 1548 insert_size 1549 [0], body, 1550 zeros_num); 1551 1552 /* paste entry */ 1553 leaf_paste_entries(&bi, 1554 item_pos, 1555 pos_in_item, 1556 1, 1557 (struct 1558 reiserfs_de_head 1559 *)body, 1560 body + 1561 DEH_SIZE, 1562 tb-> 1563 insert_size 1564 [0] 1565 ); 1566 if (!item_pos && !pos_in_item) { 1567 RFALSE(!tb->CFL[0] 1568 || !tb->L[0], 1569 "PAP-12270: CFL[0]/L[0] must be specified"); 1570 if (tb->CFL[0]) { 1571 replace_key(tb, 1572 tb-> 1573 CFL 1574 [0], 1575 tb-> 1576 lkey 1577 [0], 1578 tbS0, 1579 0); 1580 1581 } 1582 } 1583 tb->insert_size[0] = 0; 1584 } 1585 } else { /* regular object */ 1586 if (pos_in_item == ih_item_len(pasted)) { 1587 1588 RFALSE(tb->insert_size[0] <= 0, 1589 "PAP-12275: insert size must not be %d", 1590 tb->insert_size[0]); 1591 buffer_info_init_tbS0(tb, &bi); 1592 leaf_paste_in_buffer(&bi, 1593 item_pos, 1594 pos_in_item, 1595 tb-> 1596 insert_size 1597 [0], body, 1598 zeros_num); 1599 1600 if (is_indirect_le_ih(pasted)) { 1601 set_ih_free_space 1602 (pasted, 0); 1603 } 1604 tb->insert_size[0] = 0; 1605 } 1606#ifdef CONFIG_REISERFS_CHECK 1607 else { 1608 if (tb->insert_size[0]) { 1609 print_cur_tb("12285"); 1610 reiserfs_panic(tb-> 1611 tb_sb, 1612 "PAP-12285", 1613 "insert_size " 1614 "must be 0 " 1615 "(%d)", 1616 tb->insert_size[0]); 1617 } 1618 } 1619#endif /* CONFIG_REISERFS_CHECK */ 1620 1621 } 1622 } /* case M_PASTE: */ 1623 } 1624 } 1625#ifdef CONFIG_REISERFS_CHECK 1626 if (flag == M_PASTE && tb->insert_size[0]) { 1627 print_cur_tb("12290"); 1628 reiserfs_panic(tb->tb_sb, 1629 "PAP-12290", "insert_size is still not 
0 (%d)",
			       tb->insert_size[0]);
	}
#endif				/* CONFIG_REISERFS_CHECK */
	return 0;
}				/* Leaf level of the tree is balanced (end of balance_leaf) */

/* Make empty node: reset the block head of bi->bi_bh to "no items,
 * whole body free" and, when a parent buffer is recorded in bi, zero
 * the parent's on-disk dc_size for this child as well. */
void make_empty_node(struct buffer_info *bi)
{
	struct block_head *blkh;

	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item(blkh, 0);
	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));

	if (bi->bi_parent)
		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}

/* Get first empty buffer: claim the first buffer still present in
 * tb->FEB[] (set up before balancing started), format it as an empty
 * node and move it to tb->used[].  Panics if no free buffer is left. */
struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_info bi;

	/* find the first slot that still holds a free buffer */
	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");

	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
	make_empty_node(&bi);
	/* the contents were just initialized in memory, so they are valid */
	set_buffer_uptodate(tb->FEB[i]);
	tb->used[i] = tb->FEB[i];
	tb->FEB[i] = NULL;

	return tb->used[i];
}

/* This is now used because reiserfs_free_block has to be able to
** schedule.
1675*/ 1676static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) 1677{ 1678 int i; 1679 1680 if (buffer_dirty(bh)) 1681 reiserfs_warning(tb->tb_sb, "reiserfs-12320", 1682 "called with dirty buffer"); 1683 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) 1684 if (!tb->thrown[i]) { 1685 tb->thrown[i] = bh; 1686 get_bh(bh); /* free_thrown puts this */ 1687 return; 1688 } 1689 reiserfs_warning(tb->tb_sb, "reiserfs-12321", 1690 "too many thrown buffers"); 1691} 1692 1693static void free_thrown(struct tree_balance *tb) 1694{ 1695 int i; 1696 b_blocknr_t blocknr; 1697 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) { 1698 if (tb->thrown[i]) { 1699 blocknr = tb->thrown[i]->b_blocknr; 1700 if (buffer_dirty(tb->thrown[i])) 1701 reiserfs_warning(tb->tb_sb, "reiserfs-12322", 1702 "called with dirty buffer %d", 1703 blocknr); 1704 brelse(tb->thrown[i]); /* incremented in store_thrown */ 1705 reiserfs_free_block(tb->transaction_handle, NULL, 1706 blocknr, 0); 1707 } 1708 } 1709} 1710 1711void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) 1712{ 1713 struct block_head *blkh; 1714 blkh = B_BLK_HEAD(bh); 1715 set_blkh_level(blkh, FREE_LEVEL); 1716 set_blkh_nr_item(blkh, 0); 1717 1718 clear_buffer_dirty(bh); 1719 store_thrown(tb, bh); 1720} 1721 1722/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ 1723void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, 1724 struct buffer_head *src, int n_src) 1725{ 1726 1727 RFALSE(dest == NULL || src == NULL, 1728 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", 1729 src, dest); 1730 RFALSE(!B_IS_KEYS_LEVEL(dest), 1731 "vs-12310: invalid level (%z) for destination buffer. 
dest must be leaf",
	       dest);
	RFALSE(n_dest < 0 || n_src < 0,
	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));

	if (B_IS_ITEMS_LEVEL(src))
		/* source buffer contains leaf node: copy the key out of the
		 * item head (assumes the key sits at the start of
		 * struct item_head -- KEY_SIZE bytes are taken from it) */
		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
		       KEY_SIZE);
	else
		/* source is an internal node: copy one of its delimiting keys */
		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
		       KEY_SIZE);

	do_balance_mark_internal_dirty(tb, dest, 0);
}

/* Position, within the common parent FL[h], of the child that is the
 * left neighbor of the path node at level h.  When the path node is
 * its parent's first child (position 0) the neighbor is the last
 * child of FL[h]; otherwise it is the preceding slot. */
int get_left_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));

	if (Sh_position == 0)
		return B_NR_ITEMS(tb->FL[h]);
	else
		return Sh_position - 1;
}

/* Mirror image of get_left_neighbor_position(): position within FR[h]
 * of the right neighbor of the path node at level h. */
int get_right_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);

	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
		return 0;
	else
		return Sh_position + 1;
}

#ifdef CONFIG_REISERFS_CHECK

int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);

/* Debug-only: panic if any child pointer of the internal node bh
 * references a block that is_reusable() rejects. */
static void check_internal_node(struct super_block *s, struct buffer_head *bh,
				char *mes)
{
	struct disk_child *dc;
	int i;

	RFALSE(!bh, "PAP-12336: bh == 0");

	if (!bh || !B_IS_IN_TREE(bh))
		return;

	RFALSE(!buffer_dirty(bh) &&
	       !(buffer_journaled(bh) || 
buffer_journal_dirty(bh)),
	       "PAP-12337: buffer (%b) must be dirty", bh);
	dc = B_N_CHILD(bh, 0);

	/* an internal node with N items carries N + 1 child pointers */
	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
		if (!is_reusable(s, dc_block_number(dc), 1)) {
			print_cur_tb(mes);
			reiserfs_panic(s, "PAP-12338",
				       "invalid child pointer %y in %b",
				       dc, bh);
		}
	}
}

/* Warn and return 1 when bh is unusable for balancing: either locked
 * without having been journal-prepared, or no longer linked into the
 * tree.  Returns 0 when the buffer looks fine. */
static int locked_or_not_in_tree(struct tree_balance *tb,
				 struct buffer_head *bh, char *which)
{
	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
	    !B_IS_IN_TREE(bh)) {
		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
		return 1;
	}
	return 0;
}

/* Sanity pass run before any node is modified: panic if another
 * balance already appears active on this mount (cur_tb set), then
 * verify that every buffer about to be touched is unlocked and still
 * in the tree.  Returns nonzero if anything looks wrong. */
static int check_before_balancing(struct tree_balance *tb)
{
	int retval = 0;

	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
			       "occurred based on cur_tb not being null at "
			       "this point in code. do_balance cannot properly "
			       "handle concurrent tree accesses on a same "
			       "mount point.");
	}

	/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
	   prepped all of these for us). 
*/
	if (tb->lnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
		check_leaf(tb->L[0]);
	}
	if (tb->rnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
		check_leaf(tb->R[0]);
	}
	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
					"S[0]");
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));

	return retval;
}

/* Debug-only post-condition for the leaf level: each rebalanced
 * node's free space must agree with the dc_size recorded in its
 * parent's child entry -- checked for L[0], R[0] and S[0]. */
static void check_after_balance_leaf(struct tree_balance *tb)
{
	if (tb->lnum[0]) {
		if (B_FREE_SPACE(tb->L[0]) !=
		    MAX_CHILD_SIZE(tb->L[0]) -
		    dc_size(B_N_CHILD
			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
			print_cur_tb("12221");
			reiserfs_panic(tb->tb_sb, "PAP-12355",
				       "shift to left was incorrect");
		}
	}
	if (tb->rnum[0]) {
		if (B_FREE_SPACE(tb->R[0]) !=
		    MAX_CHILD_SIZE(tb->R[0]) -
		    dc_size(B_N_CHILD
			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
			print_cur_tb("12222");
			reiserfs_panic(tb->tb_sb, "PAP-12360",
				       "shift to right was incorrect");
		}
	}
	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
				PATH_H_POSITION(tb->tb_path, 1)))))) {
		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
					       PATH_H_POSITION(tb->tb_path,
							       1))));
		print_cur_tb("12223");
		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				 left,
				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
				 PATH_H_PBUFFER(tb->tb_path, 1),
				 PATH_H_POSITION(tb->tb_path, 1),
				 dc_size(B_N_CHILD
					 (PATH_H_PBUFFER(tb->tb_path, 1),
					  PATH_H_POSITION(tb->tb_path, 1))),
				 right);
		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
	}
}

/* Debug-only: run check_leaf() over the three leaf-level buffers that
 * balancing may have touched (left neighbor, right neighbor, S[0]). */
static void check_leaf_level(struct tree_balance *tb)
{
	check_leaf(tb->L[0]);
	check_leaf(tb->R[0]);
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
}

/* Debug-only: validate the path buffer and any shifted neighbors on
 * every internal level that took part in this balance (levels with a
 * nonzero insert_size). */
static void check_internal_levels(struct tree_balance *tb)
{
	int h;

	/* check all internal nodes */
	for (h = 1; tb->insert_size[h]; h++) {
		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
				    "BAD BUFFER ON PATH");
		if (tb->lnum[h])
			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
		if (tb->rnum[h])
			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
	}

}

#endif

/* Now we have all of the buffers that must be used in balancing of
   the tree.  We rely on the assumption that schedule() will not occur
   while do_balance works. ( Only interrupt handlers are acceptable.)
   We balance the tree according to the analysis made before this,
   using buffers already obtained.  For SMP support it will someday be
   necessary to add ordered locking of tb. */

/* Some interesting rules of balancing:

   we delete a maximum of two nodes per level per balancing: we never
   delete R, when we delete two of three nodes L, S, R then we move
   them into R.

   we only delete L if we are deleting two nodes, if we delete only
   one node we delete S

   if we shift leaves then we shift as much as we can: this is a
   deliberate policy of extremism in node packing which results in
   higher average utilization after repeated random balance operations
   at the cost of more memory copies and more balancing as a result of
   small insertions to full nodes.

   if we shift internal nodes we try to evenly balance the node
   utilization, with consequent less balancing at the cost of lower
   utilization.

   one could argue that the policy for directories in leaves should be
   that of internal nodes, but we will wait until another day to
   evaluate this....  It would be nice to someday measure and prove
   these assumptions as to what is optimal....

*/

/* Checks made just before the tree is actually modified: assert that
 * all involved buffers are usable and, under CONFIG_REISERFS_CHECK,
 * publish this tb as the one active balance on the mount. */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct
	   tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
/*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
	 "check");*/
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}

/* Teardown after the balance: verify the result on debug builds,
 * bump the s_do_balance statistic, release every node held for the
 * operation (unfix_nodes) and free the blocks thrown away during it
 * (free_thrown). */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif

	/* reiserfs_free_block is no longer schedule safe. 
So, we need to
	 ** put the buffers we want freed on the thrown list during do_balance,
	 ** and then free them now
	 */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes hold to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}

/* Apply the balancing plan prepared before this call: rebalance the
 * leaf level first, then carry any new keys/node pointers up through
 * the internal levels, and finally release everything that was held
 * for the operation. */
void do_balance(struct tree_balance *tb,	/* tree_balance structure */
		struct item_head *ih,	/* item header of inserted item */
		const char *body,	/* body of inserted item or bytes to paste */
		int flag)
{				/* i - insert, d - delete
				   c - cut, p - paste

				   Cut means delete part of an item
				   (includes removing an entry from a
				   directory).

				   Delete means delete whole item.

				   Insert means add a new item into the
				   tree.

				   Paste means to append to the end of an
				   existing file or to insert a directory
				   entry.  */
	int child_pos,		/* position of a child node in its parent */
	 h;			/* level of the tree being processed */
	struct item_head insert_key[2];	/* in our processing of one level
					   we sometimes determine what
					   must be inserted into the next
					   higher level.  This insertion
					   consists of a key or two keys
					   and their corresponding
					   pointers */
	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
						   level */

	/* remember the requested operation */
	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	/* the analysis was made against an older tree generation: the
	   plan is stale, so balancing cannot proceed */
	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
			       "changed");
	}
	/* if we have no real work to do  */
	if (!tb->insert_size[0]) {
		reiserfs_warning(tb->tb_sb, "PAP-12350",
				 "insert_size == 0, mode == %c", flag);
		unfix_nodes(tb);
		return;
	}

	/* NOTE(review): bumping the generation here presumably marks other
	   in-flight analyses stale -- confirm against fix_nodes */
	atomic_inc(&(fs_generation(tb->tb_sb)));
	do_balance_starts(tb);

	/* balance leaf returns 0 except if combining L R and S into
	   one node.  see balance_internal() for explanation of this
	   line of code. */
	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf(tb);
#endif

	/* Balance internal level of the tree.  Each level h is processed
	   while insert_size[h] says there is still work for it. */
	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
		child_pos =
		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);

	do_balance_completed(tb);

}