/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/**
 ** old_item_num
 ** old_entry_num
 ** set_entry_sizes
 ** create_virtual_node
 ** check_left
 ** check_right
 ** directory_part_size
 ** get_num_ver
 ** set_parameters
 ** is_leaf_removable
 ** are_leaves_removable
 ** get_empty_nodes
 ** get_lfree
 ** get_rfree
 ** is_left_neighbor_in_cache
 ** decrement_key
 ** get_far_parent
 ** get_parents
 ** can_node_be_removed
 ** ip_check_balance
 ** dc_check_balance_internal
 ** dc_check_balance_leaf
 ** dc_check_balance
 ** check_balance
 ** get_direct_parent
 ** get_neighbors
 ** fix_nodes
 **
 **
 **/

#include <linux/time.h>
#include <linux/string.h>
#include <linux/reiserfs_fs.h>
#include <linux/buffer_head.h>

/* To make any changes in the tree we find a node that contains the item
   to be changed/deleted, or the position in a node that we will insert a
   new item into. We call this node S. To do balancing we need to decide
   what we will shift to the left/right neighbor, or to a new node where
   the new item will go, etc. To make this analysis simpler we build a
   virtual node. The virtual node is an array of items that will replace
   the items of node S. (For instance, if we are going to delete an item,
   the virtual node does not contain it.) The virtual node keeps
   information about item sizes and types, mergeability of the first and
   last items, and the sizes of all entries in a directory item. We use
   this array of items when calculating what we can shift to neighbors and
   how many nodes we would need if we do not do any shifting, if we shift
   to the left/right neighbor, or to both.
 */

/* Taking an item number in the virtual node, return the number that the
   item has in the source buffer S.  For M_PASTE/M_CUT the layout is
   unchanged; for M_INSERT items at/after the insertion point are shifted
   right by one; for M_DELETE items after the deleted one are shifted left. */
static inline int old_item_num(int new_num, int affected_item_num, int mode)
{
	if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
		return new_num;

	if (mode == M_INSERT) {

		RFALSE(new_num == 0,
		       "vs-8005: for INSERT mode and item number of inserted item");

		return new_num - 1;
	}

	RFALSE(mode != M_DELETE,
	       "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
	       mode);
	/* delete mode */
	return new_num + 1;
}

/* Build the virtual node tb->tb_vn describing what S[h] will look like
   after the pending operation (vn_mode: insert/paste/cut/delete) is
   applied.  For internal levels (h > 0) only the resulting item count is
   computed; for the leaf level an array of virtual items is filled in,
   including left/right-mergeability flags used by the shift analysis. */
static void create_virtual_node(struct tree_balance *tb, int h)
{
	struct item_head *ih;
	struct virtual_node *vn = tb->tb_vn;
	int new_num;
	struct buffer_head *Sh;	/* this comes from tb->S[h] */

	Sh = PATH_H_PBUFFER(tb->tb_path, h);

	/* size of changed node */
	vn->vn_size =
	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];

	/* for internal nodes array of virtual items is not created */
	if (h) {
		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* number of items in virtual node */
	vn->vn_nr_item =
	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
	    ((vn->vn_mode == M_DELETE) ? 1 : 0);

	/* first virtual item: the virtual item array lives directly after
	   the virtual_node header inside tb->vn_buf */
	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);

	/* first item in the node */
	ih = B_N_PITEM_HEAD(Sh, 0);

	/* define the mergeability for 0-th item (if it is not being deleted) */
	if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;

	/* go through all items those remain in the virtual node (except for the new (inserted) one) */
	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
		int j;
		struct virtual_item *vi = vn->vn_vi + new_num;
		int is_affected =
		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);

		if (is_affected && vn->vn_mode == M_INSERT)
			continue;

		/* get item number in source node */
		j = old_item_num(new_num, vn->vn_affected_item_num,
				 vn->vn_mode);

		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
		vi->vi_ih = ih + j;
		vi->vi_item = B_I_PITEM(Sh, ih + j);
		vi->vi_uarea = vn->vn_free_ptr;

		// consume too much memory
		vn->vn_free_ptr +=
		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
			reiserfs_panic(tb->tb_sb,
				       "vs-8030: create_virtual_node: "
				       "virtual node space consumed");

		if (!is_affected)
			/* this is not being changed */
			continue;

		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
			vi->vi_new_data = vn->vn_data;	// pointer to data which is going to be pasted
		}
	}

	/* virtual inserted item is not defined yet */
	if (vn->vn_mode == M_INSERT) {
		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;

		RFALSE(vn->vn_ins_ih == 0,
		       "vs-8040: item header of inserted item is not specified");
		vi->vi_item_len = tb->insert_size[0];
		vi->vi_ih = vn->vn_ins_ih;
		vi->vi_item = vn->vn_data;
		vi->vi_uarea = vn->vn_free_ptr;

		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
			     tb->insert_size[0]);
	}

	/* set right merge flag we take right delimiting key and check whether it is a mergeable item */
	if (tb->CFR[0]) {
		struct reiserfs_key *key;

		key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
		if (op_is_left_mergeable(key, Sh->b_size)
		    && (vn->vn_mode != M_DELETE
			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
			    VI_TYPE_RIGHT_MERGEABLE;

#ifdef CONFIG_REISERFS_CHECK
		if (op_is_left_mergeable(key, Sh->b_size) &&
		    !(vn->vn_mode != M_DELETE
		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
			/* we delete last item and it could be merged with right neighbor's first item */
			if (!
			    (B_NR_ITEMS(Sh) == 1
			     && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
			     && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
				/* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
				print_block(Sh, 0, -1, -1);
				reiserfs_panic(tb->tb_sb,
					       "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
					       key, vn->vn_affected_item_num,
					       vn->vn_mode, M_DELETE);
			}
		}
#endif

	}
}

/* Using the virtual node, check how many items can be shifted to the left
   neighbor L[0].  Results go to tb->lnum[h] (whole items) and tb->lbytes
   (bytes/units of the partially shifted boundary item, or -1). */
static void check_left(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);

	/* internal level */
	if (h > 0) {
		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->lnum[h] = 0;
		tb->lbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8055: parent does not exist or invalid");

	vi = vn->vn_vi;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into L[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8055: invalid mode or balance condition failed");

		tb->lnum[0] = vn->vn_nr_item;
		tb->lbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* first item may be merged with last item in left neighbor,
	   in which case its item head is not needed there */
	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
		d_size = -((int)IH_SIZE), ih_size = 0;

	tb->lnum[0] = 0;
	for (i = 0; i < vn->vn_nr_item;
	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->lnum[0]++;
			continue;
		}

		/* the item cannot be shifted entirely, try to split it */
		/* check whether L[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {
			/* cannot shift even a part of the current item */
			tb->lbytes = -1;
			return;
		}
		cur_free -= ih_size;

		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
		if (tb->lbytes != -1)
			/* count partially shifted item */
			tb->lnum[0]++;

		break;
	}

	return;
}

/* Using the virtual node, check how many items can be shifted to the right
   neighbor R[0].  Mirror image of check_left: fills tb->rnum[h]/tb->rbytes,
   walking the virtual items from the last one backwards. */
static void check_right(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");

	/* internal level */
	if (h > 0) {
		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space */
		tb->rnum[h] = 0;
		tb->rbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8075: parent does not exist or invalid");

	vi = vn->vn_vi + vn->vn_nr_item - 1;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into R[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8080: invalid mode or balance condition failed");

		tb->rnum[h] = vn->vn_nr_item;
		tb->rbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* last item may be merged with first item in right neighbor */
	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
		d_size = -(int)IH_SIZE, ih_size = 0;

	tb->rnum[0] = 0;
	for (i = vn->vn_nr_item - 1; i >= 0;
	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->rnum[0]++;
			continue;
		}

		/* check whether R[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {	/* cannot shift even a part of the current item */
			tb->rbytes = -1;
			return;
		}

		/* R[0] can hold the header of the item and at least one byte of its body */
		cur_free -= ih_size;	/* cur_free is still > 0 */

		tb->rbytes = op_check_right(vi, cur_free);
		if (tb->rbytes != -1)
			/* count partially shifted item */
			tb->rnum[0]++;

		break;
	}

	return;
}

/*
 * from - number of items, which are shifted to left neighbor entirely
 * to - number of item, which are shifted to right neighbor entirely
 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
static
int get_num_ver(int mode, struct tree_balance *tb, int h,
		int from, int from_bytes,
		int to, int to_bytes, short *snum012, int flow)
{
	/* Returns the number of nodes (S[0] plus new nodes) needed to hold
	   what remains after shifting 'from'/'to' items (plus boundary
	   bytes) out of the virtual node.  snum012[0..2] receive the item
	   counts per node, snum012[3]/[4] the s1bytes/s2bytes split sizes.
	   'flow' != 0 allows splitting items across node boundaries. */
	int i;
	int cur_free;
	//    int bytes;
	int units;
	struct virtual_node *vn = tb->tb_vn;
	//    struct virtual_item * vi;

	int total_node_size, max_node_size, current_item_size;
	int needed_nodes;
	int start_item,		/* position of item we start filling node from */
	 end_item,		/* position of item we finish filling node by */
	 start_bytes,		/* number of first bytes (entries for directory) of start_item-th item
				   we do not include into node that is being filled */
	 end_bytes;		/* number of last bytes (entries for directory) of end_item-th item
				   we do not include into node that is being filled */
	int split_item_positions[2];	/* these are positions in virtual item of
					   items, that are split between S[0] and
					   S1new and S1new and S2new */

	split_item_positions[0] = -1;
	split_item_positions[1] = -1;

	/* We only create additional nodes if we are in insert or paste mode
	   or we are in replace mode at the internal level. If h is 0 and
	   the mode is M_REPLACE then in fix_nodes we change the mode to
	   paste or insert before we get here in the code.  */
	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
	       "vs-8100: insert_size < 0 in overflow");

	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));

	/* snum012 [0-2] - number of items, that lay
	   to S[0], first new node and second new node */
	snum012[3] = -1;	/* s1bytes */
	snum012[4] = -1;	/* s2bytes */

	/* internal level */
	if (h > 0) {
		i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
		if (i == max_node_size)
			return 1;
		return (i / max_node_size + 1);
	}

	/* leaf level */
	needed_nodes = 1;
	total_node_size = 0;
	cur_free = max_node_size;

	// start from 'from'-th item
	start_item = from;
	// skip its first 'start_bytes' units
	start_bytes = ((from_bytes != -1) ? from_bytes : 0);

	// last included item is the 'end_item'-th one
	end_item = vn->vn_nr_item - to - 1;
	// do not count last 'end_bytes' units of 'end_item'-th item
	end_bytes = (to_bytes != -1) ? to_bytes : 0;

	/* go through all items beginning from the start_item-th item and ending by
	   the end_item-th item. Do not count first 'start_bytes' units of
	   'start_item'-th item and last 'end_bytes' of 'end_item'-th item */

	for (i = start_item; i <= end_item; i++) {
		struct virtual_item *vi = vn->vn_vi + i;
		int skip_from_end = ((i == end_item) ? end_bytes : 0);

		RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");

		/* get size of current item */
		current_item_size = vi->vi_item_len;

		/* do not take in calculation head part (from_bytes) of from-th item */
		current_item_size -=
		    op_part_size(vi, 0 /*from start */ , start_bytes);

		/* do not take in calculation tail part of last item */
		current_item_size -=
		    op_part_size(vi, 1 /*from end */ , skip_from_end);

		/* if item fits into current node entirely */
		if (total_node_size + current_item_size <= max_node_size) {
			snum012[needed_nodes - 1]++;
			total_node_size += current_item_size;
			start_bytes = 0;
			continue;
		}

		if (current_item_size > max_node_size) {
			/* virtual item length is longer, than max size of item in
			   a node. It is impossible for direct item */
			RFALSE(is_direct_le_ih(vi->vi_ih),
			       "vs-8110: "
			       "direct item length is %d. It can not be longer than %d",
			       current_item_size, max_node_size);
			/* we will try to split it */
			flow = 1;
		}

		if (!flow) {
			/* as we do not split items, take new node and continue */
			needed_nodes++;
			i--;	/* re-examine the same item in the new node */
			total_node_size = 0;
			continue;
		}
		// calculate number of item units which fit into node being
		// filled
		{
			int free_space;

			free_space = max_node_size - total_node_size - IH_SIZE;
			units =
			    op_check_left(vi, free_space, start_bytes,
					  skip_from_end);
			if (units == -1) {
				/* nothing fits into current node, take new node and continue */
				needed_nodes++, i--, total_node_size = 0;
				continue;
			}
		}

		/* something fits into the current node */
		//if (snum012[3] != -1 || needed_nodes != 1)
		//  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
		//snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
		start_bytes += units;
		snum012[needed_nodes - 1 + 3] = units;

		if (needed_nodes > 2)
			reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
					 "split_item_position is out of boundary");
		snum012[needed_nodes - 1]++;
		split_item_positions[needed_nodes - 1] = i;
		needed_nodes++;
		/* continue from the same item with start_bytes != -1 */
		start_item = i;
		i--;
		total_node_size = 0;
	}

	// sum012[4] (if it is not -1) contains number of units of which
	// are to be in S1new, snum012[3] - to be in S0. They are supposed
	// to be S1bytes and S2bytes correspondingly, so recalculate
	if (snum012[4] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S1new;

		split_item_num = split_item_positions[1];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S1new =
		    ((split_item_positions[0] ==
		      split_item_positions[1]) ? snum012[3] : 0);

		// s2bytes
		snum012[4] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
		    bytes_to_r - bytes_to_l - bytes_to_S1new;

		if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
		    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
			reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
					 "directory or indirect item");
	}

	/* now we know S2bytes, calculate S1bytes */
	if (snum012[3] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S2new;

		split_item_num = split_item_positions[0];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S2new =
		    ((split_item_positions[0] == split_item_positions[1]
		      && snum012[4] != -1) ? snum012[4] : 0);

		// s1bytes
		snum012[3] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
		    bytes_to_r - bytes_to_l - bytes_to_S2new;
	}

	return needed_nodes;
}

#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance *cur_tb;
#endif

/* Set parameters for balancing.
 * Performs write of results of analysis of balancing into structure tb,
 * where it will later be used by the functions that actually do the balancing.
 * Parameters:
 *	tb	tree_balance structure;
 *	h	current level of the node;
 *	lnum	number of items from S[h] that must be shifted to L[h];
 *	rnum	number of items from S[h] that must be shifted to R[h];
 *	blk_num	number of blocks that S[h] will be splitted into;
 *	s012	number of items that fall into splitted nodes.
 *	lbytes	number of bytes which flow to the left neighbor from the item that is not
 *		not shifted entirely
 *	rbytes	number of bytes which flow to the right neighbor from the item that is not
 *		not shifted entirely
 *	s1bytes	number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
 */

static void set_parameters(struct tree_balance *tb, int h, int lnum,
			   int rnum, int blk_num, short *s012, int lb, int rb)
{

	tb->lnum[h] = lnum;
	tb->rnum[h] = rnum;
	tb->blknum[h] = blk_num;

	if (h == 0) {		/* only for leaf level */
		if (s012 != NULL) {
			tb->s0num = *s012++,
			    tb->s1num = *s012++, tb->s2num = *s012++;
			tb->s1bytes = *s012++;
			tb->s2bytes = *s012;
		}
		tb->lbytes = lb;
		tb->rbytes = rb;
	}
	PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
	PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);

	PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
	PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
}

/* check, does node disappear if we shift tb->lnum[0] items to left
   neighbor and tb->rnum[0] to the right one.
 */
static int is_leaf_removable(struct tree_balance *tb)
{
	struct virtual_node *vn = tb->tb_vn;
	int to_left, to_right;
	int size;
	int remain_items;

	/* number of items, that will be shifted to left (right) neighbor
	   entirely */
	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
	remain_items = vn->vn_nr_item;

	/* how many items remain in S[0] after shiftings to neighbors */
	remain_items -= (to_left + to_right);

	if (remain_items < 1) {
		/* all content of node can be shifted to neighbors */
		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
			       NULL, -1, -1);
		return 1;
	}

	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
		/* S[0] is not removable */
		return 0;

	/* check, whether we can divide 1 remaining item between neighbors */

	/* get size of remaining item (in item units) */
	size = op_unit_num(&(vn->vn_vi[to_left]));

	if (tb->lbytes + tb->rbytes >= size) {
		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
			       tb->lbytes, -1);
		return 1;
	}

	return 0;
}

/* check whether L, S, R can be joined in one node */
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
{
	struct virtual_node *vn = tb->tb_vn;
	int ih_size;
	struct buffer_head *S0;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);

	/* ih_size is the header overhead we can save when boundary items
	   merge with the neighbors' items */
	ih_size = 0;
	if (vn->vn_nr_item) {
		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
			ih_size += IH_SIZE;

		if (vn->vn_vi[vn->vn_nr_item - 1].
		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
			ih_size += IH_SIZE;
	} else {
		/* there was only one item and it will be deleted */
		struct item_head *ih;

		RFALSE(B_NR_ITEMS(S0) != 1,
		       "vs-8125: item number must be 1: it is %d",
		       B_NR_ITEMS(S0));

		ih = B_N_PITEM_HEAD(S0, 0);
		if (tb->CFR[0]
		    && !comp_short_le_keys(&(ih->ih_key),
					   B_N_PDELIM_KEY(tb->CFR[0],
							  tb->rkey[0])))
			if (is_direntry_le_ih(ih)) {
				/* Directory must be in correct state here: that is
				   somewhere at the left side should exist first directory
				   item. But the item being deleted can not be that first
				   one because its right neighbor is item of the same
				   directory. (But first item always gets deleted in last
				   turn). So, neighbors of deleted item can be merged, so
				   we can save ih_size */
				ih_size = IH_SIZE;

				/* we might check that left neighbor exists and is of the
				   same directory */
				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
				       "vs-8130: first directory item can not be removed until directory is not empty");
			}

	}

	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
		PROC_INFO_INC(tb->tb_sb, leaves_removable);
		return 1;
	}
	return 0;

}

/* when we do not split item, lnum and rnum are numbers of entire items */
/* NOTE: these macros read h, Sh, vn, lpar/rpar, lset/rset, lnver/rnver,
   snum012 and tb from the caller's scope. */
#define SET_PAR_SHIFT_LEFT \
if (h)\
{\
   int to_l;\
   \
   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
	      (MAX_NR_KEY(Sh) + 1 - lpar);\
	      \
	      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
}\
else \
{\
   if (lset==LEFT_SHIFT_FLOW)\
     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
		     tb->lbytes, -1);\
   else\
     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
		     -1, -1);\
}

#define SET_PAR_SHIFT_RIGHT \
if (h)\
{\
   int to_r;\
   \
   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
   \
   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
}\
else \
{\
   if (rset==RIGHT_SHIFT_FLOW)\
     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
		  -1, tb->rbytes);\
   else\
     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
		  -1, -1);\
}

/* Release every buffer reference held by the tree_balance: the search
   path plus all cached neighbor/parent buffers at every height. */
static void free_buffers_in_tb(struct tree_balance *p_s_tb)
{
	int n_counter;

	decrement_counters_in_path(p_s_tb->tb_path);

	for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
		decrement_bcount(p_s_tb->L[n_counter]);
		p_s_tb->L[n_counter] = NULL;
		decrement_bcount(p_s_tb->R[n_counter]);
		p_s_tb->R[n_counter] = NULL;
		decrement_bcount(p_s_tb->FL[n_counter]);
		p_s_tb->FL[n_counter] = NULL;
		decrement_bcount(p_s_tb->FR[n_counter]);
		p_s_tb->FR[n_counter] = NULL;
		decrement_bcount(p_s_tb->CFL[n_counter]);
		p_s_tb->CFL[n_counter] = NULL;
		decrement_bcount(p_s_tb->CFR[n_counter]);
		p_s_tb->CFR[n_counter] = NULL;
	}
}

/* Get new buffers for storing new nodes that are created while balancing.
 * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *	        CARRY_ON - schedule didn't occur while the function worked;
 *	        NO_DISK_SPACE - no disk space.
 */
/* The function is NOT SCHEDULE-SAFE!
 */
static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_new_bh,
	    *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
	b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
	int n_counter, n_number_of_freeblk, n_amount_needed,	/* number of needed empty blocks */
	 n_retval = CARRY_ON;
	struct super_block *p_s_sb = p_s_tb->tb_sb;

	/* number_of_freeblk is the number of empty blocks which have been
	   acquired for use by the balancing algorithm minus the number of
	   empty blocks used in the previous levels of the analysis,
	   number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
	   after empty blocks are acquired, and the balancing analysis is
	   then restarted, amount_needed is the number needed by this level
	   (n_h) of the balancing analysis.

	   Note that for systems with many processes writing, it would be
	   more layout optimal to calculate the total number needed by all
	   levels and then to run reiserfs_new_blocks to get all of them at once. */

	/* Initiate number_of_freeblk to the amount acquired prior to the restart of
	   the analysis or 0 if not restarted, then subtract the amount needed
	   by all of the levels of the tree below n_h. */
	/* blknum includes S[n_h], so we subtract 1 in this calculation */
	for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
	     n_counter < n_h; n_counter++)
		n_number_of_freeblk -=
		    (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
						   1) : 0;

	/* Allocate missing empty blocks. */
	/* if p_s_Sh == 0  then we are getting a new root */
	n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
	/* Amount_needed = the amount that we need more than the amount that we have. */
	if (n_amount_needed > n_number_of_freeblk)
		n_amount_needed -= n_number_of_freeblk;
	else			/* If we have enough already then there is nothing to do. */
		return CARRY_ON;

	/* No need to check quota - is not allocated for blocks used for formatted nodes */
	if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
				       n_amount_needed) == NO_DISK_SPACE)
		return NO_DISK_SPACE;

	/* for each blocknumber we just got, get a buffer and stick it on FEB */
	for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
	     n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {

		RFALSE(!*p_n_blocknr,
		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");

		p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
		RFALSE(buffer_dirty(p_s_new_bh) ||
		       buffer_journaled(p_s_new_bh) ||
		       buffer_journal_dirty(p_s_new_bh),
		       "PAP-8140: journlaled or dirty buffer %b for the new block",
		       p_s_new_bh);

		/* Put empty buffers into the array. */
		RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
		       "PAP-8141: busy slot for new buffer");

		set_buffer_journal_new(p_s_new_bh);
		p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
	}

	if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
		n_retval = REPEAT_SEARCH;

	return n_retval;
}

/* Get free space of the left neighbor, which is stored in the parent
 * node of the left neighbor.  */
static int get_lfree(struct tree_balance *tb, int h)
{
	struct buffer_head *l, *f;
	int order;

	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
		return 0;

	if (f == l)
		/* left neighbor shares our parent: its pointer is just
		   before ours */
		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
	else {
		/* left neighbor has a different parent FL[h]: it is that
		   parent's last child */
		order = B_NR_ITEMS(l);
		f = l;
	}

	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
}

/* Get free space of the right neighbor,
 * which is stored in the parent node of the right neighbor.
 */
static int get_rfree(struct tree_balance *tb, int h)
{
	struct buffer_head *r, *f;
	int order;

	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
		return 0;

	if (f == r)
		/* right neighbor shares our parent: its pointer is just
		   after ours */
		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
	else {
		/* right neighbor has a different parent FR[h]: it is that
		   parent's first child */
		order = 0;
		f = r;
	}

	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));

}

/* Check whether left neighbor is in memory. */
static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_father, *left;
	struct super_block *p_s_sb = p_s_tb->tb_sb;
	b_blocknr_t n_left_neighbor_blocknr;
	int n_left_neighbor_position;

	if (!p_s_tb->FL[n_h])	/* Father of the left neighbor does not exist. */
		return 0;

	/* Calculate father of the node to be balanced. */
	p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);

	RFALSE(!p_s_father ||
	       !B_IS_IN_TREE(p_s_father) ||
	       !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
	       !buffer_uptodate(p_s_father) ||
	       !buffer_uptodate(p_s_tb->FL[n_h]),
	       "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
	       p_s_father, p_s_tb->FL[n_h]);

	/* Get position of the pointer to the left neighbor into the left father. */
	n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
	    p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
	/* Get left neighbor block number. */
	n_left_neighbor_blocknr =
	    B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
	/* Look for the left neighbor in the cache. */
	if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {

		RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
		       "vs-8170: left neighbor (%b %z) is not in the tree",
		       left, left);
		put_bh(left);
		return 1;
	}

	return 0;
}

#define LEFT_PARENTS  'l'
#define RIGHT_PARENTS 'r'

/* Decrement key to the key of the item logically preceding it;
   dispatched through the item-type operations table. */
static void decrement_key(struct cpu_key *p_s_key)
{
	// call item specific function for this key
	item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
}

/* Calculate far left/right parent of the left/right neighbor of the current node, that
 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
 * Calculate left/right common parent of the current node and L[h]/R[h].
 * Calculate left/right delimiting key position.
 * Returns:	PATH_INCORRECT    - path in the tree is not correct;
 *		SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *	        CARRY_ON         - schedule didn't occur while the function worked;
 */
static int get_far_parent(struct tree_balance *p_s_tb,
			  int n_h,
			  struct buffer_head **pp_s_father,
			  struct buffer_head **pp_s_com_father, char c_lr_par)
{
	struct buffer_head *p_s_parent;
	INITIALIZE_PATH(s_path_to_neighbor_father);
	struct treepath *p_s_path = p_s_tb->tb_path;
	struct cpu_key s_lr_father_key;
	int n_counter,
	    n_position = INT_MAX,
	    n_first_last_position = 0,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);

	/* Starting from F[n_h] go upwards in the tree, and look for the common
	   ancestor of F[n_h], and its neighbor l/r, that should be obtained. */

	n_counter = n_path_offset;

	RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-8180: invalid path length");

	for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
		/* Check whether parent of the current buffer in the path is really parent in the tree. */
		if (!B_IS_IN_TREE
		    (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
			return REPEAT_SEARCH;
		/* Check whether position in the parent is correct. */
		if ((n_position =
		     PATH_OFFSET_POSITION(p_s_path,
					  n_counter - 1)) >
		    B_NR_ITEMS(p_s_parent))
			return REPEAT_SEARCH;
		/* Check whether parent at the path really points to the child. */
		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
		    PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
			return REPEAT_SEARCH;
		/* Return delimiting key if position in the parent is not equal to first/last one. */
		if (c_lr_par == RIGHT_PARENTS)
			n_first_last_position = B_NR_ITEMS(p_s_parent);
		if (n_position != n_first_last_position) {
			*pp_s_com_father = p_s_parent;
			get_bh(*pp_s_com_father);
			/*(*pp_s_com_father = p_s_parent)->b_count++; */
			break;
		}
	}

	/* if we are in the root of the tree, then there is no common father */
	if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
		/* Check whether first buffer in the path is the root of the tree. */
		if (PATH_OFFSET_PBUFFER
		    (p_s_tb->tb_path,
		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
		    SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
			*pp_s_father = *pp_s_com_father = NULL;
			return CARRY_ON;
		}
		return REPEAT_SEARCH;
	}

	RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
	       "PAP-8185: (%b %z) level too small",
	       *pp_s_com_father, *pp_s_com_father);

	/* Check whether the common parent is locked. */

	if (buffer_locked(*pp_s_com_father)) {
		__wait_on_buffer(*pp_s_com_father);
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(*pp_s_com_father);
			return REPEAT_SEARCH;
		}
	}

	/* So, we got common parent of the current node and its left/right neighbor.
	   Now we are getting the parent of the left/right neighbor. */

	/* Form key to get parent of the left/right neighbor. */
	le_key2cpu_key(&s_lr_father_key,
		       B_N_PDELIM_KEY(*pp_s_com_father,
				      (c_lr_par ==
				       LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
							n_position -
							1) : (p_s_tb->rkey[n_h -
									   1] =
							      n_position)));

	if (c_lr_par == LEFT_PARENTS)
		decrement_key(&s_lr_father_key);

	if (search_by_key
	    (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
	     n_h + 1) == IO_ERROR)
		// path is released
		return IO_ERROR;

	if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
		decrement_counters_in_path(&s_path_to_neighbor_father);
		decrement_bcount(*pp_s_com_father);
		return REPEAT_SEARCH;
	}

	*pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

	RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
	       "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
	RFALSE(s_path_to_neighbor_father.path_length <
	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");

	/* drop the neighbor-father buffer itself from the path before
	   releasing the rest; the caller keeps *pp_s_father referenced */
	s_path_to_neighbor_father.path_length--;
	decrement_counters_in_path(&s_path_to_neighbor_father);
	return CARRY_ON;
}

/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
 * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
 * Returns:	REPEAT_SEARCH (via get_far_parent) - tree changed, retry;
 *		CARRY_ON - parents/common parents referenced and stored in tb;
 */
static int get_parents(struct tree_balance *p_s_tb, int n_h)
{
	struct treepath *p_s_path = p_s_tb->tb_path;
	int n_position,
	    n_ret_value,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
	struct buffer_head *p_s_curf, *p_s_curcf;

	/* Current node is the root of the tree or will be root of the tree */
	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
		/* The root can not have parents.
		   Release nodes which previously were obtained as parents of the current node neighbors. */
		decrement_bcount(p_s_tb->FL[n_h]);
		decrement_bcount(p_s_tb->CFL[n_h]);
		decrement_bcount(p_s_tb->FR[n_h]);
		decrement_bcount(p_s_tb->CFR[n_h]);
		p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
		    p_s_tb->CFR[n_h] = NULL;
		return CARRY_ON;
	}

	/* Get parent FL[n_path_offset] of L[n_path_offset]. */
	if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
		/* Current node is not the first child of its parent, so the
		   parent is both FL[n_h] and CFL[n_h] — take two references,
		   one for each role. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->lkey[n_h] = n_position - 1;
	} else {
		/* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
		   Calculate current common parent of L[n_path_offset] and the current node. Note that
		   CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
		   Calculate lkey[n_path_offset]. */
		if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
						  &p_s_curcf,
						  LEFT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	}

	/* Replace previously cached left-side parents with the new ones. */
	decrement_bcount(p_s_tb->FL[n_h]);
	p_s_tb->FL[n_h] = p_s_curf;	/* New initialization of FL[n_h]. */
	decrement_bcount(p_s_tb->CFL[n_h]);
	p_s_tb->CFL[n_h] = p_s_curcf;	/* New initialization of CFL[n_h]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);

/* Get parent FR[n_h] of R[n_h]. */

/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
	if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
   Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
   not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
		if ((n_ret_value =
		     get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
				    RIGHT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	} else {
/* Current node is not the last child of its parent F[n_h]: the parent
   doubles as FR and CFR — again two references. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->rkey[n_h] = n_position;
	}

	decrement_bcount(p_s_tb->FR[n_h]);
	p_s_tb->FR[n_h] = p_s_curf;	/* New initialization of FR[n_path_offset]. */

	decrement_bcount(p_s_tb->CFR[n_h]);
	p_s_tb->CFR[n_h] = p_s_curcf;	/* New initialization of CFR[n_path_offset]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);

	return CARRY_ON;
}

/* it is possible to remove node as result of shiftings to
   neighbors even when we insert or paste item.
   Returns NO_BALANCING_NEEDED when the new item fits into S[h] without any
   shifting (parameters are set here as a side effect); otherwise returns
   !NO_BALANCING_NEEDED and the caller must do a full balance analysis. */
static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
				      struct tree_balance *tb, int h)
{
	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	int levbytes = tb->insert_size[h];
	struct item_head *ih;
	struct reiserfs_key *r_key = NULL;

	ih = B_N_PITEM_HEAD(Sh, 0);
	if (tb->CFR[h])
		r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);

	/* If the combined free space of L, S and R cannot absorb S[h]'s
	   contents plus the pending change, S[h] cannot be emptied. */
	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
	    /* shifting may merge items which might save space */
	    -
	    ((!h
	      && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
	    -
	    ((!h && r_key
	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
	    + ((h) ? KEY_SIZE : 0)) {
		/* node can not be removed */
		if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
			if (!h)
				tb->s0num =
				    B_NR_ITEMS(Sh) +
				    ((mode == M_INSERT) ? 1 : 0);
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
	}
	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
	return !NO_BALANCING_NEEDED;
}

/* Check whether current node S[h] is balanced when increasing its size by
 * Inserting or Pasting.
 * Calculate parameters for balancing for current level h.
 * Parameters:
 *	tb	tree_balance structure;
 *	h	current level of the node;
 *	inum	item number in S[h];
 *	mode	i - insert, p - paste;
 * Returns:	1 - schedule occurred;
 *	        0 - balancing for higher levels needed;
 *	       -1 - no balancing for higher levels needed;
 *	       -2 - no disk space.
 */
/* ip means Inserting or Pasting */
static int ip_check_balance(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;
	int levbytes,		/* Number of bytes that must be inserted into (value
				   is negative if bytes are deleted) buffer which
				   contains node being balanced.  The mnemonic is
				   that the attempted change in node space used level
				   is levbytes bytes. */
	 n_ret_value;

	int lfree, sfree, rfree /* free space in L, S and R */ ;

	/* nver is short for number of vertixes, and lnver is the number if
	   we shift to the left, rnver is the number if we shift to the
	   right, and lrnver is the number if we shift in both directions.
	   The goal is to minimize first the number of vertixes, and second,
	   the number of vertixes whose contents are changed by shifting,
	   and third the number of uncached vertixes whose contents are
	   changed by shifting and must be read from disk.  */
	int nver, lnver, rnver, lrnver;

	/* used at leaf level only, S0 = S[0] is the node being balanced,
	   sInum [ I = 0,1,2 ] is the number of items that will
	   remain in node SI after balancing.  S1 and S2 are new
	   nodes that might be created. */

	/* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
	   where 4th parameter is s1bytes and 5th - s2bytes
	 */
	short snum012[40] = { 0, };	/* s0num, s1num, s2num for 8 cases
					   0,1 - do not shift and do not shift but bottle
					   2 - shift only whole item to left
					   3 - shift to left and bottle as much as possible
					   4,5 - shift to right (whole items and as much as possible
					   6,7 - shift to both directions (whole items and as much as possible)
					 */

	/* Sh is the node whose balance is currently being checked */
	struct buffer_head *Sh;

	Sh = PATH_H_PBUFFER(tb->tb_path, h);
	levbytes = tb->insert_size[h];

	/* Calculate balance parameters for creating new root. */
	if (!Sh) {
		if (!h)
			reiserfs_panic(tb->tb_sb,
				       "vs-8210: ip_check_balance: S[0] can not be 0");
		switch (n_ret_value = get_empty_nodes(tb, h)) {
		case CARRY_ON:
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */

		case NO_DISK_SPACE:
		case REPEAT_SEARCH:
			return n_ret_value;
		default:
			reiserfs_panic(tb->tb_sb,
				       "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
		}
	}

	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)	/* get parents of S[h] neighbors. */
		return n_ret_value;

	sfree = B_FREE_SPACE(Sh);

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
	    NO_BALANCING_NEEDED)
		/* and new item fits into node S[h] without any shifting */
		return NO_BALANCING_NEEDED;

	create_virtual_node(tb, h);

	/*
	   determine maximal number of items we can shift to the left neighbor (in tb structure)
	   and the maximal number of bytes that can flow to the left neighbor
	   from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_left(tb, h, lfree);

	/*
	   determine maximal number of items we can shift to the right neighbor (in tb structure)
	   and the maximal number of bytes that can flow to the right neighbor
	   from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_right(tb, h, rfree);

	/* all contents of internal node S[h] can be moved into its
	   neighbors, S[h] will be removed after balancing */
	if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
		int to_r;

		/* Since we are working on internal nodes, and our internal
		   nodes have fixed size entries, then we can balance by the
		   number of items rather than the space they consume.  In this
		   routine we set the left node equal to the right node,
		   allowing a difference of less than or equal to 1 child
		   pointer. */
		to_r =
		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
						tb->rnum[h]);
		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
			       -1, -1);
		return CARRY_ON;
	}

	/* this checks balance condition, that any two neighboring nodes can not fit in one node */
	RFALSE(h &&
	       (tb->lnum[h] >= vn->vn_nr_item + 1 ||
		tb->rnum[h] >= vn->vn_nr_item + 1),
	       "vs-8220: tree is not balanced on internal level");
	RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
		      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
	       "vs-8225: tree is not balanced on leaf level");

	/* all contents of S[0] can be moved into its neighbors
	   S[0] will be removed after balancing. */
	if (!h && is_leaf_removable(tb))
		return CARRY_ON;

	/* why do we perform this check here rather than earlier??
	   Answer: we can win 1 node in some cases above.  Moreover we
	   checked it above, when we checked, that S[0] is not removable
	   in principle */
	if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
		if (!h)
			tb->s0num = vn->vn_nr_item;
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	{
		int lpar, rpar, nset, lset, rset, lrset;
		/*
		 * regular overflowing of the node
		 */

		/* get_num_ver works in 2 modes (FLOW & NO_FLOW)
		   lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
		   nset, lset, rset, lrset - shows, whether flowing items give better packing
		 */
#define FLOW 1
#define NO_FLOW 0		/* do not any splitting */

		/* we choose one the following */
#define NOTHING_SHIFT_NO_FLOW	0
#define NOTHING_SHIFT_FLOW	5
#define LEFT_SHIFT_NO_FLOW	10
#define LEFT_SHIFT_FLOW		15
#define RIGHT_SHIFT_NO_FLOW	20
#define RIGHT_SHIFT_FLOW	25
#define LR_SHIFT_NO_FLOW	30
#define LR_SHIFT_FLOW		35

		lpar = tb->lnum[h];
		rpar = tb->rnum[h];

		/* calculate number of blocks S[h] must be split into when
		   nothing is shifted to the neighbors,
		   as well as number of items in each part of the split node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
		nset = NOTHING_SHIFT_NO_FLOW;
		nver = get_num_ver(vn->vn_mode, tb, h,
				   0, -1, h ? vn->vn_nr_item : 0, -1,
				   snum012, NO_FLOW);

		if (!h) {
			int nver1;

			/* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
			nver1 = get_num_ver(vn->vn_mode, tb, h,
					    0, -1, 0, -1,
					    snum012 + NOTHING_SHIFT_FLOW, FLOW);
			/* keep whichever variant needs fewer nodes */
			if (nver > nver1)
				nset = NOTHING_SHIFT_FLOW, nver = nver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   l_shift_num first items and l_shift_bytes of the right most
		   liquid item to be shifted are shifted to the left neighbor,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		lset = LEFT_SHIFT_NO_FLOW;
		lnver = get_num_ver(vn->vn_mode, tb, h,
				    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				    -1, h ? vn->vn_nr_item : 0, -1,
				    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lnver1;

			lnver1 = get_num_ver(vn->vn_mode, tb, h,
					     lpar -
					     ((tb->lbytes != -1) ? 1 : 0),
					     tb->lbytes, 0, -1,
					     snum012 + LEFT_SHIFT_FLOW, FLOW);
			if (lnver > lnver1)
				lset = LEFT_SHIFT_FLOW, lnver = lnver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   r_shift_num first items and r_shift_bytes of the left most
		   liquid item to be shifted are shifted to the right neighbor,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		rset = RIGHT_SHIFT_NO_FLOW;
		rnver = get_num_ver(vn->vn_mode, tb, h,
				    0, -1,
				    h ? (vn->vn_nr_item - rpar) : (rpar -
								   ((tb->
								     rbytes !=
								     -1) ? 1 :
								    0)), -1,
				    snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int rnver1;

			rnver1 = get_num_ver(vn->vn_mode, tb, h,
					     0, -1,
					     (rpar -
					      ((tb->rbytes != -1) ? 1 : 0)),
					     tb->rbytes,
					     snum012 + RIGHT_SHIFT_FLOW, FLOW);

			if (rnver > rnver1)
				rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   items are shifted in both directions,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		lrset = LR_SHIFT_NO_FLOW;
		lrnver = get_num_ver(vn->vn_mode, tb, h,
				     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				     -1,
				     h ? (vn->vn_nr_item - rpar) : (rpar -
								    ((tb->
								      rbytes !=
								      -1) ? 1 :
								     0)), -1,
				     snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lrnver1;

			lrnver1 = get_num_ver(vn->vn_mode, tb, h,
					      lpar -
					      ((tb->lbytes != -1) ? 1 : 0),
					      tb->lbytes,
					      (rpar -
					       ((tb->rbytes != -1) ? 1 : 0)),
					      tb->rbytes,
					      snum012 + LR_SHIFT_FLOW, FLOW);
			if (lrnver > lrnver1)
				lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
		}

		/* Our general shifting strategy is:
		   1) to minimized number of new nodes;
		   2) to minimized number of neighbors involved in shifting;
		   3) to minimized number of disk reads; */

		/* we can win TWO or ONE nodes by shifting in both directions */
		if (lrnver < lnver && lrnver < rnver) {
			RFALSE(h &&
			       (tb->lnum[h] != 1 ||
				tb->rnum[h] != 1 ||
				lrnver != 1 || rnver != 2 || lnver != 2
				|| h != 1), "vs-8230: bad h");
			if (lrset == LR_SHIFT_FLOW)
				set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
					       lrnver, snum012 + lrset,
					       tb->lbytes, tb->rbytes);
			else
				set_parameters(tb, h,
					       tb->lnum[h] -
					       ((tb->lbytes == -1) ? 0 : 1),
					       tb->rnum[h] -
					       ((tb->rbytes == -1) ? 0 : 1),
					       lrnver, snum012 + lrset, -1, -1);

			return CARRY_ON;
		}

		/* if shifting doesn't lead to better packing then don't shift */
		if (nver == lrnver) {
			set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
				       -1);
			return CARRY_ON;
		}

		/* now we know that for better packing shifting in only one
		   direction either to the left or to the right is required */

		/* if shifting to the left is better than shifting to the right */
		if (lnver < rnver) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/* if shifting to the right is better than shifting to the left */
		if (lnver > rnver) {
			SET_PAR_SHIFT_RIGHT;
			return CARRY_ON;
		}

		/* now shifting in either direction gives the same number
		   of nodes and we can make use of the cached neighbors */
		if (is_left_neighbor_in_cache(tb, h)) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/* shift to the right independently on whether the right neighbor in cache or not */
		SET_PAR_SHIFT_RIGHT;
		return CARRY_ON;
	}
}

/* Check whether current node S[h] is balanced when Decreasing its size by
 * Deleting or Cutting for INTERNAL node of S+tree.
 * Calculate parameters for balancing for current level h.
 * Parameters:
 *	tb	tree_balance structure;
 *	h	current level of the node;
 *	inum	item number in S[h];
 *	mode	i - insert, p - paste;
 * Returns:	1 - schedule occurred;
 *	        0 - balancing for higher levels needed;
 *	       -1 - no balancing for higher levels needed;
 *	       -2 - no disk space.
 *
 * Note: Items of internal nodes have fixed size, so the balance condition for
 * the internal part of S+tree is as for the B-trees.
1551 */ 1552static int dc_check_balance_internal(struct tree_balance *tb, int h) 1553{ 1554 struct virtual_node *vn = tb->tb_vn; 1555 1556 /* Sh is the node whose balance is currently being checked, 1557 and Fh is its father. */ 1558 struct buffer_head *Sh, *Fh; 1559 int maxsize, n_ret_value; 1560 int lfree, rfree /* free space in L and R */ ; 1561 1562 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1563 Fh = PATH_H_PPARENT(tb->tb_path, h); 1564 1565 maxsize = MAX_CHILD_SIZE(Sh); 1566 1567/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ 1568/* new_nr_item = number of items node would have if operation is */ 1569/* performed without balancing (new_nr_item); */ 1570 create_virtual_node(tb, h); 1571 1572 if (!Fh) { /* S[h] is the root. */ 1573 if (vn->vn_nr_item > 0) { 1574 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1575 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1576 } 1577 /* new_nr_item == 0. 1578 * Current root will be deleted resulting in 1579 * decrementing the tree height. */ 1580 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); 1581 return CARRY_ON; 1582 } 1583 1584 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) 1585 return n_ret_value; 1586 1587 /* get free space of neighbors */ 1588 rfree = get_rfree(tb, h); 1589 lfree = get_lfree(tb, h); 1590 1591 /* determine maximal number of items we can fit into neighbors */ 1592 check_left(tb, h, lfree); 1593 check_right(tb, h, rfree); 1594 1595 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. 1596 * In this case we balance only if it leads to better packing. */ 1597 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, 1598 * which is impossible with greater values of new_nr_item. */ 1599 if (tb->lnum[h] >= vn->vn_nr_item + 1) { 1600 /* All contents of S[h] can be moved to L[h]. 
*/ 1601 int n; 1602 int order_L; 1603 1604 order_L = 1605 ((n = 1606 PATH_H_B_ITEM_ORDER(tb->tb_path, 1607 h)) == 1608 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1609 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / 1610 (DC_SIZE + KEY_SIZE); 1611 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, 1612 -1); 1613 return CARRY_ON; 1614 } 1615 1616 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1617 /* All contents of S[h] can be moved to R[h]. */ 1618 int n; 1619 int order_R; 1620 1621 order_R = 1622 ((n = 1623 PATH_H_B_ITEM_ORDER(tb->tb_path, 1624 h)) == 1625 B_NR_ITEMS(Fh)) ? 0 : n + 1; 1626 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / 1627 (DC_SIZE + KEY_SIZE); 1628 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, 1629 -1); 1630 return CARRY_ON; 1631 } 1632 } 1633 1634 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1635 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ 1636 int to_r; 1637 1638 to_r = 1639 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - 1640 tb->rnum[h] + vn->vn_nr_item + 1) / 2 - 1641 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); 1642 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 1643 0, NULL, -1, -1); 1644 return CARRY_ON; 1645 } 1646 1647 /* Balancing does not lead to better packing. */ 1648 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1649 return NO_BALANCING_NEEDED; 1650 } 1651 1652 /* Current node contain insufficient number of items. Balancing is required. */ 1653 /* Check whether we can merge S[h] with left neighbor. */ 1654 if (tb->lnum[h] >= vn->vn_nr_item + 1) 1655 if (is_left_neighbor_in_cache(tb, h) 1656 || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) { 1657 int n; 1658 int order_L; 1659 1660 order_L = 1661 ((n = 1662 PATH_H_B_ITEM_ORDER(tb->tb_path, 1663 h)) == 1664 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1665 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE + 1666 KEY_SIZE); 1667 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1); 1668 return CARRY_ON; 1669 } 1670 1671 /* Check whether we can merge S[h] with right neighbor. 
*/ 1672 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1673 int n; 1674 int order_R; 1675 1676 order_R = 1677 ((n = 1678 PATH_H_B_ITEM_ORDER(tb->tb_path, 1679 h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1); 1680 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE + 1681 KEY_SIZE); 1682 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1); 1683 return CARRY_ON; 1684 } 1685 1686 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ 1687 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1688 int to_r; 1689 1690 to_r = 1691 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1692 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1693 tb->rnum[h]); 1694 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, 1695 -1, -1); 1696 return CARRY_ON; 1697 } 1698 1699 /* For internal nodes try to borrow item from a neighbor */ 1700 RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); 1701 1702 /* Borrow one or two items from caching neighbor */ 1703 if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) { 1704 int from_l; 1705 1706 from_l = 1707 (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1708 1) / 2 - (vn->vn_nr_item + 1); 1709 set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1); 1710 return CARRY_ON; 1711 } 1712 1713 set_parameters(tb, h, 0, 1714 -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item + 1715 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1); 1716 return CARRY_ON; 1717} 1718 1719/* Check whether current node S[h] is balanced when Decreasing its size by 1720 * Deleting or Truncating for LEAF node of S+tree. 1721 * Calculate parameters for balancing for current level h. 1722 * Parameters: 1723 * tb tree_balance structure; 1724 * h current level of the node; 1725 * inum item number in S[h]; 1726 * mode i - insert, p - paste; 1727 * Returns: 1 - schedule occurred; 1728 * 0 - balancing for higher levels needed; 1729 * -1 - no balancing for higher levels needed; 1730 * -2 - no disk space. 
1731 */ 1732static int dc_check_balance_leaf(struct tree_balance *tb, int h) 1733{ 1734 struct virtual_node *vn = tb->tb_vn; 1735 1736 /* Number of bytes that must be deleted from 1737 (value is negative if bytes are deleted) buffer which 1738 contains node being balanced. The mnemonic is that the 1739 attempted change in node space used level is levbytes bytes. */ 1740 int levbytes; 1741 /* the maximal item size */ 1742 int maxsize, n_ret_value; 1743 /* S0 is the node whose balance is currently being checked, 1744 and F0 is its father. */ 1745 struct buffer_head *S0, *F0; 1746 int lfree, rfree /* free space in L and R */ ; 1747 1748 S0 = PATH_H_PBUFFER(tb->tb_path, 0); 1749 F0 = PATH_H_PPARENT(tb->tb_path, 0); 1750 1751 levbytes = tb->insert_size[h]; 1752 1753 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ 1754 1755 if (!F0) { /* S[0] is the root now. */ 1756 1757 RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0), 1758 "vs-8240: attempt to create empty buffer tree"); 1759 1760 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1761 return NO_BALANCING_NEEDED; 1762 } 1763 1764 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) 1765 return n_ret_value; 1766 1767 /* get free space of neighbors */ 1768 rfree = get_rfree(tb, h); 1769 lfree = get_lfree(tb, h); 1770 1771 create_virtual_node(tb, h); 1772 1773 /* if 3 leaves can be merge to one, set parameters and return */ 1774 if (are_leaves_removable(tb, lfree, rfree)) 1775 return CARRY_ON; 1776 1777 /* determine maximal number of items we can shift to the left/right neighbor 1778 and the maximal number of bytes that can flow to the left/right neighbor 1779 from the left/right most liquid item that cannot be shifted from S[0] entirely 1780 */ 1781 check_left(tb, h, lfree); 1782 check_right(tb, h, rfree); 1783 1784 /* check whether we can merge S with left neighbor. 
*/ 1785 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) 1786 if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ 1787 !tb->FR[h]) { 1788 1789 RFALSE(!tb->FL[h], 1790 "vs-8245: dc_check_balance_leaf: FL[h] must exist"); 1791 1792 /* set parameter to merge S[0] with its left neighbor */ 1793 set_parameters(tb, h, -1, 0, 0, NULL, -1, -1); 1794 return CARRY_ON; 1795 } 1796 1797 /* check whether we can merge S[0] with right neighbor. */ 1798 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { 1799 set_parameters(tb, h, 0, -1, 0, NULL, -1, -1); 1800 return CARRY_ON; 1801 } 1802 1803 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ 1804 if (is_leaf_removable(tb)) 1805 return CARRY_ON; 1806 1807 /* Balancing is not required. */ 1808 tb->s0num = vn->vn_nr_item; 1809 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1810 return NO_BALANCING_NEEDED; 1811} 1812 1813/* Check whether current node S[h] is balanced when Decreasing its size by 1814 * Deleting or Cutting. 1815 * Calculate parameters for balancing for current level h. 1816 * Parameters: 1817 * tb tree_balance structure; 1818 * h current level of the node; 1819 * inum item number in S[h]; 1820 * mode d - delete, c - cut. 1821 * Returns: 1 - schedule occurred; 1822 * 0 - balancing for higher levels needed; 1823 * -1 - no balancing for higher levels needed; 1824 * -2 - no disk space. 1825 */ 1826static int dc_check_balance(struct tree_balance *tb, int h) 1827{ 1828 RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), 1829 "vs-8250: S is not initialized"); 1830 1831 if (h) 1832 return dc_check_balance_internal(tb, h); 1833 else 1834 return dc_check_balance_leaf(tb, h); 1835} 1836 1837/* Check whether current node S[h] is balanced. 1838 * Calculate parameters for balancing for current level h. 
 * Parameters:
 *
 *	tb	tree_balance structure:
 *
 *		tb is a large structure that must be read about in the header file
 *		at the same time as this procedure if the reader is to successfully
 *		understand this procedure
 *
 *	h	current level of the node;
 *	inum	item number in S[h];
 *	mode	i - insert, p - paste, d - delete, c - cut.
 * Returns:	1 - schedule occurred;
 *	        0 - balancing for higher levels needed;
 *	       -1 - no balancing for higher levels needed;
 *	       -2 - no disk space.
 */
static int check_balance(int mode,
			 struct tree_balance *tb,
			 int h,
			 int inum,
			 int pos_in_item,
			 struct item_head *ins_ih, const void *data)
{
	struct virtual_node *vn;

	/* the virtual node lives in the preallocated tb->vn_buf scratch
	   area; the space right after the header is used for virtual items */
	vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
	vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
	vn->vn_mode = mode;
	vn->vn_affected_item_num = inum;
	vn->vn_pos_in_item = pos_in_item;
	vn->vn_ins_ih = ins_ih;
	vn->vn_data = data;

	RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
	       "vs-8255: ins_ih can not be 0 in insert mode");

	if (tb->insert_size[h] > 0)
		/* Calculate balance parameters when size of node is increasing. */
		return ip_check_balance(tb, h);

	/* Calculate balance parameters when size of node is decreasing. */
	return dc_check_balance(tb, h);
}

/* Check whether parent at the path is the really parent of the current node.
 * Returns CARRY_ON when the cached parent is valid (and unlocked), or
 * REPEAT_SEARCH when the path is stale and must be re-searched. */
static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_bh;
	struct treepath *p_s_path = p_s_tb->tb_path;
	int n_position,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);

	/* We are in the root or in the new root. */
	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {

		RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
		       "PAP-8260: invalid offset in the path");

		if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
		    b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
			/* Root is not changed. */
			PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
			PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
			return CARRY_ON;
		}
		return REPEAT_SEARCH;	/* Root is changed and we must recalculate the path. */
	}

	if (!B_IS_IN_TREE
	    (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
		return REPEAT_SEARCH;	/* Parent in the path is not in the tree. */

	if ((n_position =
	     PATH_OFFSET_POSITION(p_s_path,
				  n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
		return REPEAT_SEARCH;

	if (B_N_CHILD_NUM(p_s_bh, n_position) !=
	    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
		/* Parent in the path is not parent of the current node in the tree. */
		return REPEAT_SEARCH;

	/* Waiting for the lock may block; if the tree changed meanwhile the
	   whole path must be re-established. */
	if (buffer_locked(p_s_bh)) {
		__wait_on_buffer(p_s_bh);
		if (FILESYSTEM_CHANGED_TB(p_s_tb))
			return REPEAT_SEARCH;
	}

	return CARRY_ON;	/* Parent in the path is unlocked and really parent of the current node.  */
}

/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
 * of S[n_h] we
 * need in order to balance S[n_h], and get them if necessary.
1933 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1934 * CARRY_ON - schedule didn't occur while the function worked; 1935 */ 1936static int get_neighbors(struct tree_balance *p_s_tb, int n_h) 1937{ 1938 int n_child_position, 1939 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); 1940 unsigned long n_son_number; 1941 struct super_block *p_s_sb = p_s_tb->tb_sb; 1942 struct buffer_head *p_s_bh; 1943 1944 PROC_INFO_INC(p_s_sb, get_neighbors[n_h]); 1945 1946 if (p_s_tb->lnum[n_h]) { 1947 /* We need left neighbor to balance S[n_h]. */ 1948 PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]); 1949 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1950 1951 RFALSE(p_s_bh == p_s_tb->FL[n_h] && 1952 !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), 1953 "PAP-8270: invalid position in the parent"); 1954 1955 n_child_position = 1956 (p_s_bh == 1957 p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> 1958 FL[n_h]); 1959 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); 1960 p_s_bh = sb_bread(p_s_sb, n_son_number); 1961 if (!p_s_bh) 1962 return IO_ERROR; 1963 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 1964 decrement_bcount(p_s_bh); 1965 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); 1966 return REPEAT_SEARCH; 1967 } 1968 1969 RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) || 1970 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || 1971 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != 1972 p_s_bh->b_blocknr, "PAP-8275: invalid parent"); 1973 RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); 1974 RFALSE(!n_h && 1975 B_FREE_SPACE(p_s_bh) != 1976 MAX_CHILD_SIZE(p_s_bh) - 1977 dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), 1978 "PAP-8290: invalid child size of left neighbor"); 1979 1980 decrement_bcount(p_s_tb->L[n_h]); 1981 p_s_tb->L[n_h] = p_s_bh; 1982 } 1983 1984 if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. 
*/ 1985 PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]); 1986 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1987 1988 RFALSE(p_s_bh == p_s_tb->FR[n_h] && 1989 PATH_OFFSET_POSITION(p_s_tb->tb_path, 1990 n_path_offset) >= 1991 B_NR_ITEMS(p_s_bh), 1992 "PAP-8295: invalid position in the parent"); 1993 1994 n_child_position = 1995 (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; 1996 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); 1997 p_s_bh = sb_bread(p_s_sb, n_son_number); 1998 if (!p_s_bh) 1999 return IO_ERROR; 2000 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2001 decrement_bcount(p_s_bh); 2002 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); 2003 return REPEAT_SEARCH; 2004 } 2005 decrement_bcount(p_s_tb->R[n_h]); 2006 p_s_tb->R[n_h] = p_s_bh; 2007 2008 RFALSE(!n_h 2009 && B_FREE_SPACE(p_s_bh) != 2010 MAX_CHILD_SIZE(p_s_bh) - 2011 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)), 2012 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", 2013 B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh), 2014 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position))); 2015 2016 } 2017 return CARRY_ON; 2018} 2019 2020static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) 2021{ 2022 int max_num_of_items; 2023 int max_num_of_entries; 2024 unsigned long blocksize = sb->s_blocksize; 2025 2026#define MIN_NAME_LEN 1 2027 2028 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); 2029 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / 2030 (DEH_SIZE + MIN_NAME_LEN); 2031 2032 return sizeof(struct virtual_node) + 2033 max(max_num_of_items * sizeof(struct virtual_item), 2034 sizeof(struct virtual_item) + sizeof(struct direntry_uarea) + 2035 (max_num_of_entries - 1) * sizeof(__u16)); 2036} 2037 2038/* maybe we should fail balancing we are going to perform when kmalloc 2039 fails several times. 
But now it will loop until kmalloc gets 2040 required memory */ 2041static int get_mem_for_virtual_node(struct tree_balance *tb) 2042{ 2043 int check_fs = 0; 2044 int size; 2045 char *buf; 2046 2047 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); 2048 2049 if (size > tb->vn_buf_size) { 2050 /* we have to allocate more memory for virtual node */ 2051 if (tb->vn_buf) { 2052 /* free memory allocated before */ 2053 kfree(tb->vn_buf); 2054 /* this is not needed if kfree is atomic */ 2055 check_fs = 1; 2056 } 2057 2058 /* virtual node requires now more memory */ 2059 tb->vn_buf_size = size; 2060 2061 /* get memory for virtual item */ 2062 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); 2063 if (!buf) { 2064 /* getting memory with GFP_KERNEL priority may involve 2065 balancing now (due to indirect_to_direct conversion on 2066 dcache shrinking). So, release path and collected 2067 resources here */ 2068 free_buffers_in_tb(tb); 2069 buf = kmalloc(size, GFP_NOFS); 2070 if (!buf) { 2071 tb->vn_buf_size = 0; 2072 } 2073 tb->vn_buf = buf; 2074 schedule(); 2075 return REPEAT_SEARCH; 2076 } 2077 2078 tb->vn_buf = buf; 2079 } 2080 2081 if (check_fs && FILESYSTEM_CHANGED_TB(tb)) 2082 return REPEAT_SEARCH; 2083 2084 return CARRY_ON; 2085} 2086 2087#ifdef CONFIG_REISERFS_CHECK 2088static void tb_buffer_sanity_check(struct super_block *p_s_sb, 2089 struct buffer_head *p_s_bh, 2090 const char *descr, int level) 2091{ 2092 if (p_s_bh) { 2093 if (atomic_read(&(p_s_bh->b_count)) <= 0) { 2094 2095 reiserfs_panic(p_s_sb, 2096 "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", 2097 descr, level, p_s_bh); 2098 } 2099 2100 if (!buffer_uptodate(p_s_bh)) { 2101 reiserfs_panic(p_s_sb, 2102 "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", 2103 descr, level, p_s_bh); 2104 } 2105 2106 if (!B_IS_IN_TREE(p_s_bh)) { 2107 reiserfs_panic(p_s_sb, 2108 "jmacd-3: tb_buffer_sanity_check(): buffer is not in 
tree %s[%d] (%b)\n", 2109 descr, level, p_s_bh); 2110 } 2111 2112 if (p_s_bh->b_bdev != p_s_sb->s_bdev) { 2113 reiserfs_panic(p_s_sb, 2114 "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", 2115 descr, level, p_s_bh); 2116 } 2117 2118 if (p_s_bh->b_size != p_s_sb->s_blocksize) { 2119 reiserfs_panic(p_s_sb, 2120 "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", 2121 descr, level, p_s_bh); 2122 } 2123 2124 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { 2125 reiserfs_panic(p_s_sb, 2126 "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n", 2127 descr, level, p_s_bh); 2128 } 2129 } 2130} 2131#else 2132static void tb_buffer_sanity_check(struct super_block *p_s_sb, 2133 struct buffer_head *p_s_bh, 2134 const char *descr, int level) 2135{; 2136} 2137#endif 2138 2139static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh) 2140{ 2141 return reiserfs_prepare_for_journal(s, bh, 0); 2142} 2143 2144static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb) 2145{ 2146 struct buffer_head *locked; 2147#ifdef CONFIG_REISERFS_CHECK 2148 int repeat_counter = 0; 2149#endif 2150 int i; 2151 2152 do { 2153 2154 locked = NULL; 2155 2156 for (i = p_s_tb->tb_path->path_length; 2157 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2158 if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { 2159 /* if I understand correctly, we can only be sure the last buffer 2160 ** in the path is in the tree --clm 2161 */ 2162#ifdef CONFIG_REISERFS_CHECK 2163 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == 2164 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { 2165 tb_buffer_sanity_check(p_s_tb->tb_sb, 2166 PATH_OFFSET_PBUFFER 2167 (p_s_tb->tb_path, 2168 i), "S", 2169 p_s_tb->tb_path-> 2170 path_length - i); 2171 } 2172#endif 2173 if (!clear_all_dirty_bits(p_s_tb->tb_sb, 2174 PATH_OFFSET_PBUFFER 2175 (p_s_tb->tb_path, 2176 i))) { 2177 locked = 2178 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, 2179 i); 2180 } 2181 } 
2182 } 2183 2184 for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; 2185 i++) { 2186 2187 if (p_s_tb->lnum[i]) { 2188 2189 if (p_s_tb->L[i]) { 2190 tb_buffer_sanity_check(p_s_tb->tb_sb, 2191 p_s_tb->L[i], 2192 "L", i); 2193 if (!clear_all_dirty_bits 2194 (p_s_tb->tb_sb, p_s_tb->L[i])) 2195 locked = p_s_tb->L[i]; 2196 } 2197 2198 if (!locked && p_s_tb->FL[i]) { 2199 tb_buffer_sanity_check(p_s_tb->tb_sb, 2200 p_s_tb->FL[i], 2201 "FL", i); 2202 if (!clear_all_dirty_bits 2203 (p_s_tb->tb_sb, p_s_tb->FL[i])) 2204 locked = p_s_tb->FL[i]; 2205 } 2206 2207 if (!locked && p_s_tb->CFL[i]) { 2208 tb_buffer_sanity_check(p_s_tb->tb_sb, 2209 p_s_tb->CFL[i], 2210 "CFL", i); 2211 if (!clear_all_dirty_bits 2212 (p_s_tb->tb_sb, p_s_tb->CFL[i])) 2213 locked = p_s_tb->CFL[i]; 2214 } 2215 2216 } 2217 2218 if (!locked && (p_s_tb->rnum[i])) { 2219 2220 if (p_s_tb->R[i]) { 2221 tb_buffer_sanity_check(p_s_tb->tb_sb, 2222 p_s_tb->R[i], 2223 "R", i); 2224 if (!clear_all_dirty_bits 2225 (p_s_tb->tb_sb, p_s_tb->R[i])) 2226 locked = p_s_tb->R[i]; 2227 } 2228 2229 if (!locked && p_s_tb->FR[i]) { 2230 tb_buffer_sanity_check(p_s_tb->tb_sb, 2231 p_s_tb->FR[i], 2232 "FR", i); 2233 if (!clear_all_dirty_bits 2234 (p_s_tb->tb_sb, p_s_tb->FR[i])) 2235 locked = p_s_tb->FR[i]; 2236 } 2237 2238 if (!locked && p_s_tb->CFR[i]) { 2239 tb_buffer_sanity_check(p_s_tb->tb_sb, 2240 p_s_tb->CFR[i], 2241 "CFR", i); 2242 if (!clear_all_dirty_bits 2243 (p_s_tb->tb_sb, p_s_tb->CFR[i])) 2244 locked = p_s_tb->CFR[i]; 2245 } 2246 } 2247 } 2248 /* as far as I can tell, this is not required. The FEB list seems 2249 ** to be full of newly allocated nodes, which will never be locked, 2250 ** dirty, or anything else. 2251 ** To be safe, I'm putting in the checks and waits in. For the moment, 2252 ** they are needed to keep the code in journal.c from complaining 2253 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. 
2254 ** --clm 2255 */ 2256 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2257 if (p_s_tb->FEB[i]) { 2258 if (!clear_all_dirty_bits 2259 (p_s_tb->tb_sb, p_s_tb->FEB[i])) 2260 locked = p_s_tb->FEB[i]; 2261 } 2262 } 2263 2264 if (locked) { 2265#ifdef CONFIG_REISERFS_CHECK 2266 repeat_counter++; 2267 if ((repeat_counter % 10000) == 0) { 2268 reiserfs_warning(p_s_tb->tb_sb, 2269 "wait_tb_buffers_until_released(): too many " 2270 "iterations waiting for buffer to unlock " 2271 "(%b)", locked); 2272 2273 /* Don't loop forever. Try to recover from possible error. */ 2274 2275 return (FILESYSTEM_CHANGED_TB(p_s_tb)) ? 2276 REPEAT_SEARCH : CARRY_ON; 2277 } 2278#endif 2279 __wait_on_buffer(locked); 2280 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2281 return REPEAT_SEARCH; 2282 } 2283 } 2284 2285 } while (locked); 2286 2287 return CARRY_ON; 2288} 2289 2290/* Prepare for balancing, that is 2291 * get all necessary parents, and neighbors; 2292 * analyze what and where should be moved; 2293 * get sufficient number of new nodes; 2294 * Balancing will start only after all resources will be collected at a time. 2295 * 2296 * When ported to SMP kernels, only at the last moment after all needed nodes 2297 * are collected in cache, will the resources be locked using the usual 2298 * textbook ordered lock acquisition algorithms. Note that ensuring that 2299 * this code neither write locks what it does not need to write lock nor locks out of order 2300 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans 2301 * 2302 * fix is meant in the sense of render unchanging 2303 * 2304 * Latency might be improved by first gathering a list of what buffers are needed 2305 * and then getting as many of them in parallel as possible? 
-Hans 2306 * 2307 * Parameters: 2308 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) 2309 * tb tree_balance structure; 2310 * inum item number in S[h]; 2311 * pos_in_item - comment this if you can 2312 * ins_ih & ins_sd are used when inserting 2313 * Returns: 1 - schedule occurred while the function worked; 2314 * 0 - schedule didn't occur while the function worked; 2315 * -1 - if no_disk_space 2316 */ 2317 2318int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted 2319 const void *data // inserted item or data to be pasted 2320 ) 2321{ 2322 int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); 2323 int n_pos_in_item; 2324 2325 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared 2326 ** during wait_tb_buffers_run 2327 */ 2328 int wait_tb_buffers_run = 0; 2329 struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); 2330 2331 ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes; 2332 2333 n_pos_in_item = p_s_tb->tb_path->pos_in_item; 2334 2335 p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb); 2336 2337 /* we prepare and log the super here so it will already be in the 2338 ** transaction when do_balance needs to change it. 
2339 ** This way do_balance won't have to schedule when trying to prepare 2340 ** the super for logging 2341 */ 2342 reiserfs_prepare_for_journal(p_s_tb->tb_sb, 2343 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1); 2344 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, 2345 SB_BUFFER_WITH_SB(p_s_tb->tb_sb)); 2346 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 2347 return REPEAT_SEARCH; 2348 2349 /* if it possible in indirect_to_direct conversion */ 2350 if (buffer_locked(p_s_tbS0)) { 2351 __wait_on_buffer(p_s_tbS0); 2352 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 2353 return REPEAT_SEARCH; 2354 } 2355#ifdef CONFIG_REISERFS_CHECK 2356 if (cur_tb) { 2357 print_cur_tb("fix_nodes"); 2358 reiserfs_panic(p_s_tb->tb_sb, 2359 "PAP-8305: fix_nodes: there is pending do_balance"); 2360 } 2361 2362 if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) { 2363 reiserfs_panic(p_s_tb->tb_sb, 2364 "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " 2365 "at the beginning of fix_nodes or not in tree (mode %c)", 2366 p_s_tbS0, p_s_tbS0, n_op_mode); 2367 } 2368 2369 /* Check parameters. 
*/ 2370 switch (n_op_mode) { 2371 case M_INSERT: 2372 if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0)) 2373 reiserfs_panic(p_s_tb->tb_sb, 2374 "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", 2375 n_item_num, B_NR_ITEMS(p_s_tbS0)); 2376 break; 2377 case M_PASTE: 2378 case M_DELETE: 2379 case M_CUT: 2380 if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) { 2381 print_block(p_s_tbS0, 0, -1, -1); 2382 reiserfs_panic(p_s_tb->tb_sb, 2383 "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", 2384 n_item_num, n_op_mode, 2385 p_s_tb->insert_size[0]); 2386 } 2387 break; 2388 default: 2389 reiserfs_panic(p_s_tb->tb_sb, 2390 "PAP-8340: fix_nodes: Incorrect mode of operation"); 2391 } 2392#endif 2393 2394 if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH) 2395 return REPEAT_SEARCH; 2396 2397 /* Starting from the leaf level; for all levels n_h of the tree. */ 2398 for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) { 2399 if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) { 2400 goto repeat; 2401 } 2402 2403 if ((n_ret_value = 2404 check_balance(n_op_mode, p_s_tb, n_h, n_item_num, 2405 n_pos_in_item, p_s_ins_ih, 2406 data)) != CARRY_ON) { 2407 if (n_ret_value == NO_BALANCING_NEEDED) { 2408 /* No balancing for higher levels needed. */ 2409 if ((n_ret_value = 2410 get_neighbors(p_s_tb, n_h)) != CARRY_ON) { 2411 goto repeat; 2412 } 2413 if (n_h != MAX_HEIGHT - 1) 2414 p_s_tb->insert_size[n_h + 1] = 0; 2415 /* ok, analysis and resource gathering are complete */ 2416 break; 2417 } 2418 goto repeat; 2419 } 2420 2421 if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) { 2422 goto repeat; 2423 } 2424 2425 if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) { 2426 goto repeat; /* No disk space, or schedule occurred and 2427 analysis may be invalid and needs to be redone. 
*/ 2428 } 2429 2430 if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) { 2431 /* We have a positive insert size but no nodes exist on this 2432 level, this means that we are creating a new root. */ 2433 2434 RFALSE(p_s_tb->blknum[n_h] != 1, 2435 "PAP-8350: creating new empty root"); 2436 2437 if (n_h < MAX_HEIGHT - 1) 2438 p_s_tb->insert_size[n_h + 1] = 0; 2439 } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) { 2440 if (p_s_tb->blknum[n_h] > 1) { 2441 /* The tree needs to be grown, so this node S[n_h] 2442 which is the root node is split into two nodes, 2443 and a new node (S[n_h+1]) will be created to 2444 become the root node. */ 2445 2446 RFALSE(n_h == MAX_HEIGHT - 1, 2447 "PAP-8355: attempt to create too high of a tree"); 2448 2449 p_s_tb->insert_size[n_h + 1] = 2450 (DC_SIZE + 2451 KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + 2452 DC_SIZE; 2453 } else if (n_h < MAX_HEIGHT - 1) 2454 p_s_tb->insert_size[n_h + 1] = 0; 2455 } else 2456 p_s_tb->insert_size[n_h + 1] = 2457 (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); 2458 } 2459 2460 if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) { 2461 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2462 wait_tb_buffers_run = 1; 2463 n_ret_value = REPEAT_SEARCH; 2464 goto repeat; 2465 } else { 2466 return CARRY_ON; 2467 } 2468 } else { 2469 wait_tb_buffers_run = 1; 2470 goto repeat; 2471 } 2472 2473 repeat: 2474 // fix_nodes was unable to perform its calculation due to 2475 // filesystem got changed under us, lack of free disk space or i/o 2476 // failure. If the first is the case - the search will be 2477 // repeated. For now - free all resources acquired so far except 2478 // for the new allocated nodes 2479 { 2480 int i; 2481 2482 /* Release path buffers. 
*/ 2483 if (wait_tb_buffers_run) { 2484 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path); 2485 } else { 2486 pathrelse(p_s_tb->tb_path); 2487 } 2488 /* brelse all resources collected for balancing */ 2489 for (i = 0; i < MAX_HEIGHT; i++) { 2490 if (wait_tb_buffers_run) { 2491 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2492 p_s_tb->L[i]); 2493 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2494 p_s_tb->R[i]); 2495 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2496 p_s_tb->FL[i]); 2497 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2498 p_s_tb->FR[i]); 2499 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2500 p_s_tb-> 2501 CFL[i]); 2502 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2503 p_s_tb-> 2504 CFR[i]); 2505 } 2506 2507 brelse(p_s_tb->L[i]); 2508 p_s_tb->L[i] = NULL; 2509 brelse(p_s_tb->R[i]); 2510 p_s_tb->R[i] = NULL; 2511 brelse(p_s_tb->FL[i]); 2512 p_s_tb->FL[i] = NULL; 2513 brelse(p_s_tb->FR[i]); 2514 p_s_tb->FR[i] = NULL; 2515 brelse(p_s_tb->CFL[i]); 2516 p_s_tb->CFL[i] = NULL; 2517 brelse(p_s_tb->CFR[i]); 2518 p_s_tb->CFR[i] = NULL; 2519 } 2520 2521 if (wait_tb_buffers_run) { 2522 for (i = 0; i < MAX_FEB_SIZE; i++) { 2523 if (p_s_tb->FEB[i]) { 2524 reiserfs_restore_prepared_buffer 2525 (p_s_tb->tb_sb, p_s_tb->FEB[i]); 2526 } 2527 } 2528 } 2529 return n_ret_value; 2530 } 2531 2532} 2533 2534/* Anatoly will probably forgive me renaming p_s_tb to tb. I just 2535 wanted to make lines shorter */ 2536void unfix_nodes(struct tree_balance *tb) 2537{ 2538 int i; 2539 2540 /* Release path buffers. 
*/ 2541 pathrelse_and_restore(tb->tb_sb, tb->tb_path); 2542 2543 /* brelse all resources collected for balancing */ 2544 for (i = 0; i < MAX_HEIGHT; i++) { 2545 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); 2546 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); 2547 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); 2548 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); 2549 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); 2550 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); 2551 2552 brelse(tb->L[i]); 2553 brelse(tb->R[i]); 2554 brelse(tb->FL[i]); 2555 brelse(tb->FR[i]); 2556 brelse(tb->CFL[i]); 2557 brelse(tb->CFR[i]); 2558 } 2559 2560 /* deal with list of allocated (used and unused) nodes */ 2561 for (i = 0; i < MAX_FEB_SIZE; i++) { 2562 if (tb->FEB[i]) { 2563 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; 2564 /* de-allocated block which was not used by balancing and 2565 bforget about buffer for it */ 2566 brelse(tb->FEB[i]); 2567 reiserfs_free_block(tb->transaction_handle, NULL, 2568 blocknr, 0); 2569 } 2570 if (tb->used[i]) { 2571 /* release used as new nodes including a new root */ 2572 brelse(tb->used[i]); 2573 } 2574 } 2575 2576 kfree(tb->vn_buf); 2577 2578} 2579