1/* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir2.h" 28#include "xfs_dmapi.h" 29#include "xfs_mount.h" 30#include "xfs_da_btree.h" 31#include "xfs_bmap_btree.h" 32#include "xfs_alloc_btree.h" 33#include "xfs_ialloc_btree.h" 34#include "xfs_dir2_sf.h" 35#include "xfs_attr_sf.h" 36#include "xfs_dinode.h" 37#include "xfs_inode.h" 38#include "xfs_inode_item.h" 39#include "xfs_alloc.h" 40#include "xfs_btree.h" 41#include "xfs_bmap.h" 42#include "xfs_attr.h" 43#include "xfs_attr_leaf.h" 44#include "xfs_dir2_data.h" 45#include "xfs_dir2_leaf.h" 46#include "xfs_dir2_block.h" 47#include "xfs_dir2_node.h" 48#include "xfs_error.h" 49 50/* 51 * xfs_da_btree.c 52 * 53 * Routines to implement directories as Btrees of hashed names. 54 */ 55 56/*======================================================================== 57 * Function prototypes for the kernel. 58 *========================================================================*/ 59 60/* 61 * Routines used for growing the Btree. 
 */
/*
 * NOTE(review): STATIC is an XFS-local macro — presumably expands to
 * "static" in non-debug builds; confirm against the xfs debug headers.
 */
STATIC int xfs_da_root_split(xfs_da_state_t *state,
					    xfs_da_state_blk_t *existing_root,
					    xfs_da_state_blk_t *new_child);
STATIC int xfs_da_node_split(xfs_da_state_t *state,
					    xfs_da_state_blk_t *existing_blk,
					    xfs_da_state_blk_t *split_blk,
					    xfs_da_state_blk_t *blk_to_add,
					    int treelevel,
					    int *result);
STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
					 xfs_da_state_blk_t *node_blk_1,
					 xfs_da_state_blk_t *node_blk_2);
STATIC void xfs_da_node_add(xfs_da_state_t *state,
				   xfs_da_state_blk_t *old_node_blk,
				   xfs_da_state_blk_t *new_node_blk);

/*
 * Routines used for shrinking the Btree.
 */
STATIC int xfs_da_root_join(xfs_da_state_t *state,
			   xfs_da_state_blk_t *root_blk);
STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
STATIC void xfs_da_node_remove(xfs_da_state_t *state,
					      xfs_da_state_blk_t *drop_blk);
STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
					 xfs_da_state_blk_t *src_node_blk,
					 xfs_da_state_blk_t *dst_node_blk);

/*
 * Utility routines.
 */
STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
				  xfs_da_state_blk_t *drop_blk,
				  xfs_da_state_blk_t *save_blk);
STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/

/*
 * Create the initial contents of an intermediate node.
 *
 * Gets a fresh buffer for @blkno in @whichfork of args->dp, zeroes the
 * node header, stamps XFS_DA_NODE_MAGIC and @level, and logs the header.
 * On success the held buffer is returned through @bpp (caller releases).
 * Returns 0 or an error from xfs_da_get_buf().
 */
int
xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
				 xfs_dabuf_t **bpp, int whichfork)
{
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int error;
	xfs_trans_t *tp;

	tp = args->trans;
	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	/* Fresh node: no siblings yet, no entries, given tree level. */
	node->hdr.info.forw = 0;
	node->hdr.info.back = 0;
	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
	node->hdr.info.pad = 0;
	node->hdr.count = 0;
	node->hdr.level = cpu_to_be16(level);

	/* Log only the header; the entry array is still untouched. */
	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	*bpp = bp;
	return(0);
}

/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 *
 * Walks state->path from the leaf back toward the root, splitting each
 * block that overflowed and inserting the resulting new block into its
 * parent.  If the walk reaches the top with a pending block to insert,
 * the root itself is split (xfs_da_root_split) and the sibling links of
 * the relocated old-root block are repaired.
 */
int							/* error */
xfs_da_split(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int max, action, error, i;

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again (a
			 * "double split"); the extra block is recorded in
			 * state->extrablk for the parent insertion pass.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				error = xfs_attr_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				error = xfs_attr_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return(error);	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			/* addblk was consumed by the insertion above. */
			xfs_da_buf_done(addblk->bp);
			addblk->bp = NULL;
			if (error)
				return(error);	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da_fixhashpath(state, &state->path);
		/*
		 * If we won't need this block again, it's getting dropped
		 * from the active path by the loop control, so we need
		 * to mark it done now.
		 */
		if (i > 0 || !addblk)
			xfs_da_buf_done(oldblk->bp);
	}
	if (!addblk)
		return(0);

	/*
	 * Split the root node: the split propagated all the way up.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da_root_split(state, oldblk, addblk);
	if (error) {
		xfs_da_buf_done(oldblk->bp);
		xfs_da_buf_done(addblk->bp);
		addblk->bp = NULL;
		return(error);	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and
	 * just got bumped because of the addition of a new root node.
	 * There might be three blocks involved if a double split occurred,
	 * and the original block 0 could be at any position in the list.
	 */

	node = oldblk->bp->data;
	if (node->hdr.info.forw) {
		/* Fix the back-pointer of whichever sibling follows us. */
		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	node = oldblk->bp->data;
	if (node->hdr.info.back) {
		/* Fix the forward-pointer of whichever sibling precedes us. */
		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	xfs_da_buf_done(oldblk->bp);
	xfs_da_buf_done(addblk->bp);
	addblk->bp = NULL;
	return(0);
}

/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in process.
 *
 * On return blk1 points at the relocated copy of the old root and the
 * root-position block holds a new 2-entry intermediate node referencing
 * blk1 and blk2.  Returns 0 or an error from inode-grow / buffer-get.
 */
STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				 xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node, *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;
	int error, size;
	xfs_inode_t *dp;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_dir2_leaf_t *leaf;

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	ASSERT(args != NULL);
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return(error);
	dp = args->dp;
	tp = args->trans;
	mp = state->mp;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	oldroot = blk1->bp->data;
	/*
	 * Only the used portion of the old root is copied; its size
	 * depends on whether it is an intermediate node or a dir2 leaf.
	 */
	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
			     (char *)oldroot);
	} else {
		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
		leaf = (xfs_dir2_leaf_t *)oldroot;
		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
			     (char *)leaf);
	}
	memcpy(node, oldroot, size);
	xfs_da_log_buf(tp, bp, 0, size - 1);
	/* blk1 now refers to the relocated copy, not the root position. */
	xfs_da_buf_done(blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node at the fixed root block (m_dirleafblk
	 * for the data fork, block 0 for the attr fork), one level up.
	 */
	error = xfs_da_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
	if (error)
		return(error);
	node = bp->data;
	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
	node->btree[0].before = cpu_to_be32(blk1->blkno);
	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
	node->btree[1].before = cpu_to_be32(blk2->blkno);
	node->hdr.count = cpu_to_be16(2);

#ifdef DEBUG
	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
		       blk1->blkno < mp->m_dirfreeblk);
		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
		       blk2->blkno < mp->m_dirfreeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, node->btree,
			sizeof(xfs_da_node_entry_t) * 2));
	xfs_da_buf_done(bp);

	return(0);
}

/*
 * Split the node, rebalance, then add the new entry.
 *
 * If oldblk cannot hold the pending insertion(s), a sibling node is
 * allocated and entries are rebalanced across the pair; *result is set
 * to 1 when a split happened (so the caller propagates newblk upward),
 * else 0.  The new child (addblk) — and, for attr double splits, the
 * extra block — is then inserted into whichever node now covers its
 * index.  Returns 0 or an error.
 */
STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
				 xfs_da_state_blk_t *newblk,
				 xfs_da_state_blk_t *addblk,
				 int treelevel, int *result)
{
	xfs_da_intnode_t *node;
	xfs_dablk_t blkno;
	int newcount, error;
	int useextra;

	node = oldblk->bp->data;
	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);

	/*
	 * With V2 dirs the extra block is data or freespace,
	 * so the double-split extra entry only applies to the attr fork.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return(error);	/* GROT: dir is inconsistent */

		error = xfs_da_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return(error);	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da_node_rebalance(state, oldblk, newblk);
		error = xfs_da_blk_link(state, oldblk, newblk);
		if (error)
			return(error);
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->data;
	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
		oldblk->index++;
		xfs_da_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		/* Insertion point moved into the new sibling. */
		newblk->index++;
		xfs_da_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return(0);
}

/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 *
 * Also refreshes blk1/blk2 hashvals for upward propagation and remaps
 * blk1->index / blk2->index so the caller inserts into the right node.
 */
STATIC void
xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				     xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node1, *node2, *tmpnode;
	xfs_da_node_entry_t *btree_s, *btree_d;
	int count, tmp;
	xfs_trans_t *tp;

	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler, i.e. ensure
	 * node1's hashvals come before node2's.
	 */
	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
	}
	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	/* Positive count: move node1 -> node2; negative: node2 -> node1. */
	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &node2->btree[0];
			btree_d = &node2->btree[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		be16_add(&node2->hdr.count, count);
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
		btree_d = &node2->btree[0];
		memcpy(btree_d, btree_s, tmp);
		be16_add(&node1->hdr.count, -count);
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[0];
		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
		memcpy(btree_d, btree_s, tmp);
		be16_add(&node1->hdr.count, count);
		xfs_da_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp = be16_to_cpu(node2->hdr.count) - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[count];
		btree_d = &node2->btree[0];
		memmove(btree_d, btree_s, tmp);
		be16_add(&node2->hdr.count, -count);
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_da_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
	xfs_da_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
			sizeof(node2->hdr) +
			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);

	/*
	 * Adjust the expected index for insertion: if it fell past the
	 * end of node1 it now belongs in node2.
	 */
	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
	}
}

/*
 * Add a new entry to an intermediate node.
 *
 * Inserts newblk's (hashval, blkno) BEFORE oldblk->index in oldblk's
 * node, shifting later entries up, logging both the entry range and the
 * header, and refreshing oldblk->hashval for upward propagation.
 */
STATIC void
xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
			       xfs_da_state_blk_t *newblk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_mount_t *mp;

	node = oldblk->bp->data;
	mp = state->mp;
	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK)
		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
		       newblk->blkno < mp->m_dirfreeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 */
	tmp = 0;
	btree = &node->btree[ oldblk->index ];
	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
		memmove(btree + 1, btree, tmp);
	}
	btree->hashval = cpu_to_be32(newblk->hashval);
	btree->before = cpu_to_be32(newblk->blkno);
	/* Log the inserted entry plus everything that was shifted. */
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
	be16_add(&node->hdr.count, 1);
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
}

/*========================================================================
 * Routines used for shrinking the Btree.
 *========================================================================*/

/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 *
 * Mirror image of xfs_da_split(): walks state->path from the leaf back
 * toward the root, coalescing/unlinking blocks that became too small,
 * and finally collapses a single-child root via xfs_da_root_join().
 * Returns 0 or the first error encountered.
 */
int
xfs_da_join(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *drop_blk, *save_blk;
	int action, error;

	action = 0;
	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for ( ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return(0);
			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da_node_remove(state, drop_blk);
			xfs_da_fixhashpath(state, &state->path);
			error = xfs_da_node_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return 0;
			xfs_da_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		xfs_da_fixhashpath(state, &state->altpath);
		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return(error);
		/* Give the dropped block back to the inode's free space. */
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return(error);
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da_node_remove(state, drop_blk);
	xfs_da_fixhashpath(state, &state->path);
	error = xfs_da_root_join(state, &state->path.blk[0]);
	return(error);
}

/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 *
 * No-op (returns 0) when the root still has more than one child.
 */
STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
	xfs_da_intnode_t *oldroot;
	/* REFERENCED */
	xfs_da_blkinfo_t *blkinfo;
	xfs_da_args_t *args;
	xfs_dablk_t child;
	xfs_dabuf_t *bp;
	int error;

	args = state->args;
	ASSERT(args != NULL);
	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
	oldroot = root_blk->bp->data;
	ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	/* The root never has siblings. */
	ASSERT(!oldroot->hdr.info.forw);
	ASSERT(!oldroot->hdr.info.back);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (be16_to_cpu(oldroot->hdr.count) > 1)
		return(0);

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = be32_to_cpu(oldroot->btree[0].before);
	ASSERT(child != 0);
	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	blkinfo = bp->data;
	if (be16_to_cpu(oldroot->hdr.level) == 1) {
		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
	} else {
		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
	}
	/* The sole child must itself be sibling-less. */
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
	memcpy(root_blk->bp->data, bp->data, state->blocksize);
	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return(error);
}

/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 *
 * (The return codes above are delivered through *action; the function's
 * own return value is 0 or an error.)
 */
STATIC int
xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
{
	xfs_da_intnode_t *node;
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	int count, forward, error, retval, i;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->data;
	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
	node = (xfs_da_intnode_t *)info;
	count = be16_to_cpu(node->hdr.count);
	if (count > (state->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return(0);	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return(error);
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return(0);
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* start with smaller blk num */
	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
	for (i = 0; i < 2; forward = !forward, i++) {
		if (forward)
			blkno = be32_to_cpu(info->forw);
		else
			blkno = be32_to_cpu(info->back);
		if (blkno == 0)
			continue;
		error = xfs_da_read_buf(state->args->trans, state->args->dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return(error);
		ASSERT(bp != NULL);

		/*
		 * Budget: (75% of capacity) minus our count minus the
		 * sibling's count must be >= 0 for the merge to fit.
		 */
		node = (xfs_da_intnode_t *)info;
		count = state->node_ents;
		count -= state->node_ents >> 2;
		count -= be16_to_cpu(node->hdr.count);
		node = bp->data;
		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
		count -= be16_to_cpu(node->hdr.count);
		xfs_da_brelse(state->args->trans, bp);
		if (count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		*action = 0;
		return(0);
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	} else {
		error = xfs_da_path_shift(state, &state->path, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	}
	*action = 1;
	return(0);
}

/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 *
 * Reads the last (largest) hashval of the bottom block on @path, then
 * rewrites the matching parent entry at each level upward until a level
 * already holds the right value.
 */
void
xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
{
	xfs_da_state_blk_t *blk;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dahash_t lasthash=0;
	int level, count;

	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da_node_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	for (blk--, level--; level >= 0; blk--, level--) {
		node = blk->bp->data;
		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
		btree = &node->btree[ blk->index ];
		/* Parent already correct: nothing above can change either. */
		if (be32_to_cpu(btree->hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree->hashval = cpu_to_be32(lasthash);
		xfs_da_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));

		/* Propagate this node's own last hashval to its parent. */
		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
	}
}

/*
 * Remove an entry from an intermediate node.
 *
 * Shifts later entries down over drop_blk->index, zeroes the vacated
 * slot, decrements the count, and records the node's new last hashval
 * in drop_blk->hashval for upward propagation.
 */
STATIC void
xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;

	node = drop_blk->bp->data;
	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	btree = &node->btree[drop_blk->index];
	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
		tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, btree + 1, tmp);
		xfs_da_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, btree, tmp));
		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
	}
	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
	xfs_da_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
	be16_add(&node->hdr.count, -1);
	xfs_da_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	btree--;
	drop_blk->hashval = be32_to_cpu(btree->hashval);
}

/*
 * Unbalance the btree elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 *
 * All entries of drop_blk are folded into save_blk — prepended when the
 * dying block's hashvals sort lower, appended otherwise — and
 * save_blk->hashval is refreshed for upward propagation.
 */
STATIC void
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				     xfs_da_state_blk_t *save_blk)
{
	xfs_da_intnode_t *drop_node, *save_node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_trans_t *tp;

	drop_node = drop_blk->bp->data;
	save_node = save_blk->bp->data;
	ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
	{
		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, &save_node->btree[0], tmp);
		btree = &save_node->btree[0];
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
				sizeof(xfs_da_node_entry_t)));
	} else {
		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				be16_to_cpu(drop_node->hdr.count) *
				sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 * (btree points at the destination slot chosen above.)
	 */
	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(btree, &drop_node->btree[0], tmp);
	be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));

	xfs_da_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
			sizeof(save_node->hdr)));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
}

/*========================================================================
 * Routines used for finding things in the Btree.
 *========================================================================*/

/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
1040 * 1041 * We will set the state structure to point to each of the elements 1042 * in each of the nodes where either the hashval is or should be. 1043 * 1044 * We support duplicate hashval's so for each entry in the current 1045 * node that could contain the desired hashval, descend. This is a 1046 * pruned depth-first tree search. 1047 */ 1048int /* error */ 1049xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) 1050{ 1051 xfs_da_state_blk_t *blk; 1052 xfs_da_blkinfo_t *curr; 1053 xfs_da_intnode_t *node; 1054 xfs_da_node_entry_t *btree; 1055 xfs_dablk_t blkno; 1056 int probe, span, max, error, retval; 1057 xfs_dahash_t hashval, btreehashval; 1058 xfs_da_args_t *args; 1059 1060 args = state->args; 1061 1062 /* 1063 * Descend thru the B-tree searching each level for the right 1064 * node to use, until the right hashval is found. 1065 */ 1066 blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0; 1067 for (blk = &state->path.blk[0], state->path.active = 1; 1068 state->path.active <= XFS_DA_NODE_MAXDEPTH; 1069 blk++, state->path.active++) { 1070 /* 1071 * Read the next node down in the tree. 1072 */ 1073 blk->blkno = blkno; 1074 error = xfs_da_read_buf(args->trans, args->dp, blkno, 1075 -1, &blk->bp, args->whichfork); 1076 if (error) { 1077 blk->blkno = 0; 1078 state->path.active--; 1079 return(error); 1080 } 1081 curr = blk->bp->data; 1082 blk->magic = be16_to_cpu(curr->magic); 1083 ASSERT(blk->magic == XFS_DA_NODE_MAGIC || 1084 blk->magic == XFS_DIR2_LEAFN_MAGIC || 1085 blk->magic == XFS_ATTR_LEAF_MAGIC); 1086 1087 /* 1088 * Search an intermediate node for a match. 1089 */ 1090 if (blk->magic == XFS_DA_NODE_MAGIC) { 1091 node = blk->bp->data; 1092 max = be16_to_cpu(node->hdr.count); 1093 blk->hashval = be32_to_cpu(node->btree[max-1].hashval); 1094 1095 /* 1096 * Binary search. 
(note: small blocks will skip loop) 1097 */ 1098 probe = span = max / 2; 1099 hashval = args->hashval; 1100 for (btree = &node->btree[probe]; span > 4; 1101 btree = &node->btree[probe]) { 1102 span /= 2; 1103 btreehashval = be32_to_cpu(btree->hashval); 1104 if (btreehashval < hashval) 1105 probe += span; 1106 else if (btreehashval > hashval) 1107 probe -= span; 1108 else 1109 break; 1110 } 1111 ASSERT((probe >= 0) && (probe < max)); 1112 ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval)); 1113 1114 /* 1115 * Since we may have duplicate hashval's, find the first 1116 * matching hashval in the node. 1117 */ 1118 while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) { 1119 btree--; 1120 probe--; 1121 } 1122 while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) { 1123 btree++; 1124 probe++; 1125 } 1126 1127 /* 1128 * Pick the right block to descend on. 1129 */ 1130 if (probe == max) { 1131 blk->index = max-1; 1132 blkno = be32_to_cpu(node->btree[max-1].before); 1133 } else { 1134 blk->index = probe; 1135 blkno = be32_to_cpu(btree->before); 1136 } 1137 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1138 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1139 break; 1140 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { 1141 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); 1142 break; 1143 } 1144 } 1145 1146 /* 1147 * A leaf block that ends in the hashval that we are interested in 1148 * (final hashval == search hashval) means that the next block may 1149 * contain more entries with the same hashval, shift upward to the 1150 * next leaf and keep searching. 
1151 */ 1152 for (;;) { 1153 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { 1154 retval = xfs_dir2_leafn_lookup_int(blk->bp, args, 1155 &blk->index, state); 1156 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1157 retval = xfs_attr_leaf_lookup_int(blk->bp, args); 1158 blk->index = args->index; 1159 args->blkno = blk->blkno; 1160 } else { 1161 ASSERT(0); 1162 return XFS_ERROR(EFSCORRUPTED); 1163 } 1164 if (((retval == ENOENT) || (retval == ENOATTR)) && 1165 (blk->hashval == args->hashval)) { 1166 error = xfs_da_path_shift(state, &state->path, 1, 1, 1167 &retval); 1168 if (error) 1169 return(error); 1170 if (retval == 0) { 1171 continue; 1172 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1173 /* path_shift() gives ENOENT */ 1174 retval = XFS_ERROR(ENOATTR); 1175 } 1176 } 1177 break; 1178 } 1179 *result = retval; 1180 return(0); 1181} 1182 1183/*======================================================================== 1184 * Utility routines. 1185 *========================================================================*/ 1186 1187/* 1188 * Link a new block into a doubly linked list of blocks (of whatever type). 1189 */ 1190int /* error */ 1191xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, 1192 xfs_da_state_blk_t *new_blk) 1193{ 1194 xfs_da_blkinfo_t *old_info, *new_info, *tmp_info; 1195 xfs_da_args_t *args; 1196 int before=0, error; 1197 xfs_dabuf_t *bp; 1198 1199 /* 1200 * Set up environment. 
1201 */ 1202 args = state->args; 1203 ASSERT(args != NULL); 1204 old_info = old_blk->bp->data; 1205 new_info = new_blk->bp->data; 1206 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || 1207 old_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1208 old_blk->magic == XFS_ATTR_LEAF_MAGIC); 1209 ASSERT(old_blk->magic == be16_to_cpu(old_info->magic)); 1210 ASSERT(new_blk->magic == be16_to_cpu(new_info->magic)); 1211 ASSERT(old_blk->magic == new_blk->magic); 1212 1213 switch (old_blk->magic) { 1214 case XFS_ATTR_LEAF_MAGIC: 1215 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); 1216 break; 1217 case XFS_DIR2_LEAFN_MAGIC: 1218 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); 1219 break; 1220 case XFS_DA_NODE_MAGIC: 1221 before = xfs_da_node_order(old_blk->bp, new_blk->bp); 1222 break; 1223 } 1224 1225 /* 1226 * Link blocks in appropriate order. 1227 */ 1228 if (before) { 1229 /* 1230 * Link new block in before existing block. 1231 */ 1232 new_info->forw = cpu_to_be32(old_blk->blkno); 1233 new_info->back = old_info->back; 1234 if (old_info->back) { 1235 error = xfs_da_read_buf(args->trans, args->dp, 1236 be32_to_cpu(old_info->back), 1237 -1, &bp, args->whichfork); 1238 if (error) 1239 return(error); 1240 ASSERT(bp != NULL); 1241 tmp_info = bp->data; 1242 ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic)); 1243 ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); 1244 tmp_info->forw = cpu_to_be32(new_blk->blkno); 1245 xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1246 xfs_da_buf_done(bp); 1247 } 1248 old_info->back = cpu_to_be32(new_blk->blkno); 1249 } else { 1250 /* 1251 * Link new block in after existing block. 
1252 */ 1253 new_info->forw = old_info->forw; 1254 new_info->back = cpu_to_be32(old_blk->blkno); 1255 if (old_info->forw) { 1256 error = xfs_da_read_buf(args->trans, args->dp, 1257 be32_to_cpu(old_info->forw), 1258 -1, &bp, args->whichfork); 1259 if (error) 1260 return(error); 1261 ASSERT(bp != NULL); 1262 tmp_info = bp->data; 1263 ASSERT(tmp_info->magic == old_info->magic); 1264 ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); 1265 tmp_info->back = cpu_to_be32(new_blk->blkno); 1266 xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1267 xfs_da_buf_done(bp); 1268 } 1269 old_info->forw = cpu_to_be32(new_blk->blkno); 1270 } 1271 1272 xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); 1273 xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); 1274 return(0); 1275} 1276 1277/* 1278 * Compare two intermediate nodes for "order". 1279 */ 1280STATIC int 1281xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) 1282{ 1283 xfs_da_intnode_t *node1, *node2; 1284 1285 node1 = node1_bp->data; 1286 node2 = node2_bp->data; 1287 ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && 1288 (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); 1289 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && 1290 ((be32_to_cpu(node2->btree[0].hashval) < 1291 be32_to_cpu(node1->btree[0].hashval)) || 1292 (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) < 1293 be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) { 1294 return(1); 1295 } 1296 return(0); 1297} 1298 1299/* 1300 * Pick up the last hashvalue from an intermediate node. 
1301 */ 1302STATIC uint 1303xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) 1304{ 1305 xfs_da_intnode_t *node; 1306 1307 node = bp->data; 1308 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 1309 if (count) 1310 *count = be16_to_cpu(node->hdr.count); 1311 if (!node->hdr.count) 1312 return(0); 1313 return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1314} 1315 1316/* 1317 * Unlink a block from a doubly linked list of blocks. 1318 */ 1319STATIC int /* error */ 1320xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 1321 xfs_da_state_blk_t *save_blk) 1322{ 1323 xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info; 1324 xfs_da_args_t *args; 1325 xfs_dabuf_t *bp; 1326 int error; 1327 1328 /* 1329 * Set up environment. 1330 */ 1331 args = state->args; 1332 ASSERT(args != NULL); 1333 save_info = save_blk->bp->data; 1334 drop_info = drop_blk->bp->data; 1335 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || 1336 save_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1337 save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1338 ASSERT(save_blk->magic == be16_to_cpu(save_info->magic)); 1339 ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic)); 1340 ASSERT(save_blk->magic == drop_blk->magic); 1341 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || 1342 (be32_to_cpu(save_info->back) == drop_blk->blkno)); 1343 ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || 1344 (be32_to_cpu(drop_info->back) == save_blk->blkno)); 1345 1346 /* 1347 * Unlink the leaf block from the doubly linked chain of leaves. 
1348 */ 1349 if (be32_to_cpu(save_info->back) == drop_blk->blkno) { 1350 save_info->back = drop_info->back; 1351 if (drop_info->back) { 1352 error = xfs_da_read_buf(args->trans, args->dp, 1353 be32_to_cpu(drop_info->back), 1354 -1, &bp, args->whichfork); 1355 if (error) 1356 return(error); 1357 ASSERT(bp != NULL); 1358 tmp_info = bp->data; 1359 ASSERT(tmp_info->magic == save_info->magic); 1360 ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); 1361 tmp_info->forw = cpu_to_be32(save_blk->blkno); 1362 xfs_da_log_buf(args->trans, bp, 0, 1363 sizeof(*tmp_info) - 1); 1364 xfs_da_buf_done(bp); 1365 } 1366 } else { 1367 save_info->forw = drop_info->forw; 1368 if (drop_info->forw) { 1369 error = xfs_da_read_buf(args->trans, args->dp, 1370 be32_to_cpu(drop_info->forw), 1371 -1, &bp, args->whichfork); 1372 if (error) 1373 return(error); 1374 ASSERT(bp != NULL); 1375 tmp_info = bp->data; 1376 ASSERT(tmp_info->magic == save_info->magic); 1377 ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); 1378 tmp_info->back = cpu_to_be32(save_blk->blkno); 1379 xfs_da_log_buf(args->trans, bp, 0, 1380 sizeof(*tmp_info) - 1); 1381 xfs_da_buf_done(bp); 1382 } 1383 } 1384 1385 xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); 1386 return(0); 1387} 1388 1389/* 1390 * Move a path "forward" or "!forward" one block at the current level. 1391 * 1392 * This routine will adjust a "path" to point to the next block 1393 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the 1394 * Btree, including updating pointers to the intermediate nodes between 1395 * the new bottom and the root. 
1396 */ 1397int /* error */ 1398xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, 1399 int forward, int release, int *result) 1400{ 1401 xfs_da_state_blk_t *blk; 1402 xfs_da_blkinfo_t *info; 1403 xfs_da_intnode_t *node; 1404 xfs_da_args_t *args; 1405 xfs_dablk_t blkno=0; 1406 int level, error; 1407 1408 /* 1409 * Roll up the Btree looking for the first block where our 1410 * current index is not at the edge of the block. Note that 1411 * we skip the bottom layer because we want the sibling block. 1412 */ 1413 args = state->args; 1414 ASSERT(args != NULL); 1415 ASSERT(path != NULL); 1416 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1417 level = (path->active-1) - 1; /* skip bottom layer in path */ 1418 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1419 ASSERT(blk->bp != NULL); 1420 node = blk->bp->data; 1421 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 1422 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { 1423 blk->index++; 1424 blkno = be32_to_cpu(node->btree[blk->index].before); 1425 break; 1426 } else if (!forward && (blk->index > 0)) { 1427 blk->index--; 1428 blkno = be32_to_cpu(node->btree[blk->index].before); 1429 break; 1430 } 1431 } 1432 if (level < 0) { 1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */ 1434 ASSERT(args->oknoent); 1435 return(0); 1436 } 1437 1438 /* 1439 * Roll down the edge of the subtree until we reach the 1440 * same depth we were at originally. 1441 */ 1442 for (blk++, level++; level < path->active; blk++, level++) { 1443 /* 1444 * Release the old block. 1445 * (if it's dirty, trans won't actually let go) 1446 */ 1447 if (release) 1448 xfs_da_brelse(args->trans, blk->bp); 1449 1450 /* 1451 * Read the next child block. 
1452 */ 1453 blk->blkno = blkno; 1454 error = xfs_da_read_buf(args->trans, args->dp, blkno, -1, 1455 &blk->bp, args->whichfork); 1456 if (error) 1457 return(error); 1458 ASSERT(blk->bp != NULL); 1459 info = blk->bp->data; 1460 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || 1461 be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || 1462 be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); 1463 blk->magic = be16_to_cpu(info->magic); 1464 if (blk->magic == XFS_DA_NODE_MAGIC) { 1465 node = (xfs_da_intnode_t *)info; 1466 blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1467 if (forward) 1468 blk->index = 0; 1469 else 1470 blk->index = be16_to_cpu(node->hdr.count)-1; 1471 blkno = be32_to_cpu(node->btree[blk->index].before); 1472 } else { 1473 ASSERT(level == path->active-1); 1474 blk->index = 0; 1475 switch(blk->magic) { 1476 case XFS_ATTR_LEAF_MAGIC: 1477 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, 1478 NULL); 1479 break; 1480 case XFS_DIR2_LEAFN_MAGIC: 1481 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, 1482 NULL); 1483 break; 1484 default: 1485 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC || 1486 blk->magic == XFS_DIR2_LEAFN_MAGIC); 1487 break; 1488 } 1489 } 1490 } 1491 *result = 0; 1492 return(0); 1493} 1494 1495 1496/*======================================================================== 1497 * Utility routines. 1498 *========================================================================*/ 1499 1500/* 1501 * Implement a simple hash on a character string. 1502 * Rotate the hash value by 7 bits, then XOR each character in. 1503 * This is implemented with some source-level loop unrolling. 1504 */ 1505xfs_dahash_t 1506xfs_da_hashname(const uchar_t *name, int namelen) 1507{ 1508 xfs_dahash_t hash; 1509 1510 /* 1511 * Do four characters at a time as long as we can. 
1512 */ 1513 for (hash = 0; namelen >= 4; namelen -= 4, name += 4) 1514 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ 1515 (name[3] << 0) ^ rol32(hash, 7 * 4); 1516 1517 /* 1518 * Now do the rest of the characters. 1519 */ 1520 switch (namelen) { 1521 case 3: 1522 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ 1523 rol32(hash, 7 * 3); 1524 case 2: 1525 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); 1526 case 1: 1527 return (name[0] << 0) ^ rol32(hash, 7 * 1); 1528 default: /* case 0: */ 1529 return hash; 1530 } 1531} 1532 1533/* 1534 * Add a block to the btree ahead of the file. 1535 * Return the new block number to the caller. 1536 */ 1537int 1538xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) 1539{ 1540 xfs_fileoff_t bno, b; 1541 xfs_bmbt_irec_t map; 1542 xfs_bmbt_irec_t *mapp; 1543 xfs_inode_t *dp; 1544 int nmap, error, w, count, c, got, i, mapi; 1545 xfs_trans_t *tp; 1546 xfs_mount_t *mp; 1547 1548 dp = args->dp; 1549 mp = dp->i_mount; 1550 w = args->whichfork; 1551 tp = args->trans; 1552 /* 1553 * For new directories adjust the file offset and block count. 1554 */ 1555 if (w == XFS_DATA_FORK) { 1556 bno = mp->m_dirleafblk; 1557 count = mp->m_dirblkfsbs; 1558 } else { 1559 bno = 0; 1560 count = 1; 1561 } 1562 /* 1563 * Find a spot in the file space to put the new block. 1564 */ 1565 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) 1566 return error; 1567 if (w == XFS_DATA_FORK) 1568 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); 1569 /* 1570 * Try mapping it in one filesystem block. 
1571 */ 1572 nmap = 1; 1573 ASSERT(args->firstblock != NULL); 1574 if ((error = xfs_bmapi(tp, dp, bno, count, 1575 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1576 XFS_BMAPI_CONTIG, 1577 args->firstblock, args->total, &map, &nmap, 1578 args->flist, NULL))) { 1579 return error; 1580 } 1581 ASSERT(nmap <= 1); 1582 if (nmap == 1) { 1583 mapp = ↦ 1584 mapi = 1; 1585 } 1586 /* 1587 * If we didn't get it and the block might work if fragmented, 1588 * try without the CONTIG flag. Loop until we get it all. 1589 */ 1590 else if (nmap == 0 && count > 1) { 1591 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); 1592 for (b = bno, mapi = 0; b < bno + count; ) { 1593 nmap = MIN(XFS_BMAP_MAX_NMAP, count); 1594 c = (int)(bno + count - b); 1595 if ((error = xfs_bmapi(tp, dp, b, c, 1596 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE| 1597 XFS_BMAPI_METADATA, 1598 args->firstblock, args->total, 1599 &mapp[mapi], &nmap, args->flist, 1600 NULL))) { 1601 kmem_free(mapp, sizeof(*mapp) * count); 1602 return error; 1603 } 1604 if (nmap < 1) 1605 break; 1606 mapi += nmap; 1607 b = mapp[mapi - 1].br_startoff + 1608 mapp[mapi - 1].br_blockcount; 1609 } 1610 } else { 1611 mapi = 0; 1612 mapp = NULL; 1613 } 1614 /* 1615 * Count the blocks we got, make sure it matches the total. 1616 */ 1617 for (i = 0, got = 0; i < mapi; i++) 1618 got += mapp[i].br_blockcount; 1619 if (got != count || mapp[0].br_startoff != bno || 1620 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 1621 bno + count) { 1622 if (mapp != &map) 1623 kmem_free(mapp, sizeof(*mapp) * count); 1624 return XFS_ERROR(ENOSPC); 1625 } 1626 if (mapp != &map) 1627 kmem_free(mapp, sizeof(*mapp) * count); 1628 *new_blkno = (xfs_dablk_t)bno; 1629 return 0; 1630} 1631 1632/* 1633 * Ick. We need to always be able to remove a btree block, even 1634 * if there's no space reservation because the filesystem is full. 1635 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. 
1636 * It swaps the target block with the last block in the file. The 1637 * last block in the file can always be removed since it can't cause 1638 * a bmap btree split to do that. 1639 */ 1640STATIC int 1641xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, 1642 xfs_dabuf_t **dead_bufp) 1643{ 1644 xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno; 1645 xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf; 1646 xfs_fileoff_t lastoff; 1647 xfs_inode_t *ip; 1648 xfs_trans_t *tp; 1649 xfs_mount_t *mp; 1650 int error, w, entno, level, dead_level; 1651 xfs_da_blkinfo_t *dead_info, *sib_info; 1652 xfs_da_intnode_t *par_node, *dead_node; 1653 xfs_dir2_leaf_t *dead_leaf2; 1654 xfs_dahash_t dead_hash; 1655 1656 dead_buf = *dead_bufp; 1657 dead_blkno = *dead_blknop; 1658 tp = args->trans; 1659 ip = args->dp; 1660 w = args->whichfork; 1661 ASSERT(w == XFS_DATA_FORK); 1662 mp = ip->i_mount; 1663 lastoff = mp->m_dirfreeblk; 1664 error = xfs_bmap_last_before(tp, ip, &lastoff, w); 1665 if (error) 1666 return error; 1667 if (unlikely(lastoff == 0)) { 1668 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, 1669 mp); 1670 return XFS_ERROR(EFSCORRUPTED); 1671 } 1672 /* 1673 * Read the last block in the btree space. 1674 */ 1675 last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; 1676 if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w))) 1677 return error; 1678 /* 1679 * Copy the last block into the dead buffer and log it. 1680 */ 1681 memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize); 1682 xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); 1683 dead_info = dead_buf->data; 1684 /* 1685 * Get values from the moved block. 
1686 */ 1687 if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { 1688 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 1689 dead_level = 0; 1690 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); 1691 } else { 1692 ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); 1693 dead_node = (xfs_da_intnode_t *)dead_info; 1694 dead_level = be16_to_cpu(dead_node->hdr.level); 1695 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); 1696 } 1697 sib_buf = par_buf = NULL; 1698 /* 1699 * If the moved block has a left sibling, fix up the pointers. 1700 */ 1701 if ((sib_blkno = be32_to_cpu(dead_info->back))) { 1702 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) 1703 goto done; 1704 sib_info = sib_buf->data; 1705 if (unlikely( 1706 be32_to_cpu(sib_info->forw) != last_blkno || 1707 sib_info->magic != dead_info->magic)) { 1708 XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", 1709 XFS_ERRLEVEL_LOW, mp); 1710 error = XFS_ERROR(EFSCORRUPTED); 1711 goto done; 1712 } 1713 sib_info->forw = cpu_to_be32(dead_blkno); 1714 xfs_da_log_buf(tp, sib_buf, 1715 XFS_DA_LOGRANGE(sib_info, &sib_info->forw, 1716 sizeof(sib_info->forw))); 1717 xfs_da_buf_done(sib_buf); 1718 sib_buf = NULL; 1719 } 1720 /* 1721 * If the moved block has a right sibling, fix up the pointers. 
1722 */ 1723 if ((sib_blkno = be32_to_cpu(dead_info->forw))) { 1724 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) 1725 goto done; 1726 sib_info = sib_buf->data; 1727 if (unlikely( 1728 be32_to_cpu(sib_info->back) != last_blkno || 1729 sib_info->magic != dead_info->magic)) { 1730 XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", 1731 XFS_ERRLEVEL_LOW, mp); 1732 error = XFS_ERROR(EFSCORRUPTED); 1733 goto done; 1734 } 1735 sib_info->back = cpu_to_be32(dead_blkno); 1736 xfs_da_log_buf(tp, sib_buf, 1737 XFS_DA_LOGRANGE(sib_info, &sib_info->back, 1738 sizeof(sib_info->back))); 1739 xfs_da_buf_done(sib_buf); 1740 sib_buf = NULL; 1741 } 1742 par_blkno = mp->m_dirleafblk; 1743 level = -1; 1744 /* 1745 * Walk down the tree looking for the parent of the moved block. 1746 */ 1747 for (;;) { 1748 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) 1749 goto done; 1750 par_node = par_buf->data; 1751 if (unlikely( 1752 be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || 1753 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { 1754 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 1755 XFS_ERRLEVEL_LOW, mp); 1756 error = XFS_ERROR(EFSCORRUPTED); 1757 goto done; 1758 } 1759 level = be16_to_cpu(par_node->hdr.level); 1760 for (entno = 0; 1761 entno < be16_to_cpu(par_node->hdr.count) && 1762 be32_to_cpu(par_node->btree[entno].hashval) < dead_hash; 1763 entno++) 1764 continue; 1765 if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) { 1766 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", 1767 XFS_ERRLEVEL_LOW, mp); 1768 error = XFS_ERROR(EFSCORRUPTED); 1769 goto done; 1770 } 1771 par_blkno = be32_to_cpu(par_node->btree[entno].before); 1772 if (level == dead_level + 1) 1773 break; 1774 xfs_da_brelse(tp, par_buf); 1775 par_buf = NULL; 1776 } 1777 /* 1778 * We're in the right parent block. 1779 * Look for the right entry. 
1780 */ 1781 for (;;) { 1782 for (; 1783 entno < be16_to_cpu(par_node->hdr.count) && 1784 be32_to_cpu(par_node->btree[entno].before) != last_blkno; 1785 entno++) 1786 continue; 1787 if (entno < be16_to_cpu(par_node->hdr.count)) 1788 break; 1789 par_blkno = be32_to_cpu(par_node->hdr.info.forw); 1790 xfs_da_brelse(tp, par_buf); 1791 par_buf = NULL; 1792 if (unlikely(par_blkno == 0)) { 1793 XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", 1794 XFS_ERRLEVEL_LOW, mp); 1795 error = XFS_ERROR(EFSCORRUPTED); 1796 goto done; 1797 } 1798 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) 1799 goto done; 1800 par_node = par_buf->data; 1801 if (unlikely( 1802 be16_to_cpu(par_node->hdr.level) != level || 1803 be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { 1804 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 1805 XFS_ERRLEVEL_LOW, mp); 1806 error = XFS_ERROR(EFSCORRUPTED); 1807 goto done; 1808 } 1809 entno = 0; 1810 } 1811 /* 1812 * Update the parent entry pointing to the moved block. 1813 */ 1814 par_node->btree[entno].before = cpu_to_be32(dead_blkno); 1815 xfs_da_log_buf(tp, par_buf, 1816 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, 1817 sizeof(par_node->btree[entno].before))); 1818 xfs_da_buf_done(par_buf); 1819 xfs_da_buf_done(dead_buf); 1820 *dead_blknop = last_blkno; 1821 *dead_bufp = last_buf; 1822 return 0; 1823done: 1824 if (par_buf) 1825 xfs_da_brelse(tp, par_buf); 1826 if (sib_buf) 1827 xfs_da_brelse(tp, sib_buf); 1828 xfs_da_brelse(tp, last_buf); 1829 return error; 1830} 1831 1832/* 1833 * Remove a btree block from a directory or attribute. 
1834 */ 1835int 1836xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, 1837 xfs_dabuf_t *dead_buf) 1838{ 1839 xfs_inode_t *dp; 1840 int done, error, w, count; 1841 xfs_trans_t *tp; 1842 xfs_mount_t *mp; 1843 1844 dp = args->dp; 1845 w = args->whichfork; 1846 tp = args->trans; 1847 mp = dp->i_mount; 1848 if (w == XFS_DATA_FORK) 1849 count = mp->m_dirblkfsbs; 1850 else 1851 count = 1; 1852 for (;;) { 1853 /* 1854 * Remove extents. If we get ENOSPC for a dir we have to move 1855 * the last block to the place we want to kill. 1856 */ 1857 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 1858 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA, 1859 0, args->firstblock, args->flist, NULL, 1860 &done)) == ENOSPC) { 1861 if (w != XFS_DATA_FORK) 1862 break; 1863 if ((error = xfs_da_swap_lastblock(args, &dead_blkno, 1864 &dead_buf))) 1865 break; 1866 } else { 1867 break; 1868 } 1869 } 1870 xfs_da_binval(tp, dead_buf); 1871 return error; 1872} 1873 1874/* 1875 * See if the mapping(s) for this btree block are valid, i.e. 1876 * don't contain holes, are logically contiguous, and cover the whole range. 1877 */ 1878STATIC int 1879xfs_da_map_covers_blocks( 1880 int nmap, 1881 xfs_bmbt_irec_t *mapp, 1882 xfs_dablk_t bno, 1883 int count) 1884{ 1885 int i; 1886 xfs_fileoff_t off; 1887 1888 for (i = 0, off = bno; i < nmap; i++) { 1889 if (mapp[i].br_startblock == HOLESTARTBLOCK || 1890 mapp[i].br_startblock == DELAYSTARTBLOCK) { 1891 return 0; 1892 } 1893 if (off != mapp[i].br_startoff) { 1894 return 0; 1895 } 1896 off += mapp[i].br_blockcount; 1897 } 1898 return off == bno + count; 1899} 1900 1901/* 1902 * Make a dabuf. 1903 * Used for get_buf, read_buf, read_bufr, and reada_buf. 
1904 */ 1905STATIC int 1906xfs_da_do_buf( 1907 xfs_trans_t *trans, 1908 xfs_inode_t *dp, 1909 xfs_dablk_t bno, 1910 xfs_daddr_t *mappedbnop, 1911 xfs_dabuf_t **bpp, 1912 int whichfork, 1913 int caller, 1914 inst_t *ra) 1915{ 1916 xfs_buf_t *bp = NULL; 1917 xfs_buf_t **bplist; 1918 int error=0; 1919 int i; 1920 xfs_bmbt_irec_t map; 1921 xfs_bmbt_irec_t *mapp; 1922 xfs_daddr_t mappedbno; 1923 xfs_mount_t *mp; 1924 int nbplist=0; 1925 int nfsb; 1926 int nmap; 1927 xfs_dabuf_t *rbp; 1928 1929 mp = dp->i_mount; 1930 nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; 1931 mappedbno = *mappedbnop; 1932 /* 1933 * Caller doesn't have a mapping. -2 means don't complain 1934 * if we land in a hole. 1935 */ 1936 if (mappedbno == -1 || mappedbno == -2) { 1937 /* 1938 * Optimize the one-block case. 1939 */ 1940 if (nfsb == 1) { 1941 xfs_fsblock_t fsb; 1942 1943 if ((error = 1944 xfs_bmapi_single(trans, dp, whichfork, &fsb, 1945 (xfs_fileoff_t)bno))) { 1946 return error; 1947 } 1948 mapp = ↦ 1949 if (fsb == NULLFSBLOCK) { 1950 nmap = 0; 1951 } else { 1952 map.br_startblock = fsb; 1953 map.br_startoff = (xfs_fileoff_t)bno; 1954 map.br_blockcount = 1; 1955 nmap = 1; 1956 } 1957 } else { 1958 mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP); 1959 nmap = nfsb; 1960 if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno, 1961 nfsb, 1962 XFS_BMAPI_METADATA | 1963 XFS_BMAPI_AFLAG(whichfork), 1964 NULL, 0, mapp, &nmap, NULL, NULL))) 1965 goto exit0; 1966 } 1967 } else { 1968 map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); 1969 map.br_startoff = (xfs_fileoff_t)bno; 1970 map.br_blockcount = nfsb; 1971 mapp = ↦ 1972 nmap = 1; 1973 } 1974 if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) { 1975 error = mappedbno == -2 ? 
0 : XFS_ERROR(EFSCORRUPTED); 1976 if (unlikely(error == EFSCORRUPTED)) { 1977 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 1978 int i; 1979 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", 1980 (long long)bno); 1981 cmn_err(CE_ALERT, "dir: inode %lld\n", 1982 (long long)dp->i_ino); 1983 for (i = 0; i < nmap; i++) { 1984 cmn_err(CE_ALERT, 1985 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", 1986 i, 1987 (long long)mapp[i].br_startoff, 1988 (long long)mapp[i].br_startblock, 1989 (long long)mapp[i].br_blockcount, 1990 mapp[i].br_state); 1991 } 1992 } 1993 XFS_ERROR_REPORT("xfs_da_do_buf(1)", 1994 XFS_ERRLEVEL_LOW, mp); 1995 } 1996 goto exit0; 1997 } 1998 if (caller != 3 && nmap > 1) { 1999 bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP); 2000 nbplist = 0; 2001 } else 2002 bplist = NULL; 2003 /* 2004 * Turn the mapping(s) into buffer(s). 2005 */ 2006 for (i = 0; i < nmap; i++) { 2007 int nmapped; 2008 2009 mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock); 2010 if (i == 0) 2011 *mappedbnop = mappedbno; 2012 nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount); 2013 switch (caller) { 2014 case 0: 2015 bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, 2016 mappedbno, nmapped, 0); 2017 error = bp ? 
XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO); 2018 break; 2019 case 1: 2020 case 2: 2021 bp = NULL; 2022 error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp, 2023 mappedbno, nmapped, 0, &bp); 2024 break; 2025 case 3: 2026 xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); 2027 error = 0; 2028 bp = NULL; 2029 break; 2030 } 2031 if (error) { 2032 if (bp) 2033 xfs_trans_brelse(trans, bp); 2034 goto exit1; 2035 } 2036 if (!bp) 2037 continue; 2038 if (caller == 1) { 2039 if (whichfork == XFS_ATTR_FORK) { 2040 XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE, 2041 XFS_ATTR_BTREE_REF); 2042 } else { 2043 XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE, 2044 XFS_DIR_BTREE_REF); 2045 } 2046 } 2047 if (bplist) { 2048 bplist[nbplist++] = bp; 2049 } 2050 } 2051 /* 2052 * Build a dabuf structure. 2053 */ 2054 if (bplist) { 2055 rbp = xfs_da_buf_make(nbplist, bplist, ra); 2056 } else if (bp) 2057 rbp = xfs_da_buf_make(1, &bp, ra); 2058 else 2059 rbp = NULL; 2060 /* 2061 * For read_buf, check the magic number. 2062 */ 2063 if (caller == 1) { 2064 xfs_dir2_data_t *data; 2065 xfs_dir2_free_t *free; 2066 xfs_da_blkinfo_t *info; 2067 uint magic, magic1; 2068 2069 info = rbp->data; 2070 data = rbp->data; 2071 free = rbp->data; 2072 magic = be16_to_cpu(info->magic); 2073 magic1 = be32_to_cpu(data->hdr.magic); 2074 if (unlikely( 2075 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && 2076 (magic != XFS_ATTR_LEAF_MAGIC) && 2077 (magic != XFS_DIR2_LEAF1_MAGIC) && 2078 (magic != XFS_DIR2_LEAFN_MAGIC) && 2079 (magic1 != XFS_DIR2_BLOCK_MAGIC) && 2080 (magic1 != XFS_DIR2_DATA_MAGIC) && 2081 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), 2082 mp, XFS_ERRTAG_DA_READ_BUF, 2083 XFS_RANDOM_DA_READ_BUF))) { 2084 xfs_buftrace("DA READ ERROR", rbp->bps[0]); 2085 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", 2086 XFS_ERRLEVEL_LOW, mp, info); 2087 error = XFS_ERROR(EFSCORRUPTED); 2088 xfs_da_brelse(trans, rbp); 2089 nbplist = 0; 2090 goto exit1; 2091 } 2092 } 2093 if (bplist) { 2094 kmem_free(bplist, sizeof(*bplist) * 
nmap); 2095 } 2096 if (mapp != &map) { 2097 kmem_free(mapp, sizeof(*mapp) * nfsb); 2098 } 2099 if (bpp) 2100 *bpp = rbp; 2101 return 0; 2102exit1: 2103 if (bplist) { 2104 for (i = 0; i < nbplist; i++) 2105 xfs_trans_brelse(trans, bplist[i]); 2106 kmem_free(bplist, sizeof(*bplist) * nmap); 2107 } 2108exit0: 2109 if (mapp != &map) 2110 kmem_free(mapp, sizeof(*mapp) * nfsb); 2111 if (bpp) 2112 *bpp = NULL; 2113 return error; 2114} 2115 2116/* 2117 * Get a buffer for the dir/attr block. 2118 */ 2119int 2120xfs_da_get_buf( 2121 xfs_trans_t *trans, 2122 xfs_inode_t *dp, 2123 xfs_dablk_t bno, 2124 xfs_daddr_t mappedbno, 2125 xfs_dabuf_t **bpp, 2126 int whichfork) 2127{ 2128 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, 2129 (inst_t *)__return_address); 2130} 2131 2132/* 2133 * Get a buffer for the dir/attr block, fill in the contents. 2134 */ 2135int 2136xfs_da_read_buf( 2137 xfs_trans_t *trans, 2138 xfs_inode_t *dp, 2139 xfs_dablk_t bno, 2140 xfs_daddr_t mappedbno, 2141 xfs_dabuf_t **bpp, 2142 int whichfork) 2143{ 2144 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, 2145 (inst_t *)__return_address); 2146} 2147 2148/* 2149 * Readahead the dir/attr block. 2150 */ 2151xfs_daddr_t 2152xfs_da_reada_buf( 2153 xfs_trans_t *trans, 2154 xfs_inode_t *dp, 2155 xfs_dablk_t bno, 2156 int whichfork) 2157{ 2158 xfs_daddr_t rval; 2159 2160 rval = -1; 2161 if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, 2162 (inst_t *)__return_address)) 2163 return -1; 2164 else 2165 return rval; 2166} 2167 2168kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ 2169kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ 2170 2171/* 2172 * Allocate a dir-state structure. 2173 * We don't put them on the stack since they're large. 2174 */ 2175xfs_da_state_t * 2176xfs_da_state_alloc(void) 2177{ 2178 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2179} 2180 2181/* 2182 * Kill the altpath contents of a da-state structure. 
 */
STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t *state)
{
	int	i;

	for (i = 0; i < state->altpath.active; i++) {
		if (state->altpath.blk[i].bp) {
			/*
			 * A block can be referenced by both paths; only
			 * release it here if the main path does not still
			 * hold the same buffer at this level.
			 */
			if (state->altpath.blk[i].bp != state->path.blk[i].bp)
				xfs_da_buf_done(state->altpath.blk[i].bp);
			state->altpath.blk[i].bp = NULL;
		}
	}
	state->altpath.active = 0;
}

/*
 * Free a da-state structure.
 * Releases the dabufs held by both paths and the extra block, then
 * returns the structure to its zone.
 */
void
xfs_da_state_free(xfs_da_state_t *state)
{
	int	i;

	/* release altpath buffers first; it compares against path.blk[] */
	xfs_da_state_kill_altpath(state);
	for (i = 0; i < state->path.active; i++) {
		if (state->path.blk[i].bp)
			xfs_da_buf_done(state->path.blk[i].bp);
	}
	if (state->extravalid && state->extrablk.bp)
		xfs_da_buf_done(state->extrablk.bp);
#ifdef DEBUG
	/* poison the structure to catch use-after-free in debug builds */
	memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
	kmem_zone_free(xfs_da_state_zone, state);
}

#ifdef XFS_DABUF_DEBUG
/* debug-only global list of live dabufs, used to detect duplicates */
xfs_dabuf_t	*xfs_dabuf_global_list;
lock_t		xfs_dabuf_global_lock;
#endif

/*
 * Create a dabuf.
 */
/* ARGSUSED */
STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
{
	xfs_buf_t	*bp;
	xfs_dabuf_t	*dabuf;
	int		i;
	int		off;

	/*
	 * Single-buffer dabufs come from a dedicated zone; multi-buffer
	 * ones are allocated to size (the bps[] array is variable length).
	 */
	if (nbuf == 1)
		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
	else
		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
	dabuf->dirty = 0;
#ifdef XFS_DABUF_DEBUG
	dabuf->ra = ra;
	dabuf->target = XFS_BUF_TARGET(bps[0]);
	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
#endif
	if (nbuf == 1) {
		/* single buffer: point data directly at the buffer's memory */
		dabuf->nbuf = 1;
		bp = bps[0];
		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
		dabuf->data = XFS_BUF_PTR(bp);
		dabuf->bps[0] = bp;
	} else {
		/*
		 * Multiple buffers: allocate one contiguous region and copy
		 * each buffer's contents into it, so callers can treat the
		 * logical block as a single flat piece of memory.
		 */
		dabuf->nbuf = nbuf;
		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
			dabuf->bps[i] = bp = bps[i];
			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
		}
		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
		/*
		 * Note: the increment reads bp from the *previous*
		 * iteration; bp is reassigned at the top of the body
		 * before the increment expression ever uses it.
		 */
		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
			bp = bps[i];
			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
				XFS_BUF_COUNT(bp));
		}
	}
#ifdef XFS_DABUF_DEBUG
	{
		SPLDECL(s);
		xfs_dabuf_t	*p;

		/* assert no other live dabuf covers the same device block */
		s = mutex_spinlock(&xfs_dabuf_global_lock);
		for (p = xfs_dabuf_global_list; p; p = p->next) {
			ASSERT(p->blkno != dabuf->blkno ||
			       p->target != dabuf->target);
		}
		dabuf->prev = NULL;
		if (xfs_dabuf_global_list)
			xfs_dabuf_global_list->prev = dabuf;
		dabuf->next = xfs_dabuf_global_list;
		xfs_dabuf_global_list = dabuf;
		mutex_spinunlock(&xfs_dabuf_global_lock, s);
	}
#endif
	return dabuf;
}

/*
 * Un-dirty a dabuf.
2289 */ 2290STATIC void 2291xfs_da_buf_clean(xfs_dabuf_t *dabuf) 2292{ 2293 xfs_buf_t *bp; 2294 int i; 2295 int off; 2296 2297 if (dabuf->dirty) { 2298 ASSERT(dabuf->nbuf > 1); 2299 dabuf->dirty = 0; 2300 for (i = off = 0; i < dabuf->nbuf; 2301 i++, off += XFS_BUF_COUNT(bp)) { 2302 bp = dabuf->bps[i]; 2303 memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off, 2304 XFS_BUF_COUNT(bp)); 2305 } 2306 } 2307} 2308 2309/* 2310 * Release a dabuf. 2311 */ 2312void 2313xfs_da_buf_done(xfs_dabuf_t *dabuf) 2314{ 2315 ASSERT(dabuf); 2316 ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); 2317 if (dabuf->dirty) 2318 xfs_da_buf_clean(dabuf); 2319 if (dabuf->nbuf > 1) 2320 kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); 2321#ifdef XFS_DABUF_DEBUG 2322 { 2323 SPLDECL(s); 2324 2325 s = mutex_spinlock(&xfs_dabuf_global_lock); 2326 if (dabuf->prev) 2327 dabuf->prev->next = dabuf->next; 2328 else 2329 xfs_dabuf_global_list = dabuf->next; 2330 if (dabuf->next) 2331 dabuf->next->prev = dabuf->prev; 2332 mutex_spinunlock(&xfs_dabuf_global_lock, s); 2333 } 2334 memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); 2335#endif 2336 if (dabuf->nbuf == 1) 2337 kmem_zone_free(xfs_dabuf_zone, dabuf); 2338 else 2339 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); 2340} 2341 2342/* 2343 * Log transaction from a dabuf. 
 */
void
xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
{
	xfs_buf_t	*bp;
	uint		f;
	int		i;
	uint		l;
	int		off;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if (dabuf->nbuf == 1) {
		/* single buffer: dabuf data is the buffer data, log directly */
		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
		return;
	}
	/* multi-buffer: the shadow copy must be written back at release */
	dabuf->dirty = 1;
	ASSERT(first <= last);
	/*
	 * Map the [first, last] byte range of the contiguous data onto
	 * each underlying buffer and log the overlapping portion of each.
	 * Note: the increment reads bp from the previous iteration; bp is
	 * assigned in the body before it is ever used here.
	 */
	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
		bp = dabuf->bps[i];
		f = off;
		l = f + XFS_BUF_COUNT(bp) - 1;
		if (f < first)
			f = first;
		if (l > last)
			l = last;
		if (f <= l)
			xfs_trans_log_buf(tp, bp, f - off, l - off);
		/*
		 * B_DONE is set by xfs_trans_log_buf.
		 * If we don't set it on a new buffer (get not read)
		 * then if we don't put anything in the buffer it won't
		 * be set, and at commit it is released into the cache,
		 * and then a read will fail.
		 */
		else if (!(XFS_BUF_ISDONE(bp)))
			XFS_BUF_DONE(bp);
	}
	/* the logged range must lie entirely within the dabuf */
	ASSERT(last < off);
}

/*
 * Release dabuf from a transaction.
 * Have to free up the dabuf before the buffers are released,
 * since the synchronization on the dabuf is really the lock on the buffer.
 */
void
xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
{
	xfs_buf_t	*bp;
	xfs_buf_t	**bplist;
	int		i;
	int		nbuf;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	/*
	 * Copy the buffer pointers out of the dabuf first:
	 * xfs_da_buf_done() below frees the dabuf itself.
	 * For a single buffer, borrow the stack variable instead of
	 * allocating a one-element list.
	 */
	if ((nbuf = dabuf->nbuf) == 1) {
		bplist = &bp;
		bp = dabuf->bps[0];
	} else {
		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
	}
	xfs_da_buf_done(dabuf);
	for (i = 0; i < nbuf; i++)
		xfs_trans_brelse(tp, bplist[i]);
	if (bplist != &bp)
		kmem_free(bplist, nbuf * sizeof(*bplist));
}

/*
 * Invalidate dabuf from a transaction.
 * Same structure as xfs_da_brelse(), but the underlying buffers are
 * invalidated (xfs_trans_binval) rather than released.
 */
void
xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
{
	xfs_buf_t	*bp;
	xfs_buf_t	**bplist;
	int		i;
	int		nbuf;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	/* copy the buffer list before xfs_da_buf_done() frees the dabuf */
	if ((nbuf = dabuf->nbuf) == 1) {
		bplist = &bp;
		bp = dabuf->bps[0];
	} else {
		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
	}
	xfs_da_buf_done(dabuf);
	for (i = 0; i < nbuf; i++)
		xfs_trans_binval(tp, bplist[i]);
	if (bplist != &bp)
		kmem_free(bplist, nbuf * sizeof(*bplist));
}

/*
 * Get the first daddr from a dabuf.
 */
xfs_daddr_t
xfs_da_blkno(xfs_dabuf_t *dabuf)
{
	ASSERT(dabuf->nbuf);
	ASSERT(dabuf->data);
	return XFS_BUF_ADDR(dabuf->bps[0]);
}