1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/buf.h> 32#include <sys/buf_internal.h> 33#include <sys/kernel.h> 34#include <sys/malloc.h> 35#include <sys/mount.h> 36#include <sys/vnode.h> 37 38 39#include "hfs.h" 40#include "hfs_cnode.h" 41#include "hfs_dbg.h" 42#include "hfs_endian.h" 43#include "hfs_btreeio.h" 44 45#include "hfscommon/headers/FileMgrInternal.h" 46#include "hfscommon/headers/BTreesPrivate.h" 47 48#define FORCESYNCBTREEWRITES 0 49 50/* From bsd/vfs/vfs_bio.c */ 51extern int bdwrite_internal(struct buf *, int); 52 53static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount); 54static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp); 55 56void btree_swap_node(struct buf *bp, __unused void *arg); 57 58/* 59 * Return btree node size for given vnode. 60 * 61 * Returns: 62 * For btree vnode, returns btree node size. 63 * For non-btree vnodes, returns 0. 
 */
u_int16_t get_btree_nodesize(struct vnode *vp)
{
	BTreeControlBlockPtr btree;
	u_int16_t node_size = 0;

	/* Only system (B-tree) vnodes carry a B-tree control block. */
	if (vnode_issystem(vp)) {
		btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr;
		if (btree) {
			node_size = btree->nodeSize;
		}
	}

	return node_size;
}

/*
 * Record the B-tree node size in the control block attached to vp.
 *
 * Returns E_NONE on success, or fsBTBadNodeSize when blockSize is larger
 * than the maximum buffer size (MAXBSIZE).  minBlockCount is unused.
 */
OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
{
	BTreeControlBlockPtr bTreePtr;

	DBG_ASSERT(vp != NULL);
	DBG_ASSERT(blockSize >= kMinNodeSize);
	if (blockSize > MAXBSIZE )
		return (fsBTBadNodeSize);

	bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
	bTreePtr->nodeSize = blockSize;

	return (E_NONE);
}


/*
 * Fetch one B-tree node into *block.
 *
 * With kGetEmptyBlock the buffer is obtained without reading from disk
 * (its physical block number is still resolved via VNOP_BLOCKMAP);
 * otherwise the node is read through the metadata buffer cache and
 * endian-swapped to host order as needed.
 *
 * On success fills in block->blockHeader/buffer/blockNum/
 * blockReadFromDisk and clears block->isModified.  On failure the
 * buffer is released and block->blockHeader/buffer are NULLed.
 */
OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
{
	OSStatus retval = E_NONE;
	struct buf *bp = NULL;
	u_int8_t allow_empty_node;

	/* If the btree block is being read using hint, it is
	 * fine for the swap code to find zeroed out nodes.
	 */
	if (options & kGetBlockHint) {
		allow_empty_node = true;
	} else {
		allow_empty_node = false;
	}

	if (options & kGetEmptyBlock) {
		daddr64_t blkno;
		off_t offset;

		offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize;
		bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META);
		if (bp &&
		    VNOP_BLOCKMAP(vp, offset, block->blockSize, &blkno, NULL, NULL, 0, NULL) == 0) {
			buf_setblkno(bp, blkno);
		}
	} else {
		retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp);
	}
	if (bp == NULL)
		retval = -1;	//XXX need better error

	if (retval == E_NONE) {
		block->blockHeader = bp;
		block->buffer = (char *)buf_dataptr(bp);
		block->blockNum = buf_lblkno(bp);
		block->blockReadFromDisk = (buf_fromcache(bp) == 0);	/* not found in cache ==> came from disk */

		// XXXdbg
		block->isModified = 0;

		/* Check and endian swap B-Tree node (only if it's a valid block) */
		if (!(options & kGetEmptyBlock)) {

			/* This happens when we first open the b-tree, we might not have all the node data on hand.
			 * (The "+ 14" offsets past the node descriptor to the header record;
			 * presumably 14 == sizeof(BTNodeDescriptor) — TODO confirm against BTreesPrivate.h.)
			 */
			if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
			    (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
			    (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {

				/*
				 * Don't swap the node descriptor, record offsets, or other records.
				 * This record will be invalidated and re-read with the correct node
				 * size once the B-tree control block is set up with the node size
				 * from the header record.
				 */
				retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);

			} else {
				/*
				 * In this case, we have enough data in-hand to do basic validation
				 * on the B-Tree node.
				 */
				if (block->blockReadFromDisk) {
					/*
					 * The node was just read from disk, so always swap/check it.
					 * This is necessary on big endian since the test below won't trigger.
					 */
					retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
				}
				else {
					/*
					 * Block wasn't read from disk; it was found in the cache.
					 * The last two bytes of a node are the offset of the first
					 * record (sizeof(BTNodeDescriptor) == 0x000e), so their byte
					 * order reveals the node's current endianness.
					 */
					if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
						/*
						 * The node was left in the cache in non-native order, so swap it.
						 * This only happens on little endian, after the node is written
						 * back to disk.
						 */
						retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
					}
					else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) {
						/*
						 * The node was in-cache in native-endianness.  We don't need to do
						 * anything here, because the node is ready to use.  Set retval == 0.
						 */
						retval = 0;
					}
					/*
					 * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer,
					 * it doesn't necessarily mean that this is a bad node.  Zeroed nodes that are
					 * marked as unused in the b-tree map node would be OK and not have valid content.
					 */
				}
			}

			/*
			 * If we got an error, then the node is only partially swapped.
			 * We mark the buffer invalid so that the next attempt to get the
			 * node will read it and attempt to swap again, and will notice
			 * the error again.  If we didn't do this, the next attempt to get
			 * the node might use the partially swapped node as-is.
			 */
			if (retval)
				buf_markinvalid(bp);
		}
	}

	if (retval) {
		if (bp)
			buf_brelse(bp);
		block->blockHeader = NULL;
		block->buffer = NULL;
	}

	return (retval);
}


/*
 * Notify the journal that the buffer backing blockPtr is about to be
 * modified.  No-op on unjournaled volumes; panics if the descriptor has
 * no attached buffer.  Sets blockPtr->isModified so ReleaseBTreeBlock
 * knows a matching journal_modify_block_end is required.
 */
void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
{
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct buf *bp = NULL;

	if (hfsmp->jnl == NULL) {
		return;
	}

	bp = (struct buf *) blockPtr->blockHeader;
	if (bp == NULL) {
		panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
		return;
	}

	journal_modify_block_start(hfsmp->jnl, bp);
	blockPtr->isModified = 1;
}

/*
 * Journal callback: swap a node to big-endian (on-disk) order just
 * before the journal writes it out.  Panics if the swap fails, since a
 * corrupt node would otherwise hit the disk.
 */
void
btree_swap_node(struct buf *bp, __unused void *arg)
{
	// struct hfsmount *hfsmp = (struct hfsmount *)arg;
	int retval;
	struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Prepare the block pointer */
	block.blockHeader = bp;
	block.buffer = (char *)buf_dataptr(bp);
	block.blockNum = buf_lblkno(bp);
	/* not found in cache ==> came from disk */
	block.blockReadFromDisk = (buf_fromcache(bp) == 0);
	block.blockSize = buf_count(bp);

	/* Swap the data now that this node is ready to go to disk.
	 * We allow swapping of zeroed out nodes here because we might
	 * be writing node whose last record just got deleted.
	 */
	retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
	if (retval)
		panic("hfs: btree_swap_node: about to write corrupt node!\n");
}


/*
 * End a journaled modification of bp, registering btree_swap_node as
 * the pre-write callback so the node is byte-swapped to disk order.
 */
static int
btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
{
	return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp);
}


/*
 * Release a B-tree node previously obtained via GetBTreeBlock.
 *
 * options select the disposition:
 *   kTrashBlock      - invalidate; on journaled volumes a B_LOCKED
 *                      buffer is handed to journal_kill_block instead
 *                      of buf_brelse.
 *   kForceWriteBlock - write synchronously (journal end-of-modify on
 *                      journaled volumes, VNOP_BWRITE otherwise).
 *   kMarkBlockDirty  - delayed write, falling back to async write when
 *                      too many delayed buffers are outstanding.
 *   (none)           - plain release; a pending journal modification is
 *                      completed rather than aborted (see XXXdbg note).
 *
 * In every path blockPtr->blockHeader is NULLed so the descriptor
 * cannot be reused.
 */
OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
{
	struct hfsmount *hfsmp = VTOHFS(vp);
	OSStatus retval = E_NONE;
	struct buf *bp = NULL;

	bp = (struct buf *) blockPtr->blockHeader;

	if (bp == NULL) {
		retval = -1;
		goto exit;
	}

	if (options & kTrashBlock) {
		buf_markinvalid(bp);

		if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) {
			journal_kill_block(hfsmp->jnl, bp);
		} else {
			buf_brelse(bp);	/* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
		}

		/* Don't let anyone else try to use this bp, it's been consumed */
		blockPtr->blockHeader = NULL;

	} else {
		if (options & kForceWriteBlock) {
			if (hfsmp->jnl) {
				if (blockPtr->isModified == 0) {
					panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp);
				}

				retval = btree_journal_modify_block_end(hfsmp, bp);
				blockPtr->isModified = 0;
			} else {
				retval = VNOP_BWRITE(bp);
			}

			/* Don't let anyone else try to use this bp, it's been consumed */
			blockPtr->blockHeader = NULL;

		} else if (options & kMarkBlockDirty) {
			struct timeval tv;
			microuptime(&tv);
			if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
				/*
				 * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move
				 * the buffer onto the LOCKED free list.  This is necessary, otherwise
				 * getnewbuf() would try to reclaim the buffers using buf_bawrite, which
				 * isn't going to work.
				 */
				/* Don't hog all the buffers... */
				if (count_lock_queue() > kMaxLockedMetaBuffers) {
					hfs_btsync(vp, HFS_SYNCTRANS);
					/* Rollback sync time to cause a sync on lock release... */
					(void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
				}
				buf_setflags(bp, B_LOCKED);
			}

			/*
			 * Delay-write this block.
			 * If the maximum delayed buffers has been exceeded then
			 * free up some buffers and fall back to an asynchronous write.
			 */
			if (hfsmp->jnl) {
				if (blockPtr->isModified == 0) {
					panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp);
				}
				retval = btree_journal_modify_block_end(hfsmp, bp);
				blockPtr->isModified = 0;
			} else if (bdwrite_internal(bp, 1) != 0) {
				hfs_btsync(vp, 0);
				/* Rollback sync time to cause a sync on lock release... */
				(void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));

				buf_clearflags(bp, B_LOCKED);
				buf_bawrite(bp);
			}

			/* Don't let anyone else try to use this bp, it's been consumed */
			blockPtr->blockHeader = NULL;

		} else {
			// check if we had previously called journal_modify_block_start()
			// on this block and if so, abort it (which will call buf_brelse()).
			if (hfsmp->jnl && blockPtr->isModified) {
				// XXXdbg - I don't want to call modify_block_abort()
				//          because I think it may be screwing up the
				//          journal and blowing away a block that has
				//          valid data in it.
				//
				//    journal_modify_block_abort(hfsmp->jnl, bp);
				//panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
				btree_journal_modify_block_end(hfsmp, bp);
				blockPtr->isModified = 0;
			} else {
				buf_brelse(bp);	/* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
			}

			/* Don't let anyone else try to use this bp, it's been consumed */
			blockPtr->blockHeader = NULL;
		}
	}

exit:
	return (retval);
}


/*
 * Grow a B-tree file so its EOF is at least minEOF bytes.
 *
 * Allocation is attempted in contiguous, node-size-multiple chunks,
 * halving the request on dskFulErr.  After allocation the file is
 * trimmed back so growth is an exact multiple of the node size, the
 * roving allocator pointer is advanced, the volume header (or cnode
 * times) is flushed, and the newly added nodes are zeroed via
 * ClearBTNodes.  maxEOF is unused.  Takes (and releases) the bitmap
 * and, for non-extents trees, the extents-overflow system file locks.
 */
OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
{
#pragma unused (maxEOF)

	OSStatus retval = 0, ret = 0;
	int64_t actualBytesAdded, origSize;
	u_int64_t bytesToAdd;
	u_int32_t startAllocation;
	u_int32_t fileblocks;
	BTreeInfoRec btInfo;
	ExtendedVCB *vcb;
	FCB *filePtr;
	struct proc *p = NULL;
	int64_t trim = 0;
	int lockflags = 0;

	filePtr = GetFileControlBlock(vp);

	if ( (off_t)minEOF > filePtr->fcbEOF )
	{
		bytesToAdd = minEOF - filePtr->fcbEOF;

		if (bytesToAdd < filePtr->ff_clumpsize)
			bytesToAdd = filePtr->ff_clumpsize;		//XXX why not always be a multiple of clump size?
	}
	else
	{
		return -1;
	}

	vcb = VTOVCB(vp);

	/*
	 * The Extents B-tree can't have overflow extents. ExtendFileC will
	 * return an error if an attempt is made to extend the Extents B-tree
	 * when the resident extents are exhausted.
	 */

	/* Protect allocation bitmap and extents overflow file. */
	lockflags = SFL_BITMAP;
	if (VTOC(vp)->c_fileid != kHFSExtentsFileID)
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK);

	(void) BTGetInformation(filePtr, 0, &btInfo);

#if 0 // XXXdbg
	/*
	 * The b-tree code expects nodes to be contiguous. So when
	 * the allocation block size is less than the b-tree node
	 * size, we need to force disk allocations to be contiguous.
	 */
	if (vcb->blockSize >= btInfo.nodeSize) {
		extendFlags = 0;
	} else {
		/* Ensure that all b-tree nodes are contiguous on disk */
		extendFlags = kEFContigMask;
	}
#endif

	origSize = filePtr->fcbEOF;
	fileblocks = filePtr->ff_blocks;
	startAllocation = vcb->nextAllocation;

	// loop trying to get a contiguous chunk that's an integer multiple
	// of the btree node size.  if we can't get a contiguous chunk that
	// is at least the node size then we break out of the loop and let
	// the error propagate back up.
	while((off_t)bytesToAdd >= btInfo.nodeSize) {
		do {
			retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
			                     kEFContigMask | kEFMetadataMask | kEFNoClumpMask,
			                     (int64_t *)&actualBytesAdded);
			if (retval == dskFulErr && actualBytesAdded == 0) {
				bytesToAdd >>= 1;
				if (bytesToAdd < btInfo.nodeSize) {
					break;
				} else if ((bytesToAdd % btInfo.nodeSize) != 0) {
					// make sure it's an integer multiple of the nodeSize
					bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
				}
			}
		} while (retval == dskFulErr && actualBytesAdded == 0);

		if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) {
			break;
		}

		filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
		bytesToAdd = minEOF - filePtr->fcbEOF;
	}

	/*
	 * If a new extent was added then move the roving allocator
	 * reference forward by the current b-tree file size so
	 * there's plenty of room to grow.
	 */
	if ((retval == 0) &&
	    ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
	    (vcb->nextAllocation > startAllocation) &&
	    ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks);
	}

	filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;

	// XXXdbg ExtendFileC() could have returned an error even though
	// it grew the file to be big enough for our needs.  If this is
	// the case, we don't care about retval so we blow it away.
	//
	if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) {
		retval = 0;
	}

	// XXXdbg if the file grew but isn't large enough or isn't an
	// even multiple of the nodeSize then trim things back.  if
	// the file isn't large enough we trim back to the original
	// size.  otherwise we trim back to be an even multiple of the
	// btree node size.
	//
	if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) {

		if (filePtr->fcbEOF < (off_t)minEOF) {
			retval = dskFulErr;

			if (filePtr->fcbEOF < origSize) {
				panic("hfs: btree file eof %lld less than orig size %lld!\n",
				      filePtr->fcbEOF, origSize);
			}

			trim = filePtr->fcbEOF - origSize;
		} else {
			trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
		}

		ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
		filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;

		// XXXdbg - panic if the file didn't get trimmed back properly
		if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
			panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n",
			      filePtr->fcbEOF, btInfo.nodeSize, filePtr);
		}

		if (ret) {
			// XXXdbg - this probably doesn't need to be a panic()
			panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
			      filePtr->fcbEOF, trim, (long)ret);
			goto out;
		}
	}

	if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
		/*
		 * Get any extents overflow b-tree changes to disk ASAP!
		 */
		(void) BTFlushPath(VTOF(vcb->extentsRefNum));
		(void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p);
	}
	hfs_systemfile_unlock(vcb, lockflags);
	lockflags = 0;

	if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
		panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
		      filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
	}

	/*
	 * Update the Alternate MDB or Alternate VolumeHeader
	 */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID)
	   ) {
		VTOC(vp)->c_flag |= C_MODIFIED;
		MarkVCBDirty( vcb );
		ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
	} else {
		VTOC(vp)->c_touch_chgtime = TRUE;
		VTOC(vp)->c_touch_modtime = TRUE;
		(void) hfs_update(vp, TRUE);
	}

	/* Zero the newly added range so stale data never appears as nodes. */
	ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
out:
	if (retval == 0)
		retval = ret;

	if (lockflags)
		hfs_systemfile_unlock(vcb, lockflags);

	return retval;
}


/*
 * Clear out (zero) new b-tree nodes on disk.
575 */ 576static int 577ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount) 578{ 579 struct hfsmount *hfsmp = VTOHFS(vp); 580 struct buf *bp = NULL; 581 daddr64_t blk; 582 daddr64_t blkcnt; 583 584 blk = offset / blksize; 585 blkcnt = amount / blksize; 586 587 while (blkcnt > 0) { 588 bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META); 589 if (bp == NULL) 590 continue; 591 592 // XXXdbg 593 if (hfsmp->jnl) { 594 // XXXdbg -- skipping this for now since it makes a transaction 595 // become *way* too large 596 //journal_modify_block_start(hfsmp->jnl, bp); 597 } 598 bzero((char *)buf_dataptr(bp), blksize); 599 600 buf_markaged(bp); 601 602 // XXXdbg 603 if (hfsmp->jnl) { 604 // XXXdbg -- skipping this for now since it makes a transaction 605 // become *way* too large 606 //journal_modify_block_end(hfsmp->jnl, bp); 607 608 // XXXdbg - remove this once we decide what to do with the 609 // writes to the journal 610 if ((blk % 32) == 0) 611 VNOP_BWRITE(bp); 612 else 613 buf_bawrite(bp); 614 } else { 615 /* wait/yield every 32 blocks so we don't hog all the buffers */ 616 if ((blk % 32) == 0) 617 VNOP_BWRITE(bp); 618 else 619 buf_bawrite(bp); 620 } 621 --blkcnt; 622 ++blk; 623 } 624 625 return (0); 626} 627 628 629extern char hfs_attrname[]; 630 631/* 632 * Create an HFS+ Attribute B-tree File. 633 * 634 * No global resources should be held. 
 */
int
hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt)
{
	struct vnode* vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cfork;
	BlockDescriptor blkdesc;
	BTNodeDescriptor *ndp;
	BTHeaderRec *bthp;
	BTreeControlBlockPtr btcb = NULL;
	struct buf *bp = NULL;
	void * buffer;
	u_int8_t *bitmap;
	u_int16_t *index;
	u_int32_t node_num, num_map_nodes;
	u_int32_t bytes_per_map_record;
	u_int32_t temp;
	u_int16_t offset;
	int intrans = 0;		/* 1 while a journal transaction is open */
	int result;
	int newvnode_flags = 0;

again:
	/*
	 * Serialize creation using HFS_CREATING_BTREE flag.
	 */
	hfs_lock_mount (hfsmp);
	if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
		/* Someone else beat us, wait for them to finish.
		 * (PDROP releases hfs_mutex during and after the sleep.)
		 */
		(void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
			      PDROP | PINOD, "hfs_create_attr_btree", 0);
		if (hfsmp->hfs_attribute_vp) {
			return (0);
		}
		goto again;
	}
	hfsmp->hfs_flags |= HFS_CREATING_BTREE;
	hfs_unlock_mount (hfsmp);

	/* Check if we're out of usable disk space. */
	if ((hfs_freeblks(hfsmp, 1) == 0)) {
		result = ENOSPC;
		goto exit;
	}

	/*
	 * Set up Attribute B-tree vnode
	 * (this must be done before we start a transaction
	 * or take any system file locks)
	 */
	bzero(&cndesc, sizeof(cndesc));
	cndesc.cd_parentcnid = kHFSRootParentID;
	cndesc.cd_flags |= CD_ISMETA;
	cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname;
	cndesc.cd_namelen = strlen(hfs_attrname);
	cndesc.cd_cnid = kHFSAttributesFileID;

	bzero(&cnattr, sizeof(cnattr));
	cnattr.ca_linkcount = 1;
	cnattr.ca_mode = S_IFREG;
	cnattr.ca_fileid = cndesc.cd_cnid;

	bzero(&cfork, sizeof(cfork));
	cfork.cf_clump = nodesize * nodecnt;

	result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
	                         &cfork, &vp, &newvnode_flags);
	if (result) {
		goto exit;
	}
	/*
	 * Set up Attribute B-tree control block
	 */
	MALLOC(btcb, BTreeControlBlock *, sizeof(BTreeControlBlock), M_TEMP, M_WAITOK);
	bzero(btcb, sizeof(BTreeControlBlock));

	btcb->nodeSize = nodesize;
	btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
	btcb->btreeType = 0xFF;
	btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
	btcb->version = kBTreeVersion;
	btcb->writeCount = 1;
	btcb->flags = 0;	/* kBTHeaderDirty */
	btcb->fileRefNum = vp;
	btcb->getBlockProc = GetBTreeBlock;
	btcb->releaseBlockProc = ReleaseBTreeBlock;
	btcb->setEndOfForkProc = ExtendBTreeFile;
	btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare;
	VTOF(vp)->fcbBTCBPtr = btcb;

	/*
	 * Allocate some space
	 */
	if (hfs_start_transaction(hfsmp) != 0) {
		result = EINVAL;
		goto exit;
	}
	intrans = 1;

	/* Note ExtendBTreeFile will acquire the necessary system file locks. */
	result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump);
	if (result)
		goto exit;

	btcb->totalNodes = VTOF(vp)->ff_size / nodesize;

	/*
	 * Figure out how many map nodes we'll need.
	 *
	 * bytes_per_map_record = the number of bytes in the map record of a
	 * map node.  Since that is the only record in the node, it is the size
	 * of the node minus the node descriptor at the start, and two record
	 * offsets at the end of the node.  The "- 2" is to round the size down
	 * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
	 * multiple of 4).
	 *
	 * The value "temp" here is the number of *bits* in the map record of
	 * the header node.
	 */
	bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
	temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
			- sizeof(BTHeaderRec)
			- kBTreeHeaderUserBytes
			- 4 * sizeof(u_int16_t));
	if (btcb->totalNodes > temp) {
		num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
	}
	else {
		num_map_nodes = 0;
	}

	/* Free count excludes the header node and all map nodes. */
	btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;

	/*
	 * Initialize the b-tree header on disk
	 */
	bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META);
	if (bp == NULL) {
		result = EIO;
		goto exit;
	}

	buffer = (void *)buf_dataptr(bp);
	blkdesc.buffer = buffer;
	blkdesc.blockHeader = (void *)bp;
	blkdesc.blockReadFromDisk = 0;
	blkdesc.isModified = 0;

	ModifyBlockStart(vp, &blkdesc);

	if (buf_size(bp) != nodesize)
		panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp));

	bzero(buffer, nodesize);
	/* Record offsets are stored as u_int16_t entries growing down from
	 * the end of the node; index[] addresses them from the buffer start.
	 */
	index = (u_int16_t *)buffer;

	/* FILL IN THE NODE DESCRIPTOR: */
	ndp = (BTNodeDescriptor *)buffer;
	if (num_map_nodes != 0)
		ndp->fLink = 1;
	ndp->kind = kBTHeaderNode;
	ndp->numRecords = 3;
	offset = sizeof(BTNodeDescriptor);
	index[(nodesize / 2) - 1] = offset;

	/* FILL IN THE HEADER RECORD: */
	bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
	bthp->nodeSize = nodesize;
	bthp->totalNodes = btcb->totalNodes;
	bthp->freeNodes = btcb->freeNodes;
	bthp->clumpSize = cfork.cf_clump;
	bthp->btreeType = 0xFF;
	bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
	bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
	bthp->keyCompareType = kHFSBinaryCompare;
	offset += sizeof(BTHeaderRec);
	index[(nodesize / 2) - 2] = offset;

	/* FILL IN THE USER RECORD: */
	offset += kBTreeHeaderUserBytes;
	index[(nodesize / 2) - 3] = offset;

	/* Mark the header node and map nodes in use in the map record.
	 *
	 * NOTE: Assumes that the header node's map record has at least
	 * (num_map_nodes + 1) bits.
	 */
	bitmap = (u_int8_t *) buffer + offset;
	temp = num_map_nodes + 1;	/* +1 for the header node */
	while (temp >= 8) {
		*(bitmap++) = 0xFF;
		temp -= 8;
	}
	/* Set the remaining (temp) high-order bits in the final byte. */
	*bitmap = ~(0xFF >> temp);

	offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
		   - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
	index[(nodesize / 2) - 4] = offset;

	if (hfsmp->jnl) {
		result = btree_journal_modify_block_end(hfsmp, bp);
	} else {
		result = VNOP_BWRITE(bp);
	}
	if (result)
		goto exit;

	/* Create the map nodes: node numbers 1 .. num_map_nodes */
	for (node_num=1; node_num <= num_map_nodes; ++node_num) {
		bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META);
		if (bp == NULL) {
			result = EIO;
			goto exit;
		}
		buffer = (void *)buf_dataptr(bp);
		blkdesc.buffer = buffer;
		blkdesc.blockHeader = (void *)bp;
		blkdesc.blockReadFromDisk = 0;
		blkdesc.isModified = 0;

		ModifyBlockStart(vp, &blkdesc);

		bzero(buffer, nodesize);
		index = (u_int16_t *)buffer;

		/* Fill in the node descriptor */
		ndp = (BTNodeDescriptor *)buffer;
		if (node_num != num_map_nodes)
			ndp->fLink = node_num + 1;
		ndp->kind = kBTMapNode;
		ndp->numRecords = 1;
		offset = sizeof(BTNodeDescriptor);
		index[(nodesize / 2) - 1] = offset;


		/* Fill in the map record's offset */
		/* Note: We assume that the map record is all zeroes */
		offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
		index[(nodesize / 2) - 2] = offset;

		if (hfsmp->jnl) {
			result = btree_journal_modify_block_end(hfsmp, bp);
		} else {
			result = VNOP_BWRITE(bp);
		}
		if (result)
			goto exit;
	}

	/* Update vp/cp for attribute btree */
	hfs_lock_mount (hfsmp);
	hfsmp->hfs_attribute_cp = VTOC(vp);
	hfsmp->hfs_attribute_vp = vp;
	hfs_unlock_mount (hfsmp);

	(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);

	if (intrans) {
		hfs_end_transaction(hfsmp);
		intrans = 0;
	}

	/* Initialize the vnode for virtual attribute data file */
	result = init_attrdata_vnode(hfsmp);
	if (result) {
		printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result);
	}

exit:
	if (vp) {
		hfs_unlock(VTOC(vp));
	}
	if (result) {
		/* Failure cleanup: free the control block and drop the vnode. */
		if (btcb) {
			FREE (btcb, M_TEMP);
		}
		if (vp) {
			vnode_put(vp);
		}
		/* XXX need to give back blocks ? */
	}
	if (intrans) {
		hfs_end_transaction(hfsmp);
	}

	/*
	 * All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
	 */
	hfs_lock_mount (hfsmp);
	hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
	wakeup((caddr_t)&hfsmp->hfs_attribute_cp);
	hfs_unlock_mount (hfsmp);

	return (result);
}
