/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
	File:		VolumeAllocation.c

	Contains:	Routines for accessing and modifying the volume bitmap.

	Version:	HFS Plus 1.0

	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.

*/

/*
Public routines:
	BlockAllocate
		Allocate space on a volume. Can allocate space contiguously.
		If not contiguous, then allocation may be less than what was
		asked for. Returns the starting block number, and number of
		blocks. (Will only do a single extent???)
	BlockDeallocate
		Deallocate a contiguous run of allocation blocks.

	BlockMarkAllocated
		Exported wrapper to mark blocks as in-use. This will correctly determine
		whether or not the red-black tree is enabled and call the appropriate function
		if applicable.
	BlockMarkFree
		Exported wrapper to mark blocks as freed. This will correctly determine whether or
		not the red-black tree is enabled and call the appropriate function if applicable.


	ResetVCBFreeExtCache
		Since the red-black tree obviates the need to maintain the free extent cache, we do
		not update it if the tree is also live. As a result, if we ever need to destroy the trees
		we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
		bitmap scanning allocator.
		We also reset and disable the free extent cache when volume resizing is
		in flight.

	UpdateAllocLimit
		Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
		allocations from occupying space in the region we are modifying during a filesystem resize.
		At other times, it should be consistent with the total number of allocation blocks in the
		filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
		know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
		number of items in the tree that we can allocate from.

	UnmapBlocks
		Issues DKIOCUNMAPs to the device as it fills the internal volume buffer when iterating
		the volume bitmap.

Internal routines:
	Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.
	This cuts down on the space taken by functions that cannot be used when the
	red-black tree code is not compiled in.

	BlockMarkFreeRBTree
		Make an internal call to BlockMarkFree and then update
		and/or create Red-Black Tree allocation tree nodes to correspond
		to the free space being generated.
	BlockMarkFreeInternal
		Mark a contiguous range of blocks as free. The corresponding
		bits in the volume bitmap will be cleared. This will actually do the work
		of modifying the bitmap for us.

	BlockMarkAllocatedRBTree
		Make an internal call to BlockAllocateMarked, which will update the
		bitmap on-disk when we allocate blocks. If that is successful, then
		we'll remove the appropriate entries from the red-black tree.
	BlockMarkAllocatedInternal
		Mark a contiguous range of blocks as allocated. The corresponding
		bits in the volume bitmap are set. Also tests to see
		if any of the blocks were previously unallocated.
	BlockFindContiguous
		Find a contiguous range of blocks of a given size. The caller
		specifies where to begin the search (by block number). The
		block number of the first block in the range is returned. This is only
		called by the bitmap scanning logic as the red-black tree should be able
		to do this internally by searching its tree.
	BlockAllocateAny
		Find and allocate a contiguous range of blocks up to a given size. The
		first range of contiguous free blocks found is allocated, even if there
		are fewer blocks than requested (and even if a contiguous range of blocks
		of the given size exists elsewhere).
	BlockAllocateAnyBitmap
		Finds a range of blocks per the above requirements without using the
		Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
		any valid range of free space needed.
	BlockAllocateAnyRBTree
		Finds a valid range of blocks per the above requirements by searching
		the red-black tree. We can just make an internal call to
		BlockAllocateContigRBTree to find the valid range.
	BlockAllocateContig
		Find and allocate a contiguous range of blocks of a given size. If
		a contiguous range of free blocks of the given size isn't found, then
		the allocation fails (i.e. it is "all or nothing"). This routine is
		essentially a wrapper function around its related sub-functions,
		BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
		respectively, the original HFS+ bitmap scanning logic and the new
		Red-Black Tree to search and manage free-space decisions. This function
		contains logic for when to use which of the allocation algorithms,
		depending on the free space contained in the volume.
	BlockAllocateContigBitmap
		Finds and allocates a range of blocks specified by the size parameters
		using the original HFS+ bitmap scanning logic. The red-black tree
		will not be updated if this function is used.
	BlockAllocateContigRBTree
		Finds and allocates a range of blocks specified by the size parameters
		using the new red/black tree data structure and search algorithms
		provided by the tree library. Updates the red/black tree nodes after
		the on-disk data structure (bitmap) has been updated.
	BlockAllocateKnown
		Try to allocate space from known free space in the volume's
		free extent cache.

	ReadBitmapBlock
		Given an allocation block number, read the bitmap block that
		contains that allocation block into a caller-supplied buffer.

	ReleaseBitmapBlock
		Release a bitmap block back into the buffer cache.

	remove_free_extent_cache
		Remove an extent from the free extent cache. Handles overlaps
		with multiple extents in the cache, and handles splitting an
		extent in the cache if the extent to be removed is in the middle
		of a cached extent.

	add_free_extent_cache
		Add an extent to the free extent cache. It will merge the
		input extent with extents already in the cache.


Debug/Test Routines
	hfs_isallocated
		Test to see if any blocks in a range are allocated. Journal or
		allocation file lock must be held.

	hfs_isallocated_scan
		Test to see if any blocks in a range are allocated. Releases and
		invalidates the block used when finished.

	hfs_isrbtree_active
		Test to see if the allocation red-black tree is live. This function
		requires either an exclusive or shared lock on the allocation bitmap file
		in the HFS mount structure, to prevent red-black tree pointers from disappearing.

	hfs_isrbtree_allocated
		Test to see if the specified extent is marked as allocated in the red-black tree.
		Multiplexes between the metadata zone trees and the normal allocation zone trees
		depending on the offset of the extent specified.

	check_rbtree_extents
		Void function that wraps around the above function (hfs_isrbtree_allocated)
		and checks to see that the return value was appropriate based on the assertion we're
		trying to validate (whether or not the specified extent should be marked as free
		or allocated).

	hfs_validate_rbtree
		Exhaustive search function that will check every allocation block for its status in the
		red-black tree and then check the corresponding status in the bitmap file. If the two are out
		of sync, it will panic. Note that this function is extremely expensive and must NEVER
		be run outside of debug code.

	hfs_checktreelinks
		Checks the embedded linked list structure of the red black tree for integrity. The next pointer
		should always point to whatever extent_tree_offset_next returns.


Red Black Tree Specific Routines
	GenerateTree
		Build a red-black tree for the given filesystem's bitmap.

	DestroyTrees
		Destroy the tree on the given filesystem


	hfs_alloc_scan_block
		Given a starting allocation block number, figures out which physical block contains that
		allocation block's bit, and scans it from the starting bit until either the ending bit or
		the end of the block. Free space extents are inserted into the appropriate red-black tree.

*/

#include "../../hfs_macos_defs.h"

#include <sys/types.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/ubc.h>
#include <sys/uio.h>
#include <kern/kalloc.h>
/* For VM Page size */
#include <libkern/libkern.h>

#include "../../hfs.h"
#include "../../hfs_dbg.h"
#include "../../hfs_format.h"
#include "../../hfs_endian.h"
#include "../../hfs_macos_defs.h"
#include "../headers/FileMgrInternal.h"
#include "../headers/HybridAllocator.h"
#include "../../hfs_kdebug.h"

/* Headers for unmap-on-mount support */
#include <vfs/vfs_journal.h>
#include <sys/disk.h>

#ifndef CONFIG_HFS_TRIM
#define CONFIG_HFS_TRIM 0
#endif

/*
 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
 * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
 * disabled by default because there can be a lot of these events,
 * and we don't want to overwhelm the kernel debug buffer. If you
 * want to watch these events in particular, just set the sysctl.)
 */
static int hfs_kdebug_allocation = 0;
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
enum {
	/*
	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
	 * BlockDeallocate, including the internal BlockAllocateXxx
	 * routines so we can see how an allocation was satisfied.
	 *
	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
	 * free extent cache.
	 *
	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
	 *
	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
	 * or clearing bits, scanning the bitmap).
	 */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};

enum {
	kBytesPerWord			=	4,
	kBitsPerByte			=	8,
	kBitsPerWord			=	32,

	kBitsWithinWordMask		=	kBitsPerWord-1
};

#define kLowBitInWordMask	0x00000001ul
#define kHighBitInWordMask	0x80000000ul
#define kAllBitsSetInWord	0xFFFFFFFFul
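
/*
 * Illustrative sketch (not part of the original source; compiled out below):
 * how the word-level constants above are typically combined to test an
 * allocation block's bit within a 32-bit bitmap word. The bitmap is stored
 * big-endian on disk, so a word is byte-swapped before the high-bit mask
 * lines up with the first allocation block covered by that word.
 */
#if 0
static int
example_bit_is_set(const u_int32_t *bitmapWords, u_int32_t bit)
{
	/* Which word holds this bit, and the mask for the bit within it */
	u_int32_t word = SWAP_BE32(bitmapWords[bit / kBitsPerWord]);
	u_int32_t mask = kHighBitInWordMask >> (bit & kBitsWithinWordMask);

	return (word & mask) != 0;
}
#endif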

#define ALLOC_DEBUG 0

static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef);

static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty);

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockMarkAllocatedInternal (
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks);

static OSErr BlockMarkFreeInternal(
	ExtendedVCB	*vcb,
	u_int32_t	startingBlock,
	u_int32_t	numBlocks,
	Boolean		do_validate);


static OSErr ReleaseScanBitmapBlock( struct buf *bp );

static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
		u_int32_t numBlocks, struct jnl_trim_list *list);

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);

static int hfs_alloc_scan_block(struct hfsmount *hfsmp,
		u_int32_t startbit,
		u_int32_t endBit,
		u_int32_t *bitToScan,
		struct jnl_trim_list *list);

int hfs_isallocated_scan (struct hfsmount *hfsmp,
		u_int32_t startingBlock,
		u_int32_t *bp_buf);

#if CONFIG_HFS_ALLOC_RBTREE
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks,
	u_int32_t		forceContig);

static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks);

static OSErr BlockMarkFreeRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks);

static int
hfs_isrbtree_allocated (struct hfsmount * hfsmp,
	u_int32_t startBlock,
	u_int32_t numBlocks,
	extent_node_t** node1);

extern void
hfs_validate_rbtree (struct hfsmount *hfsmp,
	u_int32_t start,
	u_int32_t end);

static void hfs_checktreelinks (struct hfsmount *hfsmp);


void check_rbtree_extents (struct hfsmount *hfsmp,
	u_int32_t start,
	u_int32_t numBlocks,
	int shouldBeFree);

#define ASSERT_FREE 1
#define ASSERT_ALLOC 0

#endif /* CONFIG_HFS_ALLOC_RBTREE */

/* Functions for manipulating free extent cache */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);

#if ALLOC_DEBUG
/*
 * Validation Routine to verify that the TRIM list maintained by the journal
 * is in good shape relative to what we think the bitmap should have. We should
 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
 * we panic.
 */
int trim_validate_bitmap (struct hfsmount *hfsmp);
int trim_validate_bitmap (struct hfsmount *hfsmp) {
	u_int64_t blockno_offset;
	u_int64_t numblocks;
	int i;
	int count;
	u_int32_t startblk;
	u_int32_t blks;
	int err = 0;
	uint32_t alloccount = 0;

	if (hfsmp->jnl) {
		struct journal *jnl = (struct journal*)hfsmp->jnl;
		if (jnl->active_tr) {
			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
			count = trim->extent_count;
			for (i = 0; i < count; i++) {
				blockno_offset = trim->extents[i].offset;
				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
				blockno_offset = blockno_offset / hfsmp->blockSize;
				numblocks = trim->extents[i].length / hfsmp->blockSize;

				startblk = (u_int32_t)blockno_offset;
				blks = (u_int32_t) numblocks;
				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);

				if (err == 0 && alloccount != 0) {
					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
				}
			}
		}
	}
	return 0;
}

#endif
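
/*
 * Several routines below translate between allocation blocks and device byte
 * offsets: a block's byte offset is hfsPlusIOPosOffset plus the block number
 * times the allocation block size, and the reverse mapping divides those
 * factors back out (as trim_validate_bitmap does above). A minimal sketch of
 * the two conversions, compiled out; the helper names are illustrative only.
 */
#if 0
static u_int64_t
example_block_to_byte_offset(struct hfsmount *hfsmp, u_int32_t block)
{
	/* Byte offset of an allocation block, relative to the device */
	return (u_int64_t)block * hfsmp->blockSize +
	       (u_int64_t)hfsmp->hfsPlusIOPosOffset;
}

static u_int32_t
example_byte_offset_to_block(struct hfsmount *hfsmp, u_int64_t offset)
{
	/* Inverse mapping; assumes offset lies within the volume's blocks */
	return (u_int32_t)((offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize);
}
#endif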


/*
 ;________________________________________________________________________________
 ;
 ; Routine:	hfs_unmap_free_extent
 ;
 ; Function:	Make note of a range of allocation blocks that should be
 ;		unmapped (trimmed). That is, the given range of blocks no
 ;		longer have useful content, and the device can unmap the
 ;		previous contents. For example, a solid state disk may reuse
 ;		the underlying storage for other blocks.
 ;
 ;		This routine is only supported for journaled volumes. The extent
 ;		being freed is passed to the journal code, and the extent will
 ;		be unmapped after the current transaction is written to disk.
 ;
 ; Input Arguments:
 ;	hfsmp		- The volume containing the allocation blocks.
 ;	startingBlock	- The first allocation block of the extent being freed.
 ;	numBlocks	- The number of allocation blocks of the extent being freed.
 ;________________________________________________________________________________
 */
static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	u_int64_t device_sz;
	int err = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (ALLOC_DEBUG) {
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
		}
	}

	if (hfsmp->jnl != NULL) {
		device_sz = hfsmp->hfs_logical_bytes;
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		/* Validate that the trim is in a valid range of bytes */
		if ((offset >= device_sz) || ((offset + length) > device_sz)) {
			printf("hfs_unmap_free_ext: ignoring trim @ off %lld len %lld \n", offset, length);
			err = EINVAL;
		}

		if (err == 0) {
			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
			if (err) {
				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
}



/*
 ;________________________________________________________________________________
 ;
 ; Routine:	hfs_track_unmap_blocks
 ;
 ; Function:	Make note of a range of allocation blocks that should be
 ;		unmapped (trimmed). That is, the given range of blocks no
 ;		longer have useful content, and the device can unmap the
 ;		previous contents. For example, a solid state disk may reuse
 ;		the underlying storage for other blocks.
 ;
 ;		This routine is only supported for journaled volumes.
 ;
 ;		*****NOTE*****:
 ;		This function should *NOT* be used when the volume is fully
 ;		mounted. This function is intended to support a bitmap iteration
 ;		at mount time to fully inform the SSD driver of the state of all blocks
 ;		at mount time, and assumes that there is no allocation/deallocation
 ;		interference during its iteration.
 ;
 ; Input Arguments:
 ;	hfsmp		- The volume containing the allocation blocks.
 ;	start		- The first allocation block of the extent being freed.
 ;	numBlocks	- The number of allocation blocks of the extent being freed.
 ;	list		- The list of currently tracked trim ranges.
 ;________________________________________________________________________________
 */
static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
		u_int32_t numBlocks, struct jnl_trim_list *list) {

	u_int64_t offset;
	u_int64_t length;
	int error = 0;

	if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
		int extent_no = list->extent_count;
		offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;


		list->extents[extent_no].offset = offset;
		list->extents[extent_no].length = length;
		list->extent_count++;
		if (list->extent_count == list->allocated_count) {
			error = hfs_issue_unmap (hfsmp, list);
		}
	}

	return error;
}

/*
 ;________________________________________________________________________________
 ;
 ; Routine:	hfs_issue_unmap
 ;
 ; Function:	Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
 ;
 ; Input Arguments:
 ;	hfsmp	- The volume containing the allocation blocks.
 ;	list	- The list of currently tracked trim ranges.
 ;________________________________________________________________________________
 */

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) {
	dk_unmap_t unmap;
	int error = 0;

	if (list->extent_count > 0) {
		bzero(&unmap, sizeof(unmap));
		unmap.extents = list->extents;
		unmap.extentsCount = list->extent_count;

		/* Issue a TRIM and flush them out */
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());

		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
		list->extent_count = 0;
	}
	return error;
}
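
/*
 * The two routines above implement a simple accumulate-and-flush pattern:
 * callers append extents to a fixed-capacity jnl_trim_list, and the list is
 * flushed to the device with a single DKIOCUNMAP whenever it fills. A
 * compiled-out sketch of a caller's loop over some free extents; the
 * example_free[] array and its length are hypothetical.
 */
#if 0
static int
example_unmap_free_extents(struct hfsmount *hfsmp, struct jnl_trim_list *list,
		const HFSPlusExtentDescriptor *example_free, int count)
{
	int i, error = 0;

	for (i = 0; i < count && error == 0; i++) {
		/* Appends one extent; flushes automatically when the list fills */
		error = hfs_track_unmap_blocks(hfsmp, example_free[i].startBlock,
				example_free[i].blockCount, list);
	}
	if (error == 0) {
		/* Push out whatever is still buffered */
		error = hfs_issue_unmap(hfsmp, list);
	}
	return error;
}
#endif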



/*
 ;________________________________________________________________________________
 ;
 ; Routine:	hfs_unmap_alloc_extent
 ;
 ; Function:	Make note that a range of allocation blocks, some of
 ;		which may have previously been passed to hfs_unmap_free_extent,
 ;		is now in use on the volume. The given blocks will be removed
 ;		from any pending DKIOCUNMAP.
 ;
 ; Input Arguments:
 ;	hfsmp		- The volume containing the allocation blocks.
 ;	startingBlock	- The first allocation block of the extent being allocated.
 ;	numBlocks	- The number of allocation blocks being allocated.
 ;________________________________________________________________________________
 */
static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	int err = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (hfsmp->jnl != NULL) {
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
		if (err) {
			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
}


/*
;________________________________________________________________________________
;
; Routine:	hfs_trim_callback
;
; Function:	This function is called when a transaction that freed extents
;		(via hfs_unmap_free_extent/journal_trim_add_extent) has been
;		written to the on-disk journal. This routine will add those
;		extents to the free extent cache so that they can be reused.
;
;		CAUTION: This routine is called while the journal's trim lock
;		is held shared, so that no other thread can reuse any portion
;		of those extents. We must be very careful about which locks
;		we take from within this callback, to avoid deadlock. The
;		call to add_free_extent_cache will end up taking the cache's
;		lock (just long enough to add these extents to the cache).
;
;		CAUTION: If the journal becomes invalid (eg., due to an I/O
;		error when trying to write to the journal), this callback
;		will stop getting called, even if extents got freed before
;		the journal became invalid!
;
; Input Arguments:
;	arg		- The hfsmount of the volume containing the extents.
;	extent_count	- The number of extents freed in the transaction.
;	extents		- An array of extents (byte ranges) that were freed.
;________________________________________________________________________________
*/
__private_extern__ void
hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
{
	uint32_t i;
	uint32_t startBlock, numBlocks;
	struct hfsmount *hfsmp = arg;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);

	for (i=0; i<extent_count; ++i) {
		/* Convert the byte range in *extents back to a range of allocation blocks. */
		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
		numBlocks = extents[i].length / hfsmp->blockSize;
		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
}


/*
 ;________________________________________________________________________________
 ;
 ; Routine:	UnmapBlocks
 ;
 ; Function:	Traverse the bitmap, and issue DKIOCUNMAPs to the underlying
 ;		device as needed so that the underlying disk device is as
 ;		up-to-date as possible with which blocks are unmapped.
 ;
 ; Input Arguments:
 ;	hfsmp	- The volume containing the allocation blocks.
 ;________________________________________________________________________________
 */

__private_extern__
u_int32_t UnmapBlocks (struct hfsmount *hfsmp) {
	u_int32_t blocks_scanned = 0;
	int error = 0;
	struct jnl_trim_list trimlist;

	/*
	 *struct jnl_trim_list {
	 uint32_t	allocated_count;
	 uint32_t	extent_count;
	 dk_extent_t	*extents;
	 };
	 */
	bzero (&trimlist, sizeof(trimlist));
	if (CONFIG_HFS_TRIM) {
		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
		void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
		if (extents == NULL) {
			return ENOMEM;
		}
		trimlist.extents = (dk_extent_t*)extents;
		trimlist.allocated_count = alloc_count;
		trimlist.extent_count = 0;



		while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
			error = hfs_alloc_scan_block (hfsmp, blocks_scanned, hfsmp->totalBlocks,
					&blocks_scanned, &trimlist);
			if (error) {
				printf("HFS: bitmap unmap scan error: %d\n", error);
				break;
			}
		}
		if (error == 0) {
			hfs_issue_unmap(hfsmp, &trimlist);
		}
		if (trimlist.extents) {
			kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
		}
	}
	return error;
}
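
/*
 * A compiled-out sketch of how a caller typically drives BlockAllocate
 * (defined below): the allocation file must be locked exclusive and a
 * journal transaction open around the call. The flag value shown is real;
 * the helper name and surrounding error handling are illustrative only.
 */
#if 0
static int
example_allocate_extent(struct hfsmount *hfsmp, u_int32_t wantBlocks,
		u_int32_t *start, u_int32_t *count)
{
	OSErr err;
	int lockflags;

	if (hfs_start_transaction(hfsmp) != 0)
		return EINVAL;
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/* Ask for exactly wantBlocks, contiguous, starting anywhere */
	err = BlockAllocate(HFSTOVCB(hfsmp), 0, wantBlocks, wantBlocks,
			HFS_ALLOC_FORCECONTIG, start, count);

	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);
	return (err == noErr) ? 0 : ENOSPC;
}
#endif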

/*
 ;________________________________________________________________________________
 ;
 ; Routine:	BlockAllocate
 ;
 ; Function:	Allocate space on a volume. If contiguous allocation is requested,
 ;		at least the requested number of blocks will be allocated or an
 ;		error will be returned. If contiguous allocation is not forced,
 ;		the space will be allocated from the first large enough extent
 ;		available at the requested starting allocation block. If there is not
 ;		enough room there, a block allocation of less than the requested size
 ;		will be allocated.
 ;
 ;		If the requested starting block is 0 (for new file allocations),
 ;		the volume's allocation block pointer will be used as a starting
 ;		point.
 ;
 ; Input Arguments:
 ;	vcb		- Pointer to ExtendedVCB for the volume to allocate space on
 ;	fcb		- Pointer to FCB for the file for which storage is being allocated
 ;	startingBlock	- Preferred starting allocation block, 0 = no preference
 ;	minBlocks	- Number of blocks requested. If the allocation is non-contiguous,
 ;			  less than this may actually be allocated
 ;	maxBlocks	- The maximum number of blocks to allocate. If there is additional free
 ;			  space after bytesRequested, then up to maxBlocks blocks should really
 ;			  be allocated. (Used by ExtendFileC to round up allocations to a multiple
 ;			  of the file's clump size.)
 ;	flags		- Flags to specify options like contiguous, use metadata zone,
 ;			  skip free block check, etc.
 ;
 ; Output:
 ;	(result)	- Error code, zero for successful allocation
 ;	*startBlock	- Actual starting allocation block
 ;	*actualBlocks	- Actual number of allocation blocks allocated
 ;
 ; Side effects:
 ;	The volume bitmap is read and updated; the volume bitmap cache may be changed.
 ;________________________________________________________________________________
 */
OSErr BlockAllocate (
	ExtendedVCB		*vcb,			/* which volume to allocate space on */
	u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
	u_int32_t		minBlocks,		/* desired number of blocks to allocate */
	u_int32_t		maxBlocks,		/* maximum number of blocks to allocate */
	u_int32_t		flags,			/* option flags */
	u_int32_t		*actualStartBlock,	/* actual first block of allocation */
	u_int32_t		*actualNumBlocks)	/* number of blocks actually allocated; if forceContiguous */
						/* was zero, then this may represent fewer than minBlocks */
{
	u_int32_t freeBlocks;
	OSErr err;
	Boolean updateAllocPtr = false;		// true if nextAllocation needs to be updated
	struct hfsmount *hfsmp;
	Boolean useMetaZone;
	Boolean forceContiguous;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}

	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	//TODO: Figure out when we need to re-enable the RB-Tree.


	//TODO: Make sure we use allocLimit when appropriate.

	/*
	 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
	 * in conjunction with the Generate Tree function. If the red-black tree does not currently contain
	 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
	 * we find what we need. We'll update the tree fields when we're done, indicating that we've advanced the
	 * high water mark for the tree.
	 */

	//
	// Initialize outputs in case we get an error
	//
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);


	/* Skip free block check if blocks are being allocated for relocating
	 * data during truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful. This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		// If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	//
	// If caller didn't specify a starting block number, then use the volume's
	// next block to allocate from.
	//
	if (startingBlock == 0) {
		HFS_MOUNT_LOCK(vcb, TRUE);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		}
		else {
			startingBlock = vcb->nextAllocation;
		}
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		updateAllocPtr = true;
	}


	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	// If the request must be contiguous, then find a sequence of free blocks
	// that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
				useMetaZone, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
		    (*actualStartBlock > startingBlock) &&
		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
		     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
#if CONFIG_HFS_ALLOC_RBTREE
		/*
		 * If the RB-Tree Allocator is live, just go straight for a
		 * BlockAllocateAny call and return the result. Otherwise,
		 * resort to the bitmap scanner.
		 */
		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
			/* Start by trying to allocate from the starting block forward */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, useMetaZone, actualStartBlock,
					actualNumBlocks);

			/*
			 * Because the RB-Tree is live, the previous call to BlockAllocateAny
			 * will use the rbtree variant. As a result, it will automatically search the
			 * metadata zone for a valid extent if needed. If we get a return value of
			 * noErr, we found a valid extent and we can skip to the end. If the error indicates
			 * the disk is full, that's an equally valid return code and we can skip to the end, too.
			 */
			if (err == noErr || err == dskFulErr) {
				goto Exit;
			}
			else {
				//TODO: only tear down tree if the tree is finished building.
				//Make sure to handle the ENOSPC condition properly. We shouldn't error out in that case.
				/* Tear down tree if we encounter an error */
				if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
					hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
					DestroyTrees(hfsmp);
					ResetVCBFreeExtCache(hfsmp);
				}
				else {
					goto Exit;
				}
				// fall through to the normal allocation since the rb-tree allocation failed.
			}
		}
#endif

		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents. Repeat as needed until
		 * we get all the space we needed. We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will cease to be updated if
		 * we are using the red-black tree for allocations. If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan. Start at startingBlock and go up until the
			 * allocation limit.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, useMetaZone, actualStartBlock,
					actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * We may be out of space in the normal zone; go up to the starting block from
			 * the start of the volume.
			 */
			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
					useMetaZone, actualStartBlock,
					actualNumBlocks);
		}
	}

Exit:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (*actualNumBlocks != 0) {
		//
		// If we used the volume's roving allocation pointer, then we need to update it.
		// Adding in the length of the current allocation might reduce the next allocate
		// call by avoiding a re-scan of the already allocated space.  However, the clump
		// just allocated can quite conceivably end up being truncated or released when
		// the file is closed or its EOF changed.  Leaving the allocation pointer at the
		// start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		HFS_MOUNT_LOCK(vcb, TRUE);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		if (updateAllocPtr &&
		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
		     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/*
		 * Update the number of free blocks on the volume.
		 *
		 * Skip updating the free blocks count if the blocks are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		HFS_MOUNT_UNLOCK(vcb, TRUE);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}

	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}


/*
;________________________________________________________________________________
;
; Routine:	BlockDeallocate
;
; Function:	Update the bitmap to deallocate a run of disk allocation blocks
;
; Input Arguments:
;	vcb		- Pointer to ExtendedVCB for the volume to free space on
;	firstBlock	- First allocation block to be freed
;	numBlocks	- Number of allocation blocks to free up (must be > 0!)
;
; Output:
;	(result)	- Result code
;
; Side effects:
;	The volume bitmap is read and updated; the volume bitmap cache may be changed.
;	The Allocator's red-black trees may also be modified as a result.
;________________________________________________________________________________
*/

OSErr BlockDeallocate (
	ExtendedVCB		*vcb,		// Which volume to deallocate space on
	u_int32_t		firstBlock,	// First block in range to deallocate
	u_int32_t		numBlocks,	// Number of contiguous blocks to deallocate
	u_int32_t		flags)
{
	OSErr err;
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	//
	// If no blocks to deallocate, then exit early
	//
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}


	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}

		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}
	}




	/*
	 * If we're using the red-black tree code, then try to free the
	 * blocks by marking them in the red-black tree first. If the tree
	 * is not active for whatever reason (or we're not using the
	 * R/B Tree code at all), then go straight for the BlockMarkFree
	 * function.
	 *
	 * Remember that we can get into this function if the tree isn't finished
	 * building. In that case, check to see if the block we're de-allocating is
	 * past the high watermark.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		/*
		 * BlockMarkFreeRBTree deals with the case where we are resizing the
		 * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion
		 * that is currently controlled by the r/b tree.
		 */

		//TODO: Update multiplexing code for the half-finished case.
		err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
		adjustFreeExtCache = 0;
	}
	else {
		err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
	}

#else
	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
#endif
	if (err)
		goto Exit;

	//
	// Update the volume's free block count, and mark the VCB as dirty.
	//
	HFS_MOUNT_LOCK(vcb, TRUE);

	/*
	 * Do not update the free block count when HFS_ALLOC_SKIPFREEBLKS is
	 * specified; that flag is set while a volume is being truncated.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);

		/*
		 * In the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache). Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}

	MarkVCBDirty(vcb);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}


u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
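
/*
 * freebitcount maps a 4-bit nibble of the bitmap to the number of zero bits
 * (free allocation blocks) it contains, so the free blocks in a whole byte
 * can be counted with two table lookups instead of a loop, as
 * MetaZoneFreeBlocks does below. A compiled-out sketch of the technique;
 * the helper name is illustrative only.
 */
#if 0
static u_int32_t
example_free_blocks_in_byte(u_int8_t byte)
{
	/* e.g. byte == 0x00 -> 8 free blocks; byte == 0xFF -> 0 free blocks */
	return freebitcount[byte & 0x0F] + freebitcount[(byte >> 4) & 0x0F];
}
#endif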

u_int32_t
MetaZoneFreeBlocks(ExtendedVCB *vcb)
{
	u_int32_t freeblocks;
	u_int32_t *currCache;
	uintptr_t blockRef;
	u_int32_t bit;
	u_int32_t lastbit;
	int bytesleft;
	int bytesperblock;
	u_int8_t byte;
	u_int8_t *buffer;

	blockRef = 0;
	bytesleft = freeblocks = 0;
	buffer = NULL;
	bit = VCBTOHFS(vcb)->hfs_metazone_start;
	if (bit == 1)
		bit = 0;

	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
	bytesperblock = vcb->vcbVBMIOSize;

	/*
	 *  Count all the bits from bit to lastbit.
	 */
	while (bit < lastbit) {
		/*
		 *  Get next bitmap block.
		 */
		if (bytesleft == 0) {
			if (blockRef) {
				(void) ReleaseBitmapBlock(vcb, blockRef, false);
				blockRef = 0;
			}
			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
				return (0);
			}
			buffer = (u_int8_t *)currCache;
			bytesleft = bytesperblock;
		}
		byte = *buffer++;
		freeblocks += freebitcount[byte & 0x0F];
		freeblocks += freebitcount[(byte >> 4) & 0x0F];
		bit += kBitsPerByte;
		--bytesleft;
	}
	if (blockRef)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	return (freeblocks);
}


/*
 * Obtain the next allocation block (bit) that's
 * outside the metadata allocation zone.
 */
static u_int32_t NextBitmapBlock(
	ExtendedVCB	*vcb,
	u_int32_t	bit)
{
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
		return (bit);
	/*
	 * Skip over metadata allocation zone.
	 */
	if ((bit >= hfsmp->hfs_metazone_start) &&
	    (bit <= hfsmp->hfs_metazone_end)) {
		bit = hfsmp->hfs_metazone_end + 1;
	}
	return (bit);
}


/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;		block (bit). Return a pointer to the bitmap block.
;
; Inputs:
;	vcb		-- Pointer to ExtendedVCB
;	bit		-- Allocation block whose bitmap block is desired
;
; Outputs:
;	buffer		-- Pointer to bitmap block corresponding to "block"
;	blockRef
;_______________________________________________________________________
*/
static OSErr ReadBitmapBlock(
	ExtendedVCB	*vcb,
	u_int32_t	bit,
	u_int32_t	**buffer,
	uintptr_t	*blockRef)
{
	OSErr err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	if (vcb->vcbSigWord == kHFSPlusSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */

	} else /* hfs */ {
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;		/* map to physical block */
	}

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}


/*
;_______________________________________________________________________
;
; Routine:	ReleaseBitmapBlock
;
; Function:	Release a bitmap block.
;
; Inputs:
;	vcb
;	blockRef
;	dirty
;_______________________________________________________________________
*/
static OSErr ReleaseBitmapBlock(
	ExtendedVCB	*vcb,
	uintptr_t	blockRef,
	Boolean		dirty)
{
	struct buf *bp = (struct buf *)blockRef;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);

	if (blockRef == 0) {
		if (dirty)
			panic("hfs: ReleaseBitmapBlock: missing bp");
		return (0);
	}

	if (bp) {
		if (dirty) {
			// XXXdbg
			struct hfsmount *hfsmp = VCBTOHFS(vcb);

			if (hfsmp->jnl) {
				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
			} else {
				buf_bdwrite(bp);
			}
		} else {
			buf_brelse(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return (0);
}
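
/*
 * ReadBitmapBlock locates the cache block for an allocation block by integer
 * division: each cache block covers vcbVBMIOSize * 8 bits. For example, with
 * a 4 KiB vcbVBMIOSize, allocation block 100000 lives in bitmap block
 * 100000 / 32768 = 3. A compiled-out sketch of that index math; the helper
 * name is illustrative only.
 */
#if 0
static daddr64_t
example_bitmap_block_for_bit(ExtendedVCB *vcb, u_int32_t bit)
{
	u_int32_t bitsPerBlock = (u_int32_t)vcb->vcbVBMIOSize * kBitsPerByte;

	/* Same computation ReadBitmapBlock performs before reading */
	return (daddr64_t)(bit / bitsPerBlock);
}
#endif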

/*
 * ReleaseScanBitmapBlock is used to release struct bufs that were
 * created for use by bitmap scanning code. We want to force
 * them to be purged out of the buffer cache ASAP, so we'll release them differently
 * than in the ReleaseBitmapBlock case. Additionally, we know that we're only reading
 * the blocks, so we will never dirty them as part of the tree building scan.
 */

static OSErr ReleaseScanBitmapBlock(struct buf *bp ) {

	if (bp == NULL) {
		return (0);
	}

	if (bp) {
		/* Mark the buffer invalid if it isn't locked, then release it */
		if ((buf_flags(bp) & B_LOCKED) == 0) {
			buf_markinvalid(bp);
		}
		buf_brelse(bp);
	}

	return (0);


}

/*
_______________________________________________________________________

Routine:	BlockAllocateContig

Function:	Allocate a contiguous group of allocation blocks. The
		allocation is all-or-nothing. The caller guarantees that
		there are enough free blocks (though they may not be
		contiguous, in which case this call will fail).

Inputs:
	vcb		Pointer to volume where space is to be allocated
	startingBlock	Preferred first block for allocation
	minBlocks	Minimum number of contiguous blocks to allocate
	maxBlocks	Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/
static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone,
				actualStartBlock, actualNumBlocks, 1);
	}
#endif
	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
}
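
/*
 * BlockAllocateContigBitmap, below, implements a two-pass wrap-around search:
 * it first looks from the preferred block up to the allocation limit, and
 * only on failure retries from the start of the volume up to (not past) the
 * preferred block, so the two passes never overlap. A compiled-out sketch of
 * the pattern, without the surrounding tracing and bitmap updates; the helper
 * name is illustrative only.
 */
#if 0
static OSErr
example_wraparound_search(ExtendedVCB *vcb, u_int32_t startingBlock,
		u_int32_t minBlocks, u_int32_t maxBlocks, Boolean useMetaZone,
		u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks)
{
	/* Pass 1: preferred block up to the allocation limit */
	OSErr err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit,
			minBlocks, maxBlocks, useMetaZone,
			actualStartBlock, actualNumBlocks);

	if (err == dskFulErr && startingBlock != 0) {
		/* Pass 2: start of volume up to the preferred block */
		err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks,
				maxBlocks, useMetaZone,
				actualStartBlock, actualNumBlocks);
	}
	return err;
}
#endif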

/*
 * Variant of BlockAllocateContig that uses the original bitmap-searching logic
 */

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	//
	// Find a contiguous group of blocks at least minBlocks long.
	// Determine the number of contiguous blocks available (up
	// to maxBlocks).
	//

	/*
	 * NOTE: If the only contiguous free extent of at least minBlocks
	 * crosses startingBlock (i.e. starts before, ends after), then we
	 * won't find it. Earlier versions *did* find this case by letting
	 * the second search look past startingBlock by minBlocks. But
	 * with the free extent cache, this can lead to duplicate entries
	 * in the cache, causing the same blocks to be allocated twice.
	 */
	err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	if (err == dskFulErr && startingBlock != 0) {
		/*
		 * Constrain the endingBlock so we don't bother looking for ranges
		 * that would overlap those found in the previous call.
		 */
		err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
				useMetaZone, actualStartBlock, actualNumBlocks);
	}
	//
	// Now mark those blocks allocated.
	//
	if (err == noErr)
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Variant of BlockAllocateContig that uses the newer red-black tree library
 * in order to manage free space extents. This will search the red-black tree
 * and return results in the same fashion as BlockAllocateContigBitmap.
 *
 * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
 * as well as BlockAllocateContig. In order to determine when we should vend contiguous chunks over
 * locality-based-searches, we use the forceContig argument to determine who called us.
 */

static OSErr BlockAllocateContigRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks,
	u_int32_t		forceContig)
{
	OSErr err;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t tempnode;

	bzero (&tempnode, sizeof(extent_node_t));

	/* Begin search at the end of the file, via startingBlock */
	memset (&search_sentinel, 0, sizeof(extent_node_t));
	search_sentinel.offset = startingBlock;

	*actualStartBlock = 0;
	*actualNumBlocks = 0;

	/*
	 * Find the first available extent that satisfies the allocation by searching
	 * from the starting point and moving forward
	 */
	node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);

	if (node) {
		*actualStartBlock = node->offset;
		*actualNumBlocks = node->length;
	}

	/* If we managed to grab at least minBlocks of space, then we're done. */

	if (*actualNumBlocks >= minBlocks) {
		if (*actualNumBlocks > maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}


		/* Check to see if blocks are already marked as in-use */
		if (ALLOC_DEBUG) {
			REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
			if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
				printf("bad node: %p, offset %d, length %d\n", node, node->offset,node->length);
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
						*actualStartBlock, *actualNumBlocks);
			}
		}

		/*
		 * BlockMarkAllocatedRBTree is responsible for removing the nodes
		 * from the red-black tree after the bitmap has been updated on-disk.
1630 */ 1631 err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks); 1632 if (err == noErr) { 1633 1634 if ( ALLOC_DEBUG ) { 1635 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); 1636 if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) { 1637 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n", 1638 *actualStartBlock, *actualNumBlocks); 1639 } 1640 check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC); 1641 } 1642 1643 return err; 1644 } 1645 } 1646 1647 /* 1648 * We may have failed to grow at the end of the file. We'll try to find 1649 * appropriate free extents, searching by size in the normal allocation zone. 1650 * 1651 * However, if we're allocating on behalf of a sparse device that hasn't explicitly 1652 * requested a contiguous chunk, then we try to search by offset, even if it 1653 * means fragmenting the file. We want all available entries starting 1654 * from the front of the disk to avoid creating new bandfiles. As a result, 1655 * we'll start by searching the offset tree rather than the normal length 1656 * tree. Note that this function can be invoked from BlockAllocateAny, in 1657 * which the minimum block size is 1 block, making it easy to succeed. 1658 */ 1659 search_sentinel.offset = hfsmp->hfs_metazone_end; 1660 search_sentinel.length = minBlocks; 1661 1662 if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) { 1663 /* just start with the first offset node */ 1664 node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel); 1665 } 1666 else { 1667 /* 1668 * Otherwise, start from the end of the metadata zone or our next allocation pointer, 1669 * and try to find the first chunk of size >= min. 1670 */ 1671 node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel); 1672 1673 if (node == NULL) { 1674 extent_node_t *metaend_node; 1675 /* 1676 * Maybe there's a free extent coalesced with the space still in the metadata 1677 * zone. If there is, find it and allocate from the middle of it, starting at 1678 * the end of the metadata zone. 1679 * 1680 * If search_prev yields a result that is not offset == metazone_end, then that 1681 * means no node existed at that offset. If the previous node's offset + length crosses 1682 * the metazone boundary, then allocate from there. If it is too small to 1683 * cross the metazone boundary, then it is of no importance and we'd have to 1684 * report ENOSPC. 1685 */ 1686 metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel); 1687 1688 if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) { 1689 u_int32_t node_end = metaend_node->offset + metaend_node->length; 1690 if (node_end > hfsmp->hfs_metazone_end) { 1691 u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end; 1692 if (modified_length >= minBlocks) { 1693 /* 1694 * Then we can allocate it. Fill in the contents into tempnode, 1695 * and BlockMarkAllocatedRBTree below will take care of the rest. 1696 */ 1697 tempnode.offset = hfsmp->hfs_metazone_end; 1698 tempnode.length = MIN(minBlocks, node_end - tempnode.offset); 1699 node = &tempnode; 1700 } 1701 } 1702 } 1703 } 1704 } 1705 1706 /* If we can't find anything useful, search the metadata zone as a last resort. 
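	   (First, a worked example of the metazone-straddle case handled above.
	   The numbers are illustrative only and are not from the original source.) */
#if 0	/* Illustrative sketch only -- not compiled. */
	/*
	 * Suppose hfs_metazone_end = 1000 and the candidate free extent is
	 * { offset = 900, length = 300 }, i.e. blocks [900, 1200).  Then
	 * node_end = 1200 crosses the zone boundary, the usable tail beyond
	 * the zone is 1200 - 1000 = 200 blocks, and for minBlocks <= 200 the
	 * code above synthesizes:
	 */
	tempnode.offset = 1000;				/* hfs_metazone_end */
	tempnode.length = MIN(minBlocks, 1200 - 1000);	/* tail past the zone */
	node = &tempnode;
#endif
	/* Now the metadata-zone last resort: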
*/ 1707 1708 if ((!node) && useMetaZone) { 1709 search_sentinel.offset = 0; 1710 search_sentinel.length = minBlocks; 1711 node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel); 1712 } 1713 1714 /* If we found something useful, then go ahead and update the bitmap */ 1715 if ((node) && (node->length >= minBlocks)) { 1716 *actualStartBlock = node->offset; 1717 if (node->length >= maxBlocks) { 1718 *actualNumBlocks = maxBlocks; 1719 } 1720 else { 1721 *actualNumBlocks = node->length; 1722 } 1723 1724 err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks); 1725 1726 if (err == noErr) { 1727 if ( ALLOC_DEBUG ) { 1728 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); 1729 if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) { 1730 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n", 1731 *actualStartBlock, *actualNumBlocks); 1732 } 1733 check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC); 1734 } 1735 } 1736 } 1737 else { 1738 int destroy_trees = 0; 1739 /* 1740 * TODO: Add High-water mark check here. If we couldn't find anything useful, 1741 * when do we tear down the tree? Or should the logic be in BlockAllocateContig?? 1742 */ 1743 if (destroy_trees) { 1744 DestroyTrees(VCBTOHFS(vcb)); 1745 /* Reset the Free Ext Cache since we'll be using it now. */ 1746 ResetVCBFreeExtCache(VCBTOHFS(vcb)); 1747 } 1748 1749 if (ALLOC_DEBUG) { 1750 printf("HFS allocator: No space on FS (%s). Node %p Start %d Min %d, Max %d, Tree still alive.\n", 1751 hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks); 1752 1753 /* Dump the list ? */ 1754 extent_tree_offset_print(&hfsmp->offset_tree); 1755 1756 printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n", 1757 hfsmp->vcbVN, minBlocks, maxBlocks); 1758 1759 1760 1761 } 1762 err = dskFulErr; 1763 } 1764 1765 if (err == noErr) { 1766 if (ALLOC_DEBUG) { 1767 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) 1768 panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); 1769 } 1770 } 1771 else { 1772 *actualStartBlock = 0; 1773 *actualNumBlocks = 0; 1774 } 1775 1776 return err; 1777 1778} 1779#endif 1780 1781 1782 1783/* 1784_______________________________________________________________________ 1785 1786Routine: BlockAllocateAny 1787 1788Function: Allocate one or more allocation blocks. If there are fewer 1789 free blocks than requested, all free blocks will be 1790 allocated. The caller guarantees that there is at least 1791 one free block. 1792 1793Inputs: 1794 vcb Pointer to volume where space is to be allocated 1795 startingBlock Preferred first block for allocation 1796 endingBlock Last block to check + 1 1797 maxBlocks Maximum number of contiguous blocks to allocate 1798 useMetaZone 1799 1800Outputs: 1801 actualStartBlock First block of range allocated, or 0 if error 1802 actualNumBlocks Number of blocks allocated, or 0 if error 1803_______________________________________________________________________ 1804*/ 1805 1806/* 1807 * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree 1808 * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic. 
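 *
 * A hypothetical caller (illustrative sketch only; this loop is not from the
 * original source) must cope with the fact that a successful call may return
 * fewer blocks than maxBlocks:
 */
#if 0	/* Illustrative sketch only -- not compiled. */
	u_int32_t start, got, need = 16;

	while (need != 0 &&
	       BlockAllocateAny(vcb, vcb->nextAllocation, vcb->allocLimit,
	                        need, false, &start, &got) == noErr) {
		need -= got;			/* got is in [1, need] on success */
	}
	/* The loop ends either with all blocks granted piecewise, or with
	 * dskFulErr once no free block remains. */
#endif
/*
 * (end of illustrative sketch)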
 */

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	}
#endif
	return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);

}


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * BlockAllocateAnyRBTree finds one or more allocation blocks by using
 * the red-black allocation tree to figure out where the free ranges are.
 * This function is typically used as a last resort because we were unable to
 * find the right ranges.  Outputs are the same as BlockAllocateAnyBitmap.
 */
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;

	/*
	 * BlockAllocateContig
	 */
	/* If we're using the red-black tree, try searching at the specified offsets. */
	err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone,
	                                actualStartBlock, actualNumBlocks, 0);
	return err;

}
#endif

/*
 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
 * where the free allocation blocks are.  Inputs and outputs are the same as for
 * BlockAllocateAny and BlockAllocateAnyRBTree.
 */

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;
	register u_int32_t block;		//	current block number
	register u_int32_t currentWord;	//	Value of current word within bitmap block
	register u_int32_t bitMask;		//	Word with given bits already set (ready to OR in)
	register u_int32_t wordsLeft;	//	Number of words left in this bitmap block
	u_int32_t *buffer = NULL;
	u_int32_t *currCache = NULL;
	uintptr_t blockRef;
	u_int32_t bitsPerBlock;
	u_int32_t wordsPerBlock;
	Boolean dirty = false;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
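 *
 * Worked example (illustrative numbers only, not from the original source):
 * with hfs_metazone_end = 5000, a request covering [100, 9000) is bumped to
 * start at block 5001, while a request covering [100, 5002) does not extend
 * far enough past the zone and is denied:
 */
#if 0	/* Illustrative sketch only -- not compiled; mirrors the check below. */
	if (startingBlock <= 5000 /* hfs_metazone_end */) {
		if (endingBlock > 5002 /* hfs_metazone_end + 2 */)
			startingBlock = 5001;	/* first block past the zone */
		else {
			err = dskFulErr;	/* entirely inside the zone */
			goto Exit;
		}
	}
#endif
/*
 * (end of illustrative sketch)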
1896 */ 1897 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) { 1898 if (startingBlock <= vcb->hfs_metazone_end) { 1899 if (endingBlock > (vcb->hfs_metazone_end + 2)) 1900 startingBlock = vcb->hfs_metazone_end + 1; 1901 else { 1902 err = dskFulErr; 1903 goto Exit; 1904 } 1905 } 1906 } 1907 1908 // Since this routine doesn't wrap around 1909 if (maxBlocks > (endingBlock - startingBlock)) { 1910 maxBlocks = endingBlock - startingBlock; 1911 } 1912 1913 // 1914 // Pre-read the first bitmap block 1915 // 1916 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef); 1917 if (err != noErr) goto Exit; 1918 buffer = currCache; 1919 1920 // 1921 // Set up the current position within the block 1922 // 1923 { 1924 u_int32_t wordIndexInBlock; 1925 1926 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; 1927 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; 1928 1929 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; 1930 buffer += wordIndexInBlock; 1931 wordsLeft = wordsPerBlock - wordIndexInBlock; 1932 currentWord = SWAP_BE32 (*buffer); 1933 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask); 1934 } 1935 1936 // 1937 // Find the first unallocated block 1938 // 1939 block=startingBlock; 1940 while (block < endingBlock) { 1941 if ((currentWord & bitMask) == 0) 1942 break; 1943 1944 // Next bit 1945 ++block; 1946 bitMask >>= 1; 1947 if (bitMask == 0) { 1948 // Next word 1949 bitMask = kHighBitInWordMask; 1950 ++buffer; 1951 1952 if (--wordsLeft == 0) { 1953 // Next block 1954 buffer = currCache = NULL; 1955 err = ReleaseBitmapBlock(vcb, blockRef, false); 1956 if (err != noErr) goto Exit; 1957 1958 /* 1959 * Skip over metadata blocks. 1960 */ 1961 if (!useMetaZone) { 1962 block = NextBitmapBlock(vcb, block); 1963 } 1964 if (block >= endingBlock) { 1965 err = dskFulErr; 1966 goto Exit; 1967 } 1968 1969 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); 1970 if (err != noErr) goto Exit; 1971 buffer = currCache; 1972 1973 wordsLeft = wordsPerBlock; 1974 } 1975 currentWord = SWAP_BE32 (*buffer); 1976 } 1977 } 1978 1979 // Did we get to the end of the bitmap before finding a free block? 1980 // If so, then couldn't allocate anything. 1981 if (block >= endingBlock) { 1982 err = dskFulErr; 1983 goto Exit; 1984 } 1985 1986 // Return the first block in the allocated range 1987 *actualStartBlock = block; 1988 dirty = true; 1989 1990 // If we could get the desired number of blocks before hitting endingBlock, 1991 // then adjust endingBlock so we won't keep looking. Ideally, the comparison 1992 // would be (block + maxBlocks) < endingBlock, but that could overflow. The 1993 // comparison below yields identical results, but without overflow. 1994 if (block < (endingBlock-maxBlocks)) { 1995 endingBlock = block + maxBlocks; // if we get this far, we've found enough 1996 } 1997 1998 // XXXdbg 1999 if (hfsmp->jnl) { 2000 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2001 } 2002 2003 // 2004 // Allocate all of the consecutive blocks 2005 // 2006 while ((currentWord & bitMask) == 0) { 2007 // Allocate this block 2008 currentWord |= bitMask; 2009 2010 // Move to the next block. If no more, then exit. 
2011 ++block; 2012 if (block == endingBlock) 2013 break; 2014 2015 // Next bit 2016 bitMask >>= 1; 2017 if (bitMask == 0) { 2018 *buffer = SWAP_BE32 (currentWord); // update value in bitmap 2019 2020 // Next word 2021 bitMask = kHighBitInWordMask; 2022 ++buffer; 2023 2024 if (--wordsLeft == 0) { 2025 // Next block 2026 buffer = currCache = NULL; 2027 err = ReleaseBitmapBlock(vcb, blockRef, true); 2028 if (err != noErr) goto Exit; 2029 2030 /* 2031 * Skip over metadata blocks. 2032 */ 2033 if (!useMetaZone) { 2034 u_int32_t nextBlock; 2035 2036 nextBlock = NextBitmapBlock(vcb, block); 2037 if (nextBlock != block) { 2038 goto Exit; /* allocation gap, so stop */ 2039 } 2040 } 2041 2042 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); 2043 if (err != noErr) goto Exit; 2044 buffer = currCache; 2045 2046 // XXXdbg 2047 if (hfsmp->jnl) { 2048 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2049 } 2050 2051 wordsLeft = wordsPerBlock; 2052 } 2053 2054 currentWord = SWAP_BE32 (*buffer); 2055 } 2056 } 2057 *buffer = SWAP_BE32 (currentWord); // update the last change 2058 2059Exit: 2060 if (err == noErr) { 2061 *actualNumBlocks = block - *actualStartBlock; 2062 2063 // sanity check 2064 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { 2065 panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); 2066 } 2067 2068 /* 2069 * Beware! 2070 * Because this function directly manipulates the bitmap to mark the 2071 * blocks it came across as allocated, we must inform the journal (and 2072 * subsequently, the journal's trim list) that we are allocating these 2073 * blocks, just like in BlockMarkAllocatedInternal. hfs_unmap_alloc_extent 2074 * and the functions it calls will serialize behind the journal trim list lock 2075 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to 2076 * us manipulating the trim list, or we get there first and successfully remove 2077 * these bitmap blocks before the TRIM happens. 2078 */ 2079 hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks); 2080 } 2081 else { 2082 *actualStartBlock = 0; 2083 *actualNumBlocks = 0; 2084 } 2085 2086 if (currCache) 2087 (void) ReleaseBitmapBlock(vcb, blockRef, dirty); 2088 2089 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) 2090 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); 2091 2092 return err; 2093} 2094 2095 2096/* 2097_______________________________________________________________________ 2098 2099Routine: BlockAllocateKnown 2100 2101Function: Try to allocate space from known free space in the free 2102 extent cache. 

Inputs:
	vcb			Pointer to volume where space is to be allocated
	maxBlocks		Maximum number of contiguous blocks to allocate

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error

Returns:
	dskFulErr		Free extent cache is empty
_______________________________________________________________________
*/

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	u_int32_t		foundBlocks;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);

	HFS_MOUNT_LOCK(vcb, TRUE);
	lck_spin_lock(&vcb->vcbFreeExtLock);
	if ((hfs_isrbtree_active(vcb) == true) ||
	    vcb->vcbFreeExtCnt == 0 ||
	    vcb->vcbFreeExt[0].blockCount == 0) {
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
		return dskFulErr;
	}
	lck_spin_unlock(&vcb->vcbFreeExtLock);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	lck_spin_lock(&vcb->vcbFreeExtLock);

	// Just grab up to maxBlocks of the first (largest) free extent.
	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
	foundBlocks = vcb->vcbFreeExt[0].blockCount;
	if (foundBlocks > maxBlocks)
		foundBlocks = maxBlocks;
	*actualNumBlocks = foundBlocks;

	lck_spin_unlock(&vcb->vcbFreeExtLock);

	remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);

	// sanity check
	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
	{
		printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
		err = EIO;
	}
	else
	{
		//
		//	Now mark the found extent in the bitmap
		//
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
	}

	sanity_check_free_ext(vcb, 0);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}

/*
 * BlockMarkAllocated
 *
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 */


OSErr BlockMarkAllocated(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;

	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp)) {
		int err;

		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/*
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.
Just update the bitmap and don't bother manipulating the tree 2208 */ 2209 goto justbitmap; 2210 } 2211 2212 err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks); 2213 if (err == noErr) { 2214 if ( ALLOC_DEBUG ) { 2215 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); 2216 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) { 2217 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n", 2218 startingBlock, numBlocks); 2219 } 2220 check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC); 2221 } 2222 } 2223 return err; 2224 2225 } 2226justbitmap: 2227#endif 2228 2229 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks); 2230 2231} 2232 2233 2234 2235/* 2236_______________________________________________________________________ 2237 2238Routine: BlockMarkAllocatedInternal 2239 2240Function: Mark a contiguous group of blocks as allocated (set in the 2241 bitmap). It assumes those bits are currently marked 2242 deallocated (clear in the bitmap). Note that this function 2243 must be called regardless of whether or not the bitmap or 2244 tree-based allocator is used, as all allocations must correctly 2245 be marked on-disk. If the tree-based approach is running, then 2246 this will be done before the node is removed from the tree. 2247 2248Inputs: 2249 vcb Pointer to volume where space is to be allocated 2250 startingBlock First block number to mark as allocated 2251 numBlocks Number of blocks to mark as allocated 2252_______________________________________________________________________ 2253*/ 2254static 2255OSErr BlockMarkAllocatedInternal ( 2256 ExtendedVCB *vcb, 2257 u_int32_t startingBlock, 2258 register u_int32_t numBlocks) 2259{ 2260 OSErr err; 2261 register u_int32_t *currentWord; // Pointer to current word within bitmap block 2262 register u_int32_t wordsLeft; // Number of words left in this bitmap block 2263 register u_int32_t bitMask; // Word with given bits already set (ready to OR in) 2264 u_int32_t firstBit; // Bit index within word of first bit to allocate 2265 u_int32_t numBits; // Number of bits in word to allocate 2266 u_int32_t *buffer = NULL; 2267 uintptr_t blockRef; 2268 u_int32_t bitsPerBlock; 2269 u_int32_t wordsPerBlock; 2270 // XXXdbg 2271 struct hfsmount *hfsmp = VCBTOHFS(vcb); 2272 2273 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) 2274 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); 2275 2276 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks); 2277 2278 // 2279 // Pre-read the bitmap block containing the first word of allocation 2280 // 2281 2282 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); 2283 if (err != noErr) goto Exit; 2284 // 2285 // Initialize currentWord, and wordsLeft. 2286 // 2287 { 2288 u_int32_t wordIndexInBlock; 2289 2290 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; 2291 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; 2292 2293 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; 2294 currentWord = buffer + wordIndexInBlock; 2295 wordsLeft = wordsPerBlock - wordIndexInBlock; 2296 } 2297 2298 // XXXdbg 2299 if (hfsmp->jnl) { 2300 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2301 } 2302 2303 // 2304 // If the first block to allocate doesn't start on a word 2305 // boundary in the bitmap, then treat that first word 2306 // specially. 
2307 // 2308 2309 firstBit = startingBlock % kBitsPerWord; 2310 if (firstBit != 0) { 2311 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit 2312 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word 2313 if (numBits > numBlocks) { 2314 numBits = numBlocks; // entire allocation is inside this one word 2315 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last 2316 } 2317#if DEBUG_BUILD 2318 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { 2319 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); 2320 } 2321#endif 2322 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap 2323 numBlocks -= numBits; // adjust number of blocks left to allocate 2324 2325 ++currentWord; // move to next word 2326 --wordsLeft; // one less word left in this block 2327 } 2328 2329 // 2330 // Allocate whole words (32 blocks) at a time. 2331 // 2332 2333 bitMask = kAllBitsSetInWord; // put this in a register for 68K 2334 while (numBlocks >= kBitsPerWord) { 2335 if (wordsLeft == 0) { 2336 // Read in the next bitmap block 2337 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block 2338 2339 buffer = NULL; 2340 err = ReleaseBitmapBlock(vcb, blockRef, true); 2341 if (err != noErr) goto Exit; 2342 2343 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); 2344 if (err != noErr) goto Exit; 2345 2346 // XXXdbg 2347 if (hfsmp->jnl) { 2348 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2349 } 2350 2351 // Readjust currentWord and wordsLeft 2352 currentWord = buffer; 2353 wordsLeft = wordsPerBlock; 2354 } 2355#if DEBUG_BUILD 2356 if (*currentWord != 0) { 2357 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); 2358 } 2359#endif 2360 *currentWord = SWAP_BE32 (bitMask); 2361 numBlocks -= kBitsPerWord; 2362 2363 ++currentWord; // move to next word 2364 --wordsLeft; // one less word left in this block 2365 } 2366 2367 // 2368 // Allocate any remaining blocks. 2369 // 2370 2371 if (numBlocks != 0) { 2372 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits 2373 if (wordsLeft == 0) { 2374 // Read in the next bitmap block 2375 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block 2376 2377 buffer = NULL; 2378 err = ReleaseBitmapBlock(vcb, blockRef, true); 2379 if (err != noErr) goto Exit; 2380 2381 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); 2382 if (err != noErr) goto Exit; 2383 2384 // XXXdbg 2385 if (hfsmp->jnl) { 2386 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2387 } 2388 2389 // Readjust currentWord and wordsLeft 2390 currentWord = buffer; 2391 wordsLeft = wordsPerBlock; 2392 } 2393#if DEBUG_BUILD 2394 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { 2395 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); 2396 } 2397#endif 2398 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap 2399 2400 // No need to update currentWord or wordsLeft 2401 } 2402 2403Exit: 2404 2405 if (buffer) 2406 (void)ReleaseBitmapBlock(vcb, blockRef, true); 2407 2408 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) 2409 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); 2410 2411 return err; 2412} 2413 2414#if CONFIG_HFS_ALLOC_RBTREE 2415/* 2416 * This is a wrapper function around BlockMarkAllocated. This function is 2417 * called when the RB Tree-based allocator needs to mark a block as in-use. 
2418 * This function should take the locks that would not normally be 2419 * necessary for the normal bitmap allocator, and then call the function. Once 2420 * the on-disk data structures are updated properly, then this will remove the 2421 * appropriate node from the tree. 2422 */ 2423 2424static OSErr BlockMarkAllocatedRBTree( 2425 ExtendedVCB *vcb, 2426 u_int32_t startingBlock, 2427 u_int32_t numBlocks) 2428{ 2429 OSErr err; 2430 struct hfsmount *hfsmp = VCBTOHFS(vcb); 2431 int rb_err = 0; 2432 2433 2434 if (ALLOC_DEBUG) { 2435 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); 2436 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { 2437 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n", 2438 startingBlock, numBlocks); 2439 } 2440 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE); 2441 } 2442 2443 err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks); 2444 2445 if (err == noErr) { 2446 2447 if (ALLOC_DEBUG) { 2448 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) { 2449 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n", 2450 startingBlock, numBlocks); 2451 } 2452 } 2453 2454 /* 2455 * Mark the blocks in the offset tree. 2456 */ 2457 rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock); 2458 if (rb_err) { 2459 if (ALLOC_DEBUG) { 2460 printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err); 2461 } 2462 2463 /* 2464 * We may be called from the BlockMarkAllocated interface, in which case, they would 2465 * not be picking extents from their start. Do a check here, find if the specified 2466 * extent is free, and if it is, then find the containing node. 2467 */ 2468 extent_node_t *node = NULL; 2469 extent_node_t search_sentinel; 2470 search_sentinel.offset = startingBlock; 2471 2472 node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel); 2473 2474 if (node) { 2475 rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock); 2476 } 2477 2478 if (ALLOC_DEBUG) { 2479 if (rb_err) { 2480 printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err); 2481 } 2482 } 2483 } 2484 if (ALLOC_DEBUG) { 2485 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC); 2486 } 2487 } 2488 2489 /* 2490 * If we encountered a red-black tree error, for now, we immediately back off and force 2491 * destruction of rb-tree. Set the persistent error-detected bit in the mount point. 2492 * That will ensure that even if we reach a low-water-mark in the future we will still 2493 * not allow the rb-tree to be used. On next mount, we will force a re-construction from 2494 * on-disk state. As a fallback, we will now resort to the bitmap-scanning behavior. 2495 */ 2496 if (rb_err) { 2497 /* Mark RB-Trees with error */ 2498 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED; 2499 DestroyTrees(hfsmp); 2500 /* Reset the Free Ext Cache since we'll be using it now. */ 2501 ResetVCBFreeExtCache(hfsmp); 2502 printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n"); 2503 } 2504 2505 return err; 2506} 2507#endif 2508 2509 2510 2511/* 2512 * BlockMarkFree 2513 * 2514 * This is a wrapper function around the internal calls which will actually mark the blocks 2515 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do 2516 * this logic here to avoid callers having to deal with whether or not the red-black tree 2517 * is enabled. 
2518 * 2519 */ 2520OSErr BlockMarkFree( 2521 ExtendedVCB *vcb, 2522 u_int32_t startingBlock, 2523 register u_int32_t numBlocks) 2524{ 2525 struct hfsmount *hfsmp; 2526 hfsmp = VCBTOHFS(vcb); 2527#if CONFIG_HFS_ALLOC_RBTREE 2528 if (hfs_isrbtree_active(hfsmp)) { 2529 int err; 2530 2531 if ((startingBlock >= hfsmp->offset_block_end) && 2532 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) { 2533 /* 2534 * We're manipulating a portion of the bitmap that is not controlled by the 2535 * red-black tree. Just update the bitmap and don't bother manipulating the tree 2536 */ 2537 goto justbitmap; 2538 } 2539 2540 err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks); 2541 if (err == noErr) { 2542 if ( ALLOC_DEBUG ) { 2543 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); 2544 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { 2545 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n", 2546 startingBlock, numBlocks); 2547 } 2548 check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE); 2549 } 2550 } 2551 return err; 2552 } 2553justbitmap: 2554#endif 2555 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true); 2556 2557} 2558 2559 2560/* 2561 * BlockMarkFreeUnused 2562 * 2563 * Scan the bitmap block beyond end of current file system for bits 2564 * that are marked as used. If any of the bits are marked as used, 2565 * this function marks them free. 2566 * 2567 * Note: This was specifically written to mark all bits beyond 2568 * end of current file system during hfs_extendfs(), which makes 2569 * sure that all the new blocks added to the file system are 2570 * marked as free. We expect that all the blocks beyond end of 2571 * current file system are always marked as free, but there might 2572 * be cases where are marked as used. This function assumes that 2573 * the number of blocks marked as used incorrectly are relatively 2574 * small, otherwise this can overflow journal transaction size 2575 * on certain file system configurations (example, large unused 2576 * bitmap with relatively small journal). 2577 * 2578 * Input: 2579 * startingBlock: First block of the range to mark unused 2580 * numBlocks: Number of blocks in the range to mark unused 2581 * 2582 * Returns: zero on success, non-zero on error. 2583 */ 2584OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks) 2585{ 2586 int error = 0; 2587 struct hfsmount *hfsmp = VCBTOHFS(vcb); 2588 u_int32_t curNumBlocks; 2589 u_int32_t bitsPerBlock; 2590 u_int32_t lastBit; 2591 2592 /* Use the optimal bitmap I/O size instead of bitmap block size */ 2593 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; 2594 2595 /* 2596 * First clear any non bitmap allocation block aligned bits 2597 * 2598 * Calculate the first bit in the bitmap block next to 2599 * the bitmap block containing the bit for startingBlock. 2600 * Using this value, we calculate the total number of 2601 * bits to be marked unused from startingBlock to the 2602 * end of bitmap block containing startingBlock. 2603 */ 2604 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock; 2605 curNumBlocks = lastBit - startingBlock; 2606 if (curNumBlocks > numBlocks) { 2607 curNumBlocks = numBlocks; 2608 } 2609 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false); 2610 if (error) { 2611 return error; 2612 } 2613 startingBlock += curNumBlocks; 2614 numBlocks -= curNumBlocks; 2615 2616 /* 2617 * Check a full bitmap block for any 'used' bit. 
If any bit is used, 2618 * mark all the bits only in that bitmap block as free. This ensures 2619 * that we do not write unmodified bitmap blocks and do not 2620 * overwhelm the journal. 2621 * 2622 * The code starts by checking full bitmap block at a time, and 2623 * marks entire bitmap block as free only if any bit in that bitmap 2624 * block is marked as used. In the end, it handles the last bitmap 2625 * block which might be partially full by only checking till the 2626 * caller-specified last bit and if any bit is set, only mark that 2627 * range as free. 2628 */ 2629 while (numBlocks) { 2630 if (numBlocks >= bitsPerBlock) { 2631 curNumBlocks = bitsPerBlock; 2632 } else { 2633 curNumBlocks = numBlocks; 2634 } 2635 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) { 2636 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false); 2637 if (error) { 2638 return error; 2639 } 2640 } 2641 startingBlock += curNumBlocks; 2642 numBlocks -= curNumBlocks; 2643 } 2644 2645 return error; 2646} 2647 2648/* 2649_______________________________________________________________________ 2650 2651Routine: BlockMarkFreeInternal 2652 2653Function: Mark a contiguous group of blocks as free (clear in the 2654 bitmap). It assumes those bits are currently marked 2655 allocated (set in the bitmap). 2656 2657Inputs: 2658 vcb Pointer to volume where space is to be freed 2659 startingBlock First block number to mark as freed 2660 numBlocks Number of blocks to mark as freed 2661 do_validate If true, validate that the blocks being 2662 deallocated to check if they are within totalBlocks 2663 for current volume and whether they were allocated 2664 before they are marked free. 2665_______________________________________________________________________ 2666*/ 2667static 2668OSErr BlockMarkFreeInternal( 2669 ExtendedVCB *vcb, 2670 u_int32_t startingBlock_in, 2671 register u_int32_t numBlocks_in, 2672 Boolean do_validate) 2673{ 2674 OSErr err; 2675 u_int32_t startingBlock = startingBlock_in; 2676 u_int32_t numBlocks = numBlocks_in; 2677 uint32_t unmapStart = startingBlock_in; 2678 uint32_t unmapCount = numBlocks_in; 2679 uint32_t wordIndexInBlock; 2680 u_int32_t *currentWord; // Pointer to current word within bitmap block 2681 u_int32_t wordsLeft; // Number of words left in this bitmap block 2682 u_int32_t bitMask; // Word with given bits already set (ready to OR in) 2683 u_int32_t currentBit; // Bit index within word of current bit to allocate 2684 u_int32_t numBits; // Number of bits in word to allocate 2685 u_int32_t *buffer = NULL; 2686 uintptr_t blockRef; 2687 u_int32_t bitsPerBlock; 2688 u_int32_t wordsPerBlock; 2689 // XXXdbg 2690 struct hfsmount *hfsmp = VCBTOHFS(vcb); 2691 2692 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) 2693 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0); 2694 2695 /* 2696 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we 2697 * need to be able to free blocks being relocated during hfs_truncatefs. 
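 *
 * Worked example (illustrative numbers only): totalBlocks = 1000 while a
 * shrink to 800 blocks is in flight (allocLimit = 800).  Relocating a file
 * out of the doomed region frees blocks in [800, 1000); validating against
 * allocLimit would falsely reject those frees, so the check below uses
 * totalBlocks instead:
 */
#if 0	/* Illustrative sketch only -- not compiled; the real check follows. */
	if (do_validate && (startingBlock + numBlocks > vcb->totalBlocks)) {
		/* Only blocks beyond the volume itself are truly invalid. */
		err = EIO;
		goto Exit;
	}
#endif
/*
 * (end of illustrative sketch)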
2698 */ 2699 if ((do_validate == true) && 2700 (startingBlock + numBlocks > vcb->totalBlocks)) { 2701 if (ALLOC_DEBUG) { 2702 panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN); 2703 } 2704 2705 printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN); 2706 hfs_mark_volume_inconsistent(vcb); 2707 err = EIO; 2708 goto Exit; 2709 } 2710 2711 // 2712 // Pre-read the bitmap block containing the first word of allocation 2713 // 2714 2715 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); 2716 if (err != noErr) goto Exit; 2717 // XXXdbg 2718 if (hfsmp->jnl) { 2719 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2720 } 2721 2722 // 2723 // Figure out how many bits and words per bitmap block. 2724 // 2725 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; 2726 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; 2727 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; 2728 2729 // 2730 // Look for a range of free blocks immediately before startingBlock 2731 // (up to the start of the current bitmap block). Set unmapStart to 2732 // the first free block. 2733 // 2734 currentWord = buffer + wordIndexInBlock; 2735 currentBit = startingBlock % kBitsPerWord; 2736 bitMask = kHighBitInWordMask >> currentBit; 2737 while (true) { 2738 // Move currentWord/bitMask back by one bit 2739 bitMask <<= 1; 2740 if (bitMask == 0) { 2741 if (--currentWord < buffer) 2742 break; 2743 bitMask = kLowBitInWordMask; 2744 } 2745 2746 if (*currentWord & SWAP_BE32(bitMask)) 2747 break; // Found an allocated block. Stop searching. 2748 --unmapStart; 2749 ++unmapCount; 2750 } 2751 2752 // 2753 // If the first block to free doesn't start on a word 2754 // boundary in the bitmap, then treat that first word 2755 // specially. 2756 // 2757 2758 currentWord = buffer + wordIndexInBlock; 2759 wordsLeft = wordsPerBlock - wordIndexInBlock; 2760 currentBit = startingBlock % kBitsPerWord; 2761 if (currentBit != 0) { 2762 bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit 2763 numBits = kBitsPerWord - currentBit; // number of remaining bits in this word 2764 if (numBits > numBlocks) { 2765 numBits = numBlocks; // entire allocation is inside this one word 2766 bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last 2767 } 2768 if ((do_validate == true) && 2769 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { 2770 goto Corruption; 2771 } 2772 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap 2773 numBlocks -= numBits; // adjust number of blocks left to free 2774 2775 ++currentWord; // move to next word 2776 --wordsLeft; // one less word left in this block 2777 } 2778 2779 // 2780 // Free whole words (32 blocks) at a time. 
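	/*
	 * Worked example for the partial-word mask above (illustrative numbers
	 * only, not from the original source): freeing 5 blocks starting at
	 * allocation block 35 touches bits 3..7 of one big-endian word
	 * (currentBit = 3, numBits = 5):
	 */
#if 0	/* Illustrative sketch only -- not compiled. */
	u_int32_t mask;

	mask  = kAllBitsSetInWord >> 3;			/* 0x1FFFFFFF: drop bits 0..2  */
	mask &= ~(kAllBitsSetInWord >> (3 + 5));	/* 0x1F000000: drop bits 8..31 */
	/* *currentWord &= SWAP_BE32(~mask) then clears exactly bits 3..7. */
#endif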
2781 // 2782 2783 while (numBlocks >= kBitsPerWord) { 2784 if (wordsLeft == 0) { 2785 // Read in the next bitmap block 2786 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block 2787 2788 buffer = NULL; 2789 err = ReleaseBitmapBlock(vcb, blockRef, true); 2790 if (err != noErr) goto Exit; 2791 2792 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); 2793 if (err != noErr) goto Exit; 2794 2795 // XXXdbg 2796 if (hfsmp->jnl) { 2797 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2798 } 2799 2800 // Readjust currentWord and wordsLeft 2801 currentWord = buffer; 2802 wordsLeft = wordsPerBlock; 2803 } 2804 if ((do_validate == true) && 2805 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) { 2806 goto Corruption; 2807 } 2808 *currentWord = 0; // clear the entire word 2809 numBlocks -= kBitsPerWord; 2810 2811 ++currentWord; // move to next word 2812 --wordsLeft; // one less word left in this block 2813 } 2814 2815 // 2816 // Free any remaining blocks. 2817 // 2818 2819 if (numBlocks != 0) { 2820 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits 2821 if (wordsLeft == 0) { 2822 // Read in the next bitmap block 2823 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block 2824 2825 buffer = NULL; 2826 err = ReleaseBitmapBlock(vcb, blockRef, true); 2827 if (err != noErr) goto Exit; 2828 2829 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); 2830 if (err != noErr) goto Exit; 2831 2832 // XXXdbg 2833 if (hfsmp->jnl) { 2834 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); 2835 } 2836 2837 // Readjust currentWord and wordsLeft 2838 currentWord = buffer; 2839 wordsLeft = wordsPerBlock; 2840 } 2841 if ((do_validate == true) && 2842 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { 2843 goto Corruption; 2844 } 2845 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap 2846 2847 // No need to update currentWord or wordsLeft 2848 } 2849 2850 // 2851 // Look for a range of free blocks immediately after the range we just freed 2852 // (up to the end of the current bitmap block). 2853 // 2854 wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord; 2855 wordsLeft = wordsPerBlock - wordIndexInBlock; 2856 currentWord = buffer + wordIndexInBlock; 2857 currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord; 2858 bitMask = kHighBitInWordMask >> currentBit; 2859 while (true) { 2860 // Move currentWord/bitMask/wordsLeft forward one bit 2861 bitMask >>= 1; 2862 if (bitMask == 0) { 2863 if (--wordsLeft == 0) 2864 break; 2865 ++currentWord; 2866 bitMask = kHighBitInWordMask; 2867 } 2868 2869 if (*currentWord & SWAP_BE32(bitMask)) 2870 break; // Found an allocated block. Stop searching. 
2871 ++unmapCount; 2872 } 2873 2874Exit: 2875 2876 if (buffer) 2877 (void)ReleaseBitmapBlock(vcb, blockRef, true); 2878 2879 if (err == noErr) { 2880 hfs_unmap_free_extent(vcb, unmapStart, unmapCount); 2881 } 2882 2883 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) 2884 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); 2885 2886 return err; 2887 2888Corruption: 2889#if DEBUG_BUILD 2890 panic("hfs: BlockMarkFreeInternal: blocks not allocated!"); 2891#else 2892 printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN); 2893 hfs_mark_volume_inconsistent(vcb); 2894 err = EIO; 2895 goto Exit; 2896#endif 2897} 2898 2899 2900#if CONFIG_HFS_ALLOC_RBTREE 2901/* 2902 * This is a wrapper function around BlockMarkFree. This function is 2903 * called when the RB Tree-based allocator needs to mark a block as no longer 2904 * in use. This function should take the locks that would not normally be 2905 * necessary for the normal bitmap deallocator, and then call the function. Once 2906 * the on-disk data structures are updated properly, then this will update an 2907 * existing rb-tree node if possible, or else create a new one. 2908 */ 2909 2910OSErr BlockMarkFreeRBTree( 2911 ExtendedVCB *vcb, 2912 u_int32_t startingBlock, 2913 register u_int32_t numBlocks) 2914{ 2915 OSErr err; 2916 struct hfsmount *hfsmp = VCBTOHFS(vcb); 2917 int rb_err = 0; 2918 2919 if (ALLOC_DEBUG) { 2920 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); 2921 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) { 2922 panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n", 2923 startingBlock, numBlocks); 2924 } 2925 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC); 2926 } 2927 2928 err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true); 2929 2930 if (err == noErr) { 2931 2932 /* 2933 * During a filesystem truncation, we may need to relocate files out of the 2934 * portion of the bitmap that is no longer controlled by the r/b tree. 2935 * In this case, just update the bitmap and do not attempt to manipulate the tree. 2936 */ 2937 if ((startingBlock >= hfsmp->offset_block_end) && 2938 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) { 2939 goto free_error; 2940 } 2941 2942 extent_node_t *newnode; 2943 2944 if (ALLOC_DEBUG) { 2945 /* 2946 * Validate that the blocks in question are not allocated in the bitmap, and that they're 2947 * not in the offset tree, since it should be tracking free extents, rather than allocated 2948 * extents 2949 */ 2950 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { 2951 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n", 2952 startingBlock, numBlocks); 2953 } 2954 } 2955 2956 if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) { 2957 if (startingBlock >= hfsmp->offset_block_end) { 2958 /* 2959 * If the tree generation code has not yet finished scanning the 2960 * bitmap region containing this extent, do nothing. If the start 2961 * of the range to be deallocated is greater than the current high 2962 * watermark on the offset tree, just bail out and let the scanner catch up with us. 
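 *
 * Worked example (illustrative numbers only): if the background scanner has
 * so far built tree nodes covering blocks [0, 50000), i.e.
 * offset_block_end = 50000, then a free of blocks 60000..60099 is simply
 * left for the scanner, which will observe the already-cleared bits and
 * insert the extent itself when its sweep gets there:
 */
#if 0	/* Illustrative sketch only -- not compiled; mirrors the code below. */
	if (startingBlock /* 60000 */ >= hfsmp->offset_block_end /* 50000 */) {
		rb_err = 0;		/* not an error */
		goto free_error;	/* let the scanner catch up */
	}
#endif
/*
 * (end of illustrative sketch)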
2963 */ 2964 rb_err = 0; 2965 goto free_error; 2966 } 2967 } 2968 2969 newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock); 2970 if (newnode == NULL) { 2971 rb_err = 1; 2972 goto free_error; 2973 } 2974 2975 if (ALLOC_DEBUG) { 2976 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE); 2977 } 2978 2979 } 2980 2981free_error: 2982 /* 2983 * We follow the same principle as in BlockMarkAllocatedRB. 2984 * If we encounter an error in adding the extents to the rb-tree, then immediately 2985 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags 2986 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild. 2987 */ 2988 if (rb_err) { 2989 /* Mark RB-Trees with error */ 2990 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED; 2991 DestroyTrees(hfsmp); 2992 /* Reset the Free Ext Cache since we'll be using it now. */ 2993 ResetVCBFreeExtCache(hfsmp); 2994 printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n"); 2995 } 2996 2997 2998 return err; 2999 3000} 3001#endif 3002 3003/* 3004_______________________________________________________________________ 3005 3006Routine: BlockFindContiguous 3007 3008Function: Find a contiguous range of blocks that are free (bits 3009 clear in the bitmap). If a contiguous range of the 3010 minimum size can't be found, an error will be returned. 3011 This is only needed to support the bitmap-scanning logic, 3012 as the red-black tree should be able to do this by internally 3013 searching its tree. 3014 3015Inputs: 3016 vcb Pointer to volume where space is to be allocated 3017 startingBlock Preferred first block of range 3018 endingBlock Last possible block in range + 1 3019 minBlocks Minimum number of blocks needed. Must be > 0. 3020 maxBlocks Maximum (ideal) number of blocks desired 3021 useMetaZone OK to dip into metadata allocation zone 3022 3023Outputs: 3024 actualStartBlock First block of range found, or 0 if error 3025 actualNumBlocks Number of blocks found, or 0 if error 3026 3027Returns: 3028 noErr Found at least minBlocks contiguous 3029 dskFulErr No contiguous space found, or all less than minBlocks 3030_______________________________________________________________________ 3031*/ 3032 3033static OSErr BlockFindContiguous( 3034 ExtendedVCB *vcb, 3035 u_int32_t startingBlock, 3036 u_int32_t endingBlock, 3037 u_int32_t minBlocks, 3038 u_int32_t maxBlocks, 3039 Boolean useMetaZone, 3040 u_int32_t *actualStartBlock, 3041 u_int32_t *actualNumBlocks) 3042{ 3043 OSErr err; 3044 register u_int32_t currentBlock; // Block we're currently looking at. 3045 u_int32_t firstBlock; // First free block in current extent. 3046 u_int32_t stopBlock; // If we get to this block, stop searching for first free block. 3047 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent. 3048 u_int32_t *buffer = NULL; 3049 register u_int32_t *currentWord; 3050 register u_int32_t bitMask; 3051 register u_int32_t wordsLeft; 3052 register u_int32_t tempWord; 3053 uintptr_t blockRef; 3054 u_int32_t wordsPerBlock; 3055 u_int32_t updated_free_extent = 0; 3056 3057 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) 3058 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0); 3059 3060 /* 3061 * When we're skipping the metadata zone and the start/end 3062 * range overlaps with the metadata zone then adjust the 3063 * start to be outside of the metadata zone. 
If the range 3064 * is entirely inside the metadata zone then we can deny the 3065 * request (dskFulErr). 3066 */ 3067 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) { 3068 if (startingBlock <= vcb->hfs_metazone_end) { 3069 if (endingBlock > (vcb->hfs_metazone_end + 2)) 3070 startingBlock = vcb->hfs_metazone_end + 1; 3071 else 3072 goto DiskFull; 3073 } 3074 } 3075 3076 if ((endingBlock - startingBlock) < minBlocks) 3077 { 3078 // The set of blocks we're checking is smaller than the minimum number 3079 // of blocks, so we couldn't possibly find a good range. 3080 goto DiskFull; 3081 } 3082 3083 stopBlock = endingBlock - minBlocks + 1; 3084 currentBlock = startingBlock; 3085 firstBlock = 0; 3086 3087 /* 3088 * Skip over metadata blocks. 3089 */ 3090 if (!useMetaZone) 3091 currentBlock = NextBitmapBlock(vcb, currentBlock); 3092 3093 // 3094 // Pre-read the first bitmap block. 3095 // 3096 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); 3097 if ( err != noErr ) goto ErrorExit; 3098 3099 // 3100 // Figure out where currentBlock is within the buffer. 3101 // 3102 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; 3103 3104 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer 3105 currentWord = buffer + wordsLeft; 3106 wordsLeft = wordsPerBlock - wordsLeft; 3107 3108 do 3109 { 3110 foundBlocks = 0; 3111 3112 //============================================================ 3113 // Look for a free block, skipping over allocated blocks. 3114 //============================================================ 3115 3116 // 3117 // Check an initial partial word (if any) 3118 // 3119 bitMask = currentBlock & kBitsWithinWordMask; 3120 if (bitMask) 3121 { 3122 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once 3123 bitMask = kHighBitInWordMask >> bitMask; 3124 while (tempWord & bitMask) 3125 { 3126 bitMask >>= 1; 3127 ++currentBlock; 3128 } 3129 3130 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)? 3131 if (bitMask) 3132 goto FoundUnused; 3133 3134 // Didn't find any unused bits, so we're done with this word. 3135 ++currentWord; 3136 --wordsLeft; 3137 } 3138 3139 // 3140 // Check whole words 3141 // 3142 while (currentBlock < stopBlock) 3143 { 3144 // See if it's time to read another block. 3145 if (wordsLeft == 0) 3146 { 3147 buffer = NULL; 3148 err = ReleaseBitmapBlock(vcb, blockRef, false); 3149 if (err != noErr) goto ErrorExit; 3150 3151 /* 3152 * Skip over metadata blocks. 3153 */ 3154 if (!useMetaZone) { 3155 currentBlock = NextBitmapBlock(vcb, currentBlock); 3156 if (currentBlock >= stopBlock) { 3157 goto LoopExit; 3158 } 3159 } 3160 3161 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); 3162 if ( err != noErr ) goto ErrorExit; 3163 3164 currentWord = buffer; 3165 wordsLeft = wordsPerBlock; 3166 } 3167 3168 // See if any of the bits are clear 3169 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear 3170 { 3171 // Figure out which bit is clear 3172 bitMask = kHighBitInWordMask; 3173 while (tempWord & bitMask) 3174 { 3175 bitMask >>= 1; 3176 ++currentBlock; 3177 } 3178 3179 break; // Found the free bit; break out to FoundUnused. 
			}

			// Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
		}

FoundUnused:
		// Make sure the unused bit is early enough to use
		if (currentBlock >= stopBlock)
		{
			break;
		}

		// Remember the start of the extent
		firstBlock = currentBlock;

		//============================================================
		//	Count the number of contiguous free blocks.
		//============================================================

		//
		// Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);	//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (bitMask && !(tempWord & bitMask))
			{
				bitMask >>= 1;
				++currentBlock;
			}

			// Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUsed;

			// Didn't find any used bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		// Check whole words
		//
		while (currentBlock < endingBlock)
		{
			// See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, currentBlock);
					if (nextBlock != currentBlock) {
						goto LoopExit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			// See if any of the bits are set
			if ((tempWord = SWAP_BE32(*currentWord)) != 0)
			{
				// Figure out which bit is set
				bitMask = kHighBitInWordMask;
				while (!(tempWord & bitMask))
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		// Found the used bit; break out to FoundUsed.
			}

			// Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;

			// If we found at least maxBlocks, we can quit early.
			if ((currentBlock - firstBlock) >= maxBlocks)
				break;
		}

FoundUsed:
		// Make sure we didn't run out of bitmap looking for a used block.
		// If so, pin to the end of the bitmap.
		if (currentBlock > endingBlock)
			currentBlock = endingBlock;

		// Figure out how many contiguous free blocks there were.
		// Pin the answer to maxBlocks.
		foundBlocks = currentBlock - firstBlock;
		if (foundBlocks > maxBlocks)
			foundBlocks = maxBlocks;
		if (foundBlocks >= minBlocks)
			break;		// Found what we needed!

		/*
		 * We did not find the total blocks we were looking for, but
		 * let's add this free block run to our free extent cache list.
		 */
		updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);

	} while (currentBlock < stopBlock);
LoopExit:

	// Return the outputs.
3303 if (foundBlocks < minBlocks) 3304 { 3305DiskFull: 3306 err = dskFulErr; 3307ErrorExit: 3308 *actualStartBlock = 0; 3309 *actualNumBlocks = 0; 3310 } 3311 else 3312 { 3313 err = noErr; 3314 *actualStartBlock = firstBlock; 3315 *actualNumBlocks = foundBlocks; 3316 /* 3317 * Sanity check for overflow 3318 */ 3319 if ((firstBlock + foundBlocks) > vcb->allocLimit) { 3320 panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x", 3321 vcb->vcbVN, startingBlock, endingBlock, currentBlock, 3322 firstBlock, stopBlock, minBlocks, foundBlocks); 3323 } 3324 } 3325 3326 if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) { 3327 int i; 3328 u_int32_t min_start = vcb->totalBlocks; 3329 3330 // set the nextAllocation pointer to the smallest free block number 3331 // we've seen so on the next mount we won't rescan unnecessarily 3332 lck_spin_lock(&vcb->vcbFreeExtLock); 3333 for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) { 3334 if (vcb->vcbFreeExt[i].startBlock < min_start) { 3335 min_start = vcb->vcbFreeExt[i].startBlock; 3336 } 3337 } 3338 lck_spin_unlock(&vcb->vcbFreeExtLock); 3339 if (min_start != vcb->totalBlocks) { 3340 if (min_start < vcb->nextAllocation) { 3341 vcb->nextAllocation = min_start; 3342 } 3343 if (min_start < vcb->sparseAllocation) { 3344 vcb->sparseAllocation = min_start; 3345 } 3346 } 3347 } 3348 3349 if (buffer) 3350 (void) ReleaseBitmapBlock(vcb, blockRef, false); 3351 3352 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) 3353 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); 3354 3355 return err; 3356} 3357 3358 3359#if CONFIG_HFS_ALLOC_RBTREE 3360/* 3361 * Wrapper function around hfs_isrbtree_allocated. This just takes the start offset, 3362 * and the number of blocks, and whether or not we should check if the blocks are 3363 * free or not. This function is designed to be used primarily with the debug #ifdef 3364 * enabled, so it results in a panic if anything unexpected occurs. 3365 * 3366 * shouldBeFree will be nonzero if the caller expects the zone to be free. 3367 */ 3368 3369void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks, 3370 u_int32_t numBlocks, int shouldBeFree) { 3371 int alloc; 3372 extent_node_t *node1 = NULL; 3373 u_int32_t off1 = 0; 3374 u_int32_t len1 = 0; 3375 alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1); 3376 3377 if (node1) { 3378 off1 = node1->offset; 3379 len1 = node1->length; 3380 } 3381 3382 if (shouldBeFree) { 3383 /* 3384 * If the region should be free, then we expect to see extents in the tree 3385 * matching this start and length. Alloc != 0 means some portion of the extent 3386 * specified was allocated. 3387 */ 3388 if (alloc != 0){ 3389 panic ("HFS check_rbtree_extents: Node (%p) do not exist! " 3390 "node1 off (%d),len(%d),, start(%d) end(%d)\n", 3391 node1, off1, len1, startBlocks, numBlocks); 3392 } 3393 } 3394 else { 3395 /* 3396 * Otherwise, this means that the region should be allocated, and if we find 3397 * an extent matching it, that's bad. 3398 */ 3399 if (alloc == 0){ 3400 panic ("HFS check_rbtree_extents: Node (%p) exists! " 3401 "node1 off (%d),len(%d), start(%d) end(%d)\n", 3402 node1, off1, len1, startBlocks, numBlocks); 3403 } 3404 } 3405} 3406#endif 3407 3408#if CONFIG_HFS_ALLOC_RBTREE 3409/* 3410 * Exhaustive validation search. This function iterates over all allocation blocks and 3411 * compares their status in the red-black tree vs. 
 * the allocation bitmap. If the two are out of sync, it will panic.
 * The bitmap lock must be held while this function is run.
 *
 * Because this function requires a red-black tree search to validate every allocation block, it is
 * very expensive and should ONLY be run in debug mode, and even then, infrequently.
 *
 * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
 */
void
hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end) {

	u_int32_t current;
	extent_node_t *node1;

	hfs_checktreelinks (hfsmp);

	for (current = start; current < end; current++) {
		node1 = NULL;
		int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
		int bitmap = hfs_isallocated(hfsmp, current, 1);

		if (bitmap != rbtree) {
			panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n",
					current, bitmap, rbtree);
		}
	}
}

/*
 * Exhaustive Red-Black Tree Linked List verification routine.
 *
 * This function iterates through the red-black tree's nodes, and verifies that the linked list
 * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
 * The bitmap lock must be held while this function is run.
 */
void
hfs_checktreelinks (struct hfsmount *hfsmp) {
	extent_tree_offset_t *tree = &hfsmp->offset_tree;

	extent_node_t *current = NULL;
	extent_node_t *next = NULL;
	extent_node_t *treenext;

	current = extent_tree_off_first (tree);

	while (current) {
		next = current->offset_next;
		treenext = extent_tree_off_next (tree, current);
		if (next != treenext) {
			panic("hfs_checktreelinks: mismatch for node (%p), next: %p, treenext: %p!\n", current, next, treenext);
		}
		current = treenext;
	}
}

#endif


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Test to see if any free blocks exist at a given offset.
 * If a free-extent node exists at (or spanning) the specified offset, it is
 * returned via 'ret_node'; if 'ret_node' comes back NULL, only allocated
 * blocks exist at that offset.
 *
 * Allocation file lock must be held.
 *
 * Returns:
 *	1 if blocks in the range are allocated.
 *	0 if all blocks in the range are free.
 */

static int
hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, extent_node_t **ret_node) {

	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t *nextnode = NULL;

	/*
	 * With only one tree, we just have to validate that there are entries
	 * in the R/B tree at the specified offset if it really is free.
	 */
	search_sentinel.offset = startBlock;
	search_sentinel.length = numBlocks;

	node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
	if (node) {

		*ret_node = node;
		nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
		if (nextnode != node->offset_next) {
			panic ("hfs_isrbtree_allocated: Next pointers out of sync!\n");
		}

		/*
		 * Check to see if it is a superset of our target range. Because we started
		 * with the offset or some offset prior to it, we know the node's offset is
		 * at least <= startBlock. So, if the end of the node is greater than the end of
		 * our target range, then the whole range is free.
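		 *
		 * Worked example (illustrative numbers only): for a free node with
		 * offset 100 and length 50, a query of startBlock 110 for numBlocks 20
		 * satisfies (100 + 50) >= (110 + 20), so the queried range sits wholly
		 * inside the free node and we return 0 below.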
		 */
		if ((node->offset + node->length) >= (startBlock + numBlocks)) {
			if (node->offset > startBlock) {
				panic ("hfs_isrbtree_allocated: bad node ordering!");
			}
			return 0;
		}
	}
	/*
	 * We got here if either our node search resulted in a node whose extent
	 * was strictly before our target offset, or we couldn't find a previous node
	 * at all (the beginning of the volume). If the former, then we can infer that
	 * at least one block in the target range is allocated, since the next node's offset
	 * must be greater than startBlock.
	 *
	 * Either way, this means that the target range is unavailable to allocate, so
	 * just return 1.
	 */
	return 1;
}


#endif

/*
 * Count the number of bits set in the given 32-bit unsigned number.
 *
 * Returns:
 *	Number of bits set
 */
static int num_bits_set(u_int32_t num)
{
	int count;

	/* Kernighan's method: each iteration clears the lowest set bit. */
	for (count = 0; num; count++) {
		num &= num - 1;
	}

	return count;
}

/*
 * For a given range of blocks, find the total number of blocks
 * allocated. If 'stop_on_first' is true, it stops as soon as it
 * encounters the first allocated block. This option is useful
 * to determine if any block is allocated or not.
 *
 * Inputs:
 *	startingBlock	First allocation block number of the range to be scanned.
 *	numBlocks	Total number of blocks that need to be scanned.
 *	stop_on_first	Stop the search after the first allocated block is found.
 *
 * Output:
 *	allocCount	Total number of allocation blocks allocated in the given range.
 *
 *			On error, it is the number of allocated blocks found
 *			before the function got an error.
 *
 *			If 'stop_on_first' is set,
 *				allocCount = 1 if any allocated block was found.
 *				allocCount = 0 if no allocated block was found.
 *
 * Returns:
 *	0 on success, non-zero on failure.
 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t *currentWord;	// Pointer to current word within bitmap block
	u_int32_t wordsLeft;	// Number of words left in this bitmap block
	u_int32_t bitMask;	// Word with given bits already set (ready to test)
	u_int32_t firstBit;	// Bit index within word of first bit to allocate
	u_int32_t numBits;	// Number of bits in word to allocate
	u_int32_t *buffer = NULL;
	uintptr_t blockRef;
	u_int32_t bitsPerBlock;
	u_int32_t wordsPerBlock;
	u_int32_t blockCount = 0;
	int error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
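	 *
	 * Worked example (illustrative): if startingBlock % 32 == 4, then
	 * firstBit = 4 and bitMask = kAllBitsSetInWord >> 4 = 0x0FFFFFFF,
	 * i.e. bits 4..31 of the first word, counting the most significant
	 * bit as bit 0. If numBlocks < 28, the mask is trimmed again below
	 * so that only numBlocks bits remain set.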
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;
		numBits = kBitsPerWord - firstBit;
		if (numBits > numBlocks) {
			numBits = numBlocks;
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}

/*
 * Count the total number of blocks that are allocated in the given
 * range from the bitmap. This is used to preflight the total number
 * of blocks that need to be relocated during volume resize.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns:
 *	0 on success, non-zero on failure.
 *	On failure, the value returned in allocCount should not be trusted.
 */
int
hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, u_int32_t *allocCount)
{
	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
}

/*
 * Test to see if any blocks in a range are allocated.
 *
 * Note: On error, this function returns 1, which means that
 * one or more blocks in the range are allocated. This function
 * is primarily used for volume resize, and we do not want
 * to report to the caller that the blocks are free when we
 * were not able to determine that deterministically. So on error,
 * we always report that the blocks are allocated.
 *
 * The journal or allocation file lock must be held.
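 *
 * Illustrative use during a resize preflight (hypothetical caller):
 *
 *	if (hfs_isallocated(hfsmp, startBlock, numBlocks)) {
 *		// at least one block is in use, or the bitmap read failed;
 *		// treat the range as unavailable
 *	}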
 *
 * Returns
 *	0 if all blocks in the range are free.
 *	1 if blocks in the range are allocated, or there was an error.
 */
int
hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	int error;
	u_int32_t allocCount;

	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
	if (error) {
		/* On error, we always say that the blocks are allocated
		 * so that volume resize does not return false success.
		 */
		return 1;
	} else {
		/* The function was deterministically able to find out
		 * if there was any block allocated or not. In that case,
		 * the value in allocCount is good enough to be returned
		 * back to the caller.
		 */
		return allocCount;
	}
}

/*
 * Check to see if the red-black tree is live. The allocation file lock must be
 * held shared or exclusive to call this function. Note that we may call this
 * even if HFS is built without activating the red-black tree code.
 */
__private_extern__
int
hfs_isrbtree_active(struct hfsmount *hfsmp) {

	//TODO: Update this function to deal with a truncate/resize coming in when the tree
	//isn't fully finished. Maybe we need to check the flags for something other than ENABLED?

#if CONFIG_HFS_ALLOC_RBTREE
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
	}
	if (hfsmp) {
		if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
			return 1;
		}
	}
#else
	#pragma unused (hfsmp)
#endif
	/* If the RB Tree code is not enabled, then just always return 0 */
	return 0;
}


/*
 * This function scans the specified bitmap block and acts on it as necessary.
 * We may add it to the list of blocks to be UNMAP/TRIM'd or add it to allocator
 * data structures. This function is not #if'd to the CONFIG_RB case because
 * we want to use it unilaterally at mount time if on a UNMAP-capable device.
 *
 * Additionally, we may want an allocating thread to invoke this if the tree
 * does not have enough extents to satisfy an allocation request.
 *
 * startbit  - the allocation block represented by a bit in 'allocblock' where we need to
 *	       start our scan. For instance, we may need to start the normal allocation scan
 *	       in the middle of an existing allocation block.
 * endBit    - the allocation block at which we should end this search (exclusive).
 * bitToScan - output argument for this function to specify the next bit to scan.
 * list      - journal trim list to which UNMAP candidates are appended.
 *
 * Returns:
 *	0 on success
 *	nonzero on failure.
 */

static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
		u_int32_t endBit, u_int32_t *bitToScan,
		struct jnl_trim_list *list) {

	int error;
	u_int32_t curAllocBlock;
	struct buf *blockRef = NULL;
	u_int32_t *buffer = NULL;
	u_int32_t wordIndexInBlock;
	u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize;
	u_int32_t wordsPerBlock = blockSize / kBytesPerWord;
	u_int32_t offset = 0;
	u_int32_t size = 0;

	/*
	 * Read the appropriate block from the bitmap file. ReadBitmapBlock
	 * figures out which actual on-disk block corresponds to the bit we're
	 * looking at.
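	 *
	 * Each bitmap block read this way covers vcbVBMIOSize bytes, i.e.
	 * vcbVBMIOSize * 8 allocation blocks, so the word index computed below
	 * is relative to that window.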
	 */
	error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef);
	if (error) {
		return error;
	}

	/* curAllocBlock represents the logical block we're analyzing. */
	curAllocBlock = startbit;

	/* Figure out which word curAllocBlock corresponds to in the block we read */
	wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;

	/* Scan a word at a time */
	while (wordIndexInBlock < wordsPerBlock) {
		u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
		u_int32_t curBit;

		/* curBit may start in the middle of a word */
		for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {

			u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
			if (ALLOC_DEBUG) {
				u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer);
				if ((res && !is_allocated) || (!res && is_allocated)) {
					panic("hfs_alloc_scan: curAllocBlock %u, curBit (%d), word (0x%x), is_allocated (0x%x), res (0x%x)\n",
							curAllocBlock, curBit, currentWord, is_allocated, res);
				}
			}
			/*
			 * If curBit is not allocated, keep track of the start of the free range.
			 * Increment a running tally of how many free blocks in a row we've seen.
			 */
			if (!is_allocated) {
				size++;
				if (offset == 0) {
					offset = curAllocBlock;
				}
			}
			else {
				/*
				 * If we hit an allocated block, insert the extent that tracked the range
				 * we saw, and reset our tally counter.
				 */
				if (size != 0) {
#if CONFIG_HFS_ALLOC_RBTREE
					extent_tree_free_space(&hfsmp->offset_tree, size, offset);
#endif
					hfs_track_unmap_blocks (hfsmp, offset, size, list);
					size = 0;
					offset = 0;
				}
			}
			curAllocBlock++;
			/*
			 * Exit early if the next bit we'd analyze would take us beyond the end of the
			 * range that we're supposed to scan.
			 */
			if (curAllocBlock >= endBit) {
				goto DoneScanning;
			}
		}
		wordIndexInBlock++;
	}
DoneScanning:

	/* We may have been tracking a range of free blocks that hasn't been inserted yet. */
	if (size != 0) {
#if CONFIG_HFS_ALLOC_RBTREE
		extent_tree_free_space(&hfsmp->offset_tree, size, offset);
#endif
		hfs_track_unmap_blocks (hfsmp, offset, size, list);
	}
	/*
	 * curAllocBlock represents the next block we need to scan while we're in this
	 * function.
	 */
	*bitToScan = curAllocBlock;

	ReleaseScanBitmapBlock(blockRef);

	return 0;
}


/*
 * This function is basically the same as hfs_isallocated, except it's designed for
 * use with the red-black tree validation code. It assumes we're only checking whether
 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
 *
 * This should not be called in general purpose scanning code.
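 *
 * Illustrative call (this is how the ALLOC_DEBUG check in hfs_alloc_scan_block
 * uses it, with 'buffer' already mapped via ReadBitmapBlock):
 *
 *	u_int32_t res = hfs_isallocated_scan(hfsmp, curAllocBlock, buffer);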
 */
int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {

	u_int32_t *currentWord;	// Pointer to current word within bitmap block
	u_int32_t bitMask;	// Word with given bits already set (ready to test)
	u_int32_t firstBit;	// Bit index within word of first bit to allocate
	u_int32_t numBits;	// Number of bits in word to allocate
	u_int32_t bitsPerBlock;
	uintptr_t blockRef;
	u_int32_t wordsPerBlock;
	u_int32_t numBlocks = 1;
	u_int32_t *buffer = NULL;

	int inuse = 0;
	int error;


	if (bp_buf) {
		/* Just use the passed-in buffer if available. */
		buffer = bp_buf;
	}
	else {
		/*
		 * Pre-read the bitmap block containing the first word of allocation
		 */
		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
		if (error)
			return (error);
	}

	/*
	 * Initialize currentWord.
	 */
	u_int32_t wordIndexInBlock;

	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
	currentWord = buffer + wordIndexInBlock;

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	bitMask = kAllBitsSetInWord >> firstBit;
	numBits = kBitsPerWord - firstBit;
	if (numBits > numBlocks) {
		numBits = numBlocks;
		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
	}
	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
		inuse = 1;
		goto Exit;
	}
	numBlocks -= numBits;
	++currentWord;

Exit:
	if (bp_buf == NULL) {
		if (buffer) {
			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
		}
	}
	return (inuse);
}

#if CONFIG_HFS_ALLOC_RBTREE

/*
 * Extern function called from the mount and upgrade-mount routines
 * to initialize the red-black allocation tree.
 */

__private_extern__
u_int32_t InitTree(struct hfsmount *hfsmp) {
	extent_tree_init (&(hfsmp->offset_tree));
	return 0;
}


/*
 * This function builds the trees specified in its arguments. It uses
 * buf_meta_bread to scan through the bitmap and re-build the tree state.
 * It is very important to use buf_meta_bread because we need to ensure that we
 * read the most current version of the blocks that we're scanning. If we used
 * cluster_io, then journaled transactions could still be sitting in RAM since they are
 * written to disk in the proper location asynchronously.
 *
 * Because this could still be running when mount has finished, we need to check
 * after every allocation block that we're working on whether an unmount or some other
 * operation that would cause us to tear down has come in (think downgrade mount).
 * If an unmount has come in, then abort whatever we're doing and return -1
 * to indicate we hit an error. If we don't do this, we'd hold up unmount for
 * a very long time.
 *
 * This function assumes that the bitmap lock is acquired exclusively before being
 * called. It will drop the lock and then re-acquire it during operation, but
 * will always return with the lock held.
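 *
 * A caller is therefore expected to follow this (illustrative) pattern;
 * the exact arguments are hypothetical:
 *
 *	flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 *	error = GenerateTree(hfsmp, hfsmp->allocLimit, &flags, 1);
 *	hfs_systemfile_unlock(hfsmp, flags);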
 */
__private_extern__
u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {

	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);

	u_int32_t *cur_block_eof;
	int error = 0;

	int USE_FINE_GRAINED_LOCKING = 0;

	/* Initialize the block counter while we hold the bitmap lock */
	cur_block_eof = &hfsmp->offset_block_end;

	/*
	 * This loop advances over all allocation bitmap blocks of the current region
	 * to scan them and add the results into the red-black tree. We use the mount point
	 * variable offset_block_end as our loop counter. This gives us flexibility
	 * because we can release the allocation bitmap lock and allow a thread that wants
	 * to make an allocation to grab the lock and do some scanning on our behalf while we're
	 * waiting to re-acquire the lock. Then, the allocating thread will only do as much bitmap
	 * scanning as needed to fulfill its allocation.
	 *
	 * If the other thread does IO for us, then it will update the offset_block_end
	 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
	 * scanning. So when we re-grab the lock, our loop counter (the shared EOF) will
	 * immediately skip us to the next block that needs scanning.
	 */

	while (*cur_block_eof < endBlock) {

		/*
		 * If the filesystem is being resized before the bitmap has been fully scanned, we'll
		 * update our endBlock to match the current allocation limit in the hfsmp struct.
		 * The allocLimit field would only be updated while holding the bitmap lock, so we won't
		 * be executing this code at the same time that the resize is going on.
		 */
		if ((initialscan) && (endBlock != hfsmp->allocLimit)) {

			/* If we're past the new/modified allocLimit, then just stop immediately. */
			if (*cur_block_eof >= hfsmp->allocLimit) {
				break;
			}
			endBlock = hfsmp->allocLimit;
		}

		/*
		 * TODO: fix unmount stuff!
		 * See rdar://7391404
		 *
		 * Once the RB allocator is checked in, we'll want to augment it to not hold the
		 * allocation bitmap lock for the entire duration of the tree scan. For a first check-in
		 * it's ok to do that but we can't leave it like that forever.
		 *
		 * The gist of the new algorithm will work as follows:
		 * if an unmount is in flight and has been detected:
		 *	abort tree-build.
		 *	unset tree-in-progress bit.
		 *	wakeup unmount thread
		 *	unlock allocation bitmap lock, fail out.
		 *
		 * The corresponding code on the unmount side should already be in place.
		 */

		/* No trim list is needed on this path; we only (re)build tree state. */
		error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof, NULL);

		//TODO: Fix this below!
		if (USE_FINE_GRAINED_LOCKING) {
			hfs_systemfile_unlock(hfsmp, *flags);
			*flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		}
		//TODO: Infer that if *flags == 0, we don't actually need to lock/unlock.
	}

	return error;
}

/*
 * This function destroys the rb-trees associated with the mount point.
 */
__private_extern__
void DestroyTrees(struct hfsmount *hfsmp) {

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
		printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
		hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
	}

	/*
	 * extent_tree_destroy will start with the first entry in the tree (by offset), then
	 * iterate through the tree quickly using its embedded linked list. This results in tree
	 * destruction in O(n) time.
	 */

	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
		extent_tree_destroy(&hfsmp->offset_tree);

		/* Mark trees as disabled */
		hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;
	}

	return;
}

#endif

/*
 * This function resets all of the data structures relevant to the
 * free extent cache stored in the hfsmount struct.
 *
 * If we are using the red-black tree code, then we need to account for the fact that
 * we may encounter situations where we need to jettison the tree. If that is the
 * case, then we fail over to the bitmap scanning logic, but we need to ensure that
 * the free extent cache is zeroed before we start using it.
 *
 * We also reset and disable the cache when allocLimit is updated, which
 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
 * It is independent of the type of allocator currently in use.
 */
void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
{
	int bytes;
	void *freeExt;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* Reset the free extent count */
	hfsmp->vcbFreeExtCnt = 0;

	/* Reset the actual array */
	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
	freeExt = (void*)(hfsmp->vcbFreeExt);

	bzero (freeExt, bytes);

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return;
}

/*
 * This function is used to inform the allocator that we have to effectively shrink
 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
 *
 * The bitmap lock must be held when calling this function. This function also modifies the
 * allocLimit field in the hfs mount point structure in the general case.
 *
 * In the shrinking case, we'll have to remove all free extents from the red-black
 * tree past the specified offset new_end_block. In the growth case, we'll have to force
 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
 *
 * new_end_block represents the total number of blocks available for allocation in the resized
 * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
 * will be out of the [0, n-1] range that is indexable in the bitmap.
 *
 * Returns	0 on success
 *		errno on failure
 */
__private_extern__
u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {

	/*
	 * Update allocLimit to the argument specified, but don't do anything else
	 * if the red/black tree is not enabled.
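	 *
	 * For example (illustrative numbers): a shrink via hfs_truncatefs() from
	 * 1000 blocks to 800 passes new_end_block == 800, so blocks 800..999
	 * stop being allocatable as soon as the assignment below is made.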
	 */
	hfsmp->allocLimit = new_end_block;

	/*
	 * Invalidate the free extent cache completely so that
	 * it does not have any extents beyond the end of the current
	 * volume.
	 */
	ResetVCBFreeExtCache(hfsmp);

#if CONFIG_HFS_ALLOC_RBTREE
	/* Shrinking the existing filesystem */
	if ((new_end_block < hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		extent_node_t search_sentinel;
		extent_node_t *node = NULL;
		/* 'remover' points to the current item to free/remove from the tree */
		extent_node_t *remover = NULL;

		/* Begin search at the specified offset */
		memset (&search_sentinel, 0, sizeof(extent_node_t));
		search_sentinel.offset = new_end_block;

		/*
		 * Find the first available extent that satisfies the allocation by searching
		 * from the starting point or 1 earlier. We may need to split apart an existing node
		 * if it straddles the new alloc limit.
		 */
		node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
		if (node) {
			/* If it's an exact match, then just remove them all from this point forward */
			if (node->offset == new_end_block) {
				/*
				 * Find the previous entry and update its next pointer to NULL
				 * since this entry is biting the dust. Update remover to node.
				 */
				extent_node_t *prev = NULL;
				prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
				if (prev) {
					prev->offset_next = NULL;
				}
				remover = node;
			}
			else {
				/* See if we need to split this node */
				if ((node->offset + node->length) > new_end_block) {
					/*
					 * Update node to reflect its new size up until new_end_block.
					 */
					remover = node->offset_next;
					node->length = new_end_block - node->offset;
					/* node is becoming the last free extent in the volume. */
					node->offset_next = NULL;
				}
				else {
					if (node->offset_next == NULL) {
						/*
						 * 'node' points to the last free extent in the volume.
						 * Coincidentally, it is also before the new cut-off point at which
						 * we will stop representing bitmap values in the tree. Just bail out now.
						 */
						return 0;
					}
					/*
					 * Otherwise, point our temp variable 'remover' to the node where
					 * we'll need to start yanking things out of the tree, and make 'node'
					 * the last element in the tree's linked list.
					 */
					remover = node->offset_next;
					if (remover->offset <= new_end_block) {
						panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
					}
					node->offset_next = NULL;
				}
			}

			/*
			 * 'remover' is our "temp" pointer that points to the current node to remove from
			 * the offset tree. We simply iterate through the tree's linked list, removing each
			 * current element from the tree and freeing them as we come across them.
			 */
			while (remover) {
				extent_node_t *next = remover->offset_next;
				extent_tree_remove_node (&hfsmp->offset_tree, remover);
				free_node (remover);
				remover = next;
			}

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block-1);
			}

			/*
			 * Don't forget to shrink offset_block_end after a successful truncation.
			 * new_end_block should represent the number of blocks available on the
			 * truncated volume.
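			 *
			 * (allocLimit was already set to new_end_block at the top of
			 * this function, so after this assignment the two fields agree
			 * again.)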
			 */
			hfsmp->offset_block_end = new_end_block;

			return 0;
		}
		else {
			if (ALLOC_DEBUG) {
				panic ("UpdateAllocLimit: no prev!");
			}
			return ENOSPC;
		}
	}
	/* Growing the existing filesystem */
	else if ((new_end_block > hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		int flags = 0;
		int retval = 0;

		if (ALLOC_DEBUG) {
			printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
			hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
		}


		retval = GenerateTree (hfsmp, new_end_block, &flags, 0);

		/*
		 * Don't forget to update offset_block_end after a successful tree extension.
		 */
		if (retval == 0) {

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after growth\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block);
			}

			hfsmp->offset_block_end = new_end_block;
		}

		return retval;
	}
	/* Otherwise, do nothing; fall through to the code below. */
	printf ("error : off_block_end: %d, alloclimit: %d, new_end_block: %d\n",
			hfsmp->offset_block_end, hfsmp->allocLimit, new_end_block);
#endif

	return 0;

}


/*
 * Remove an extent from the list of free extents.
 *
 * This is a low-level routine. It does not handle overlaps or splitting;
 * that is the responsibility of the caller. The caller passes the index of
 * an extent already in the list; that entry is removed, and any following
 * extents in the list are shifted up.
 *
 * Inputs:
 *	index - index of the extent to remove
 */
static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
{
	if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
		if (ALLOC_DEBUG)
			panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
		else
			printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)\n", hfsmp, index, hfsmp->vcbFreeExtCnt);
		return;
	}
	int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
	if (shift_count > 0) {
		memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
	}
	hfsmp->vcbFreeExtCnt--;
}


/*
 * Add an extent to the list of free extents.
 *
 * This is a low-level routine. It does not handle overlaps or coalescing;
 * that is the responsibility of the caller. This routine *does* make
 * sure that the extent it is adding is inserted in the correct location.
 * If the list is full, this routine will handle either removing the last
 * extent in the list to make room for the new extent, or ignoring the
 * new extent if it is "worse" than the last extent in the list.
 *
 * Inputs:
 *	startBlock - Start of extent to add
 *	blockCount - Number of blocks in extent to add
 *
 * Result:
 *	The index where the extent was inserted, or kMaxFreeExtents
 *	if the extent was not inserted (the list was full, and the extent
 *	being added was "worse" than everything in the list).
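 *
 * Illustrative example (assuming kMaxFreeExtents is 10): with a full,
 * size-sorted list whose smallest cached extent is 8 blocks, adding a
 * 5-block extent returns kMaxFreeExtents and leaves the list unchanged,
 * while adding a 20-block extent evicts the 8-block entry and returns
 * the index at which the 20-block extent was inserted.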
 */
static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	uint32_t i;

	/* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
	if (ALLOC_DEBUG) {
		uint32_t endBlock = startBlock + blockCount;
		for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
			if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
				startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
				continue;
			}
			panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
				hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
		}
	}

	/* Figure out what index the new extent should be inserted at. */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* The list is sorted by increasing offset. */
			if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
				break;
			}
		} else {
			/* The list is sorted by decreasing size. */
			if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
				break;
			}
		}
	}

	/* When we get here, i is the index where the extent should be inserted. */
	if (i == kMaxFreeExtents) {
		/*
		 * The new extent is worse than anything already in the list,
		 * and the list is full, so just ignore the extent to be added.
		 */
		return i;
	}

	/*
	 * Grow the list (if possible) to make room for an insert.
	 */
	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
		hfsmp->vcbFreeExtCnt++;

	/*
	 * If we'll be keeping any extents after the insert position, then shift them.
	 */
	int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
	if (shift_count > 0) {
		memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
	}

	/* Finally, store the new extent at its correct position. */
	hfsmp->vcbFreeExt[i].startBlock = startBlock;
	hfsmp->vcbFreeExt[i].blockCount = blockCount;
	return i;
}


/*
 * Remove an entry from the free extent cache after it has been allocated.
 *
 * This is a high-level routine. It handles removing a portion of a
 * cached extent, potentially splitting it into two (if the cache was
 * already full, throwing away the extent that would sort last). It
 * also handles removing an extent that overlaps multiple extents in
 * the cache.
 *
 * Inputs:
 *	hfsmp      - mount point structure
 *	startBlock - starting block of the extent to be removed.
 *	blockCount - number of blocks of the extent to be removed.
 */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	u_int32_t i, insertedIndex;
	u_int32_t currentStart, currentEnd, endBlock;
	int extentsRemoved = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	/* If the red-black tree is enabled, no free extent cache is necessary */
	if (hfs_isrbtree_active(hfsmp) == true) {
		return;
	}
#endif

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	endBlock = startBlock + blockCount;

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Iterate over all of the extents in the free extent cache, removing or
	 * updating any entries that overlap with the input extent.
	 */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentStart = hfsmp->vcbFreeExt[i].startBlock;
		currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;

		/*
		 * If the current extent is entirely before or entirely after the
		 * extent to be removed, then we keep it as-is.
		 */
		if (currentEnd <= startBlock || currentStart >= endBlock) {
			continue;
		}

		/*
		 * If the extent being removed entirely contains the current extent,
		 * then remove the current extent.
		 */
		if (startBlock <= currentStart && endBlock >= currentEnd) {
			remove_free_extent_list(hfsmp, i);

			/*
			 * We just removed the extent at index i. The extent at
			 * index i+1 just got shifted to index i. So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
			++extentsRemoved;
			continue;
		}

		/*
		 * If the extent being removed is strictly "in the middle" of the
		 * current extent, then we need to split the current extent into
		 * two discontiguous extents (the "head" and "tail"). The good
		 * news is that we don't need to examine any other extents in
		 * the list.
		 */
		if (startBlock > currentStart && endBlock < currentEnd) {
			remove_free_extent_list(hfsmp, i);
			add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
			add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
			break;
		}

		/*
		 * The only remaining possibility is that the extent to be removed
		 * overlaps the start or end (but not both!) of the current extent.
		 * So we need to replace the current extent with a shorter one.
		 *
		 * The only tricky part is that the updated extent might be at a
		 * different index than the original extent. If the updated extent
		 * was inserted after the current extent, then we need to re-examine
		 * the entry at index i, since it now contains the extent that was
		 * previously at index i+1. If the updated extent was inserted
		 * before or at the same index as the removed extent, then the
		 * following extents haven't changed position.
		 */
		remove_free_extent_list(hfsmp, i);
		if (startBlock > currentStart) {
			/* Remove the tail of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
		} else {
			/* Remove the head of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
		}
		if (insertedIndex > i) {
			--i;	/* Undo the "++i" in the loop, so we examine the entry at index i again. */
		}
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);

	return;
}


/*
 * Add an entry to the free extent cache after it has been deallocated.
 *
 * This is a high-level routine. It will merge overlapping or contiguous
 * extents into a single, larger extent.
 *
 * If the extent provided has blocks beyond the current allocLimit, it is
 * clipped to allocLimit (so that we won't accidentally find and allocate
 * space beyond allocLimit).
 *
 * Inputs:
 *	hfsmp      - mount point structure
 *	startBlock - starting block of the extent to be added.
 *	blockCount - number of blocks of the extent to be added.
 *
 * Returns:
 *	true  - if the extent was added successfully to the list
 *	false - if the extent was not added to the list, perhaps because
 *		the extent was beyond allocLimit, or was not the best
 *		candidate to be put in the cache.
 */
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	Boolean retval = false;
	uint32_t endBlock;
	uint32_t currentEnd;
	uint32_t i;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	/*
	 * If using the red-black tree allocator, then there's no need to special-case
	 * the sparse device scenario. We'll simply add the region we've recently freed
	 * to the red-black tree, where it will get sorted by offset and length. The only special
	 * casing will need to be done on the allocation side, where we may favor free extents
	 * based on offset even if it will cause fragmentation. This may be true, for example, if
	 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp) == true) {
		goto out_not_locked;
	}
#endif

	/* No need to add an extent that is beyond the current allocLimit */
	if (startBlock >= hfsmp->allocLimit) {
		goto out_not_locked;
	}

	/* If the end of the free extent is beyond the current allocLimit, clip the extent */
	if ((startBlock + blockCount) > hfsmp->allocLimit) {
		blockCount = hfsmp->allocLimit - startBlock;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Make a pass through the free extent cache, looking for known extents that
	 * overlap or are contiguous with the extent to be added. We'll remove those
	 * extents from the cache, and incorporate them into the new extent to be added.
	 */
	endBlock = startBlock + blockCount;
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
		if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
			/* Extent i does not overlap and is not contiguous, so keep it. */
			continue;
		} else {
			/* We need to remove extent i and combine it with the input extent. */
			if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
				startBlock = hfsmp->vcbFreeExt[i].startBlock;
			if (currentEnd > endBlock)
				endBlock = currentEnd;

			remove_free_extent_list(hfsmp, i);
			/*
			 * We just removed the extent at index i. The extent at
			 * index i+1 just got shifted to index i. So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
		}
	}
	/* Record whether the merged extent actually made it into the cache. */
	if (add_free_extent_list(hfsmp, startBlock, endBlock - startBlock) < kMaxFreeExtents) {
		retval = true;
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

out_not_locked:
	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);

	return retval;
}

/* Debug function to sanity-check the free extent cache */
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
{
	u_int32_t i, j;

	/* Do not do anything if debug is not on, or if we're using the red-black tree */
	if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
		return;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
		panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);

	/*
	 * Iterate the free extent cache and ensure no entries are bogus or refer to
	 * allocated blocks.
	 */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; i++) {
		u_int32_t start, nblocks;

		start   = hfsmp->vcbFreeExt[i].startBlock;
		nblocks = hfsmp->vcbFreeExt[i].blockCount;

		//printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);

		/*
		 * Check if any of the blocks in the free extent cache are allocated.
		 * This should not always be enabled, because it might take
		 * very long for large extents that get added to the list.
		 *
		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
		 * because it is going to do I/O. Note that the free extent
		 * cache could change. That's a risk we take when using this
		 * debugging code. (Another alternative would be to try to
		 * detect when the free extent cache changed, and perhaps
		 * restart if the list changed while we dropped the lock.)
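		 *
		 * (Both call sites in this file pass check_allocated == 0, so this
		 * I/O-heavy verification runs only when requested explicitly.)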
		 */
		if (check_allocated) {
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (hfs_isallocated(hfsmp, start, nblocks)) {
				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
						hfsmp, i, start, nblocks);
			}
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
		}

		/* Check if any part of the extent is beyond allocLimit */
		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
					hfsmp, i, start, nblocks, hfsmp->allocLimit);
		}

		/* Check if there are any duplicate start blocks */
		for (j = i+1; j < hfsmp->vcbFreeExtCnt; j++) {
			if (start == hfsmp->vcbFreeExt[j].startBlock) {
				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicates\n",
						hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
						hfsmp->vcbFreeExt[j].blockCount);
			}
		}

		/* Check if the entries are out of order */
		if ((i+1) != hfsmp->vcbFreeExtCnt) {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				/* sparse devices are sorted by starting block number (ascending) */
				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
							hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
							hfsmp->vcbFreeExt[i+1].blockCount);
				}
			} else {
				/* normally sorted by block count (descending) */
				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
							hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
							hfsmp->vcbFreeExt[i+1].blockCount);
				}
			}
		}
	}
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}