/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
	File:		VolumeAllocation.c

	Contains:	Routines for accessing and modifying the volume bitmap.

	Version:	HFS Plus 1.0

	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.

*/

/*
Public routines:
	BlockAllocate
					Allocate space on a volume.  Can allocate space contiguously.
					If not contiguous, then allocation may be less than what was
					asked for.  Returns the starting block number, and number of
					blocks.  (Only a single extent is allocated per call.)
	BlockDeallocate
					Deallocate a contiguous run of allocation blocks.

	BlockMarkAllocated
					Exported wrapper to mark blocks as in-use.  This will correctly determine
					whether or not the red-black tree is enabled and call the appropriate function
					if applicable.
	BlockMarkFree
					Exported wrapper to mark blocks as freed.  This will correctly determine whether or
					not the red-black tree is enabled and call the appropriate function if applicable.


	ResetVCBFreeExtCache
					Since the red-black tree obviates the need to maintain the free extent cache, we do
					not update it if the tree is also live.  As a result, if we ever need to destroy the trees
					we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
					bitmap scanning allocator.
					We also reset and disable the free extent cache when volume resizing is
					in flight.

	UpdateAllocLimit
					Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
					allocations from occupying space in the region we are modifying during a filesystem resize.
					At other times, it should be consistent with the total number of allocation blocks in the
					filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
					know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
					number of items in the tree that we can allocate from.

	UnmapBlocks
					Issues DKIOCUNMAPs to the device as it fills the internal volume buffer when iterating
					the volume bitmap.

Internal routines:
	Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.  This
	cuts down on code size by compiling out functions that cannot possibly be used when the
	red-black tree code is not built in.

	BlockMarkFreeRBTree
					Make an internal call to BlockMarkFree and then update
					and/or create Red-Black Tree allocation tree nodes to correspond
					to the free space being generated.
	BlockMarkFreeInternal
					Mark a contiguous range of blocks as free.  The corresponding
					bits in the volume bitmap will be cleared.  This will actually do the work
					of modifying the bitmap for us.

	BlockMarkAllocatedRBTree
					Make an internal call to BlockMarkAllocatedInternal, which will update the
					bitmap on-disk when we allocate blocks.  If that is successful, then
					we'll remove the appropriate entries from the red-black tree.
	BlockMarkAllocatedInternal
					Mark a contiguous range of blocks as allocated.  The corresponding
					bits in the volume bitmap are set.  Also tests to see
					if any of the blocks were previously unallocated.
	BlockFindContiguous
					Find a contiguous range of blocks of a given size.  The caller
					specifies where to begin the search (by block number).  The
					block number of the first block in the range is returned.  This is only
					called by the bitmap scanning logic as the red-black tree should be able
					to do this internally by searching its tree.
	BlockAllocateAny
					Find and allocate a contiguous range of blocks up to a given size.  The
					first range of contiguous free blocks found is allocated, even if there
					are fewer blocks than requested (and even if a contiguous range of blocks
					of the given size exists elsewhere).
	BlockAllocateAnyBitmap
					Finds a range of blocks per the above requirements without using the
					Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
					any valid range of free space needed.
	BlockAllocateAnyRBTree
					Finds a valid range of blocks per the above requirements by searching
					the red-black tree.  We can just make an internal call to
					BlockAllocateContigRBTree to find the valid range.
	BlockAllocateContig
					Find and allocate a contiguous range of blocks of a given size.  If
					a contiguous range of free blocks of the given size isn't found, then
					the allocation fails (i.e. it is "all or nothing").  This routine is
					essentially a wrapper function around its related sub-functions,
					BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
					respectively, the original HFS+ bitmap scanning logic and the new
					Red-Black Tree to search and manage free-space decisions.  This function
					contains logic for when to use which of the allocation algorithms,
					depending on the free space contained in the volume.
	BlockAllocateContigBitmap
					Finds and allocates a range of blocks specified by the size parameters
					using the original HFS+ bitmap scanning logic.  The red-black tree
					will not be updated if this function is used.
	BlockAllocateContigRBTree
					Finds and allocates a range of blocks specified by the size parameters
					using the new red/black tree data structure and search algorithms
					provided by the tree library.  Updates the red/black tree nodes after
					the on-disk data structure (bitmap) has been updated.
	BlockAllocateKnown
					Try to allocate space from known free space in the volume's
					free extent cache.

	ReadBitmapBlock
					Given an allocation block number, read the bitmap block that
					contains that allocation block into a caller-supplied buffer.

	ReleaseBitmapBlock
					Release a bitmap block back into the buffer cache.

	remove_free_extent_cache
					Remove an extent from the free extent cache.  Handles overlaps
					with multiple extents in the cache, and handles splitting an
					extent in the cache if the extent to be removed is in the middle
					of a cached extent.

	add_free_extent_cache
					Add an extent to the free extent cache.  It will merge the
					input extent with extents already in the cache.


Debug/Test Routines
	hfs_isallocated
					Test to see if any blocks in a range are allocated.  Journal or
					allocation file lock must be held.

	hfs_isallocated_scan
					Test to see if any blocks in a range are allocated.  Releases and
					invalidates the block used when finished.

	hfs_isrbtree_active
					Test to see if the allocation red-black tree is live.  This function
					requires either an exclusive or shared lock on the allocation bitmap file
					in the HFS mount structure, to prevent red-black tree pointers from disappearing.

	hfs_isrbtree_allocated
					Test to see if the specified extent is marked as allocated in the red-black tree.
					Multiplexes between the metadata zone trees and the normal allocation zone trees
					depending on the offset of the extent specified.

	check_rbtree_extents
					Void function that wraps around the above function (hfs_isrbtree_allocated)
					and checks to see that the return value was appropriate based on the assertion we're
					trying to validate (whether or not the specified extent should be marked as free
					or allocated).

	hfs_validate_rbtree
					Exhaustive search function that will check every allocation block for its status in the
					red-black tree and then check the corresponding status in the bitmap file.  If the two are out
					of sync, it will panic.  Note that this function is extremely expensive and must NEVER
					be run outside of debug code.

	hfs_checktreelinks
					Checks the embedded linked list structure of the red black tree for integrity.  The next pointer
					should always point to whatever extent_tree_offset_next returns.


Red Black Tree Specific Routines
	GenerateTree
					Build a red-black tree for the given filesystem's bitmap.

	DestroyTrees
					Destroy the red-black trees for the given filesystem.


	hfs_alloc_scan_block
					Given a starting allocation block number, figures out which physical block contains that
					allocation block's bit, and scans it from the starting bit until either the ending bit or
					the end of the block.  Free space extents are inserted into the appropriate red-black tree.

*/

#include "../../hfs_macos_defs.h"

#include <sys/types.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/ubc.h>
#include <sys/uio.h>
#include <kern/kalloc.h>
/* For VM Page size */
#include <libkern/libkern.h>

#include "../../hfs.h"
#include "../../hfs_dbg.h"
#include "../../hfs_format.h"
#include "../../hfs_endian.h"
#include "../../hfs_macos_defs.h"
#include "../headers/FileMgrInternal.h"
#include "../headers/HybridAllocator.h"
#include "../../hfs_kdebug.h"

/* Headers for unmap-on-mount support */
#include <vfs/vfs_journal.h>
#include <sys/disk.h>

#ifndef CONFIG_HFS_TRIM
#define CONFIG_HFS_TRIM 0
#endif

/*
 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
 * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
 * disabled by default because there can be a lot of these events,
 * and we don't want to overwhelm the kernel debug buffer.  If you
 * want to watch these events in particular, just set the sysctl.)
 */
static int hfs_kdebug_allocation = 0;
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
enum {
	/*
	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
	 * BlockDeallocate, including the internal BlockAllocateXxx
	 * routines so we can see how an allocation was satisfied.
	 *
	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
	 * free extent cache.
	 *
	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
	 *
	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
	 * or clearing bits, scanning the bitmap).
	 */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};
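
/*
 * For reference, the sysctl above can be set from userspace with no
 * HFS-specific tooling.  The snippet below is an illustrative sketch only
 * (not part of this file); it enables the allocation and trim-list events
 * (HFSDBG_ALLOC_ENABLED | HFSDBG_UNMAP_ENABLED = 1 | 4) via sysctlbyname(3):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		int newval = 1 | 4;	// HFSDBG_ALLOC_ENABLED | HFSDBG_UNMAP_ENABLED
 *		if (sysctlbyname("vfs.generic.hfs.kdebug.allocation",
 *		                 NULL, NULL, &newval, sizeof(newval)) != 0) {
 *			perror("sysctlbyname");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */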

enum {
	kBytesPerWord			=	4,
	kBitsPerByte			=	8,
	kBitsPerWord			=	32,

	kBitsWithinWordMask		=	kBitsPerWord-1
};

#define kLowBitInWordMask	0x00000001ul
#define kHighBitInWordMask	0x80000000ul
#define kAllBitsSetInWord	0xFFFFFFFFul
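
/*
 * Worked example (illustration only) of how an allocation block number is
 * mapped onto the words and masks above; this mirrors the arithmetic used
 * by the mark/scan routines later in this file.  Bit 0 of a bitmap word is
 * its most significant bit, so per-bit masks are built by shifting
 * kHighBitInWordMask right:
 *
 *	u_int32_t bit       = 40;	// hypothetical allocation block number
 *	u_int32_t wordIndex = bit / kBitsPerWord;			// word 1
 *	u_int32_t bitInWord = bit & kBitsWithinWordMask;		// bit 8 of that word
 *	u_int32_t mask      = kHighBitInWordMask >> bitInWord;	// 0x00800000
 */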


#define ALLOC_DEBUG 0

static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef);

static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty);

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockMarkAllocatedInternal (
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks);

static OSErr BlockMarkFreeInternal(
	ExtendedVCB	*vcb,
	u_int32_t	startingBlock,
	u_int32_t	numBlocks,
	Boolean 	do_validate);


static OSErr ReleaseScanBitmapBlock( struct buf *bp );

static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
                             u_int32_t numBlocks, struct jnl_trim_list *list);

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);

static int hfs_alloc_scan_block(struct hfsmount *hfsmp,
								u_int32_t startbit,
								u_int32_t endBit,
								u_int32_t *bitToScan,
                                struct jnl_trim_list *list);

int hfs_isallocated_scan (struct hfsmount *hfsmp,
								 u_int32_t startingBlock,
								 u_int32_t *bp_buf);

#if CONFIG_HFS_ALLOC_RBTREE
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks,
	u_int32_t 		forceContig);

static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t	numBlocks);

static OSErr BlockMarkFreeRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t	numBlocks);

static int
hfs_isrbtree_allocated (struct hfsmount * hfsmp,
	u_int32_t startBlock,
	u_int32_t numBlocks,
	extent_node_t** node1);

extern void
hfs_validate_rbtree (struct hfsmount *hfsmp,
					 u_int32_t start,
					 u_int32_t end);

static void hfs_checktreelinks (struct hfsmount *hfsmp);


void check_rbtree_extents (struct hfsmount *hfsmp,
	u_int32_t start,
	u_int32_t numBlocks,
	int shouldBeFree);

#define ASSERT_FREE 1
#define ASSERT_ALLOC 0

#endif /* CONFIG_HFS_ALLOC_RBTREE */

/* Functions for manipulating free extent cache */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);

#if ALLOC_DEBUG
/*
 * Validation Routine to verify that the TRIM list maintained by the journal
 * is in good shape relative to what we think the bitmap should have.  We should
 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
 * we panic.
 */
int trim_validate_bitmap (struct hfsmount *hfsmp);
int trim_validate_bitmap (struct hfsmount *hfsmp) {
	u_int64_t blockno_offset;
	u_int64_t numblocks;
	int i;
	int count;
	u_int32_t startblk;
	u_int32_t blks;
	int err = 0;
	uint32_t alloccount = 0;

	if (hfsmp->jnl) {
		struct journal *jnl = (struct journal*)hfsmp->jnl;
		if (jnl->active_tr) {
			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
			count = trim->extent_count;
			for (i = 0; i < count; i++) {
				blockno_offset = trim->extents[i].offset;
				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
				blockno_offset = blockno_offset / hfsmp->blockSize;
				numblocks = trim->extents[i].length / hfsmp->blockSize;

				startblk = (u_int32_t)blockno_offset;
				blks = (u_int32_t) numblocks;
				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);

				if (err == 0 && alloccount != 0) {
					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
				}
			}
		}
	}
	return 0;
}

#endif


/*
 ;________________________________________________________________________________
 ;
 ; Routine:		hfs_unmap_free_extent
 ;
 ; Function:		Make note of a range of allocation blocks that should be
 ;				unmapped (trimmed).  That is, the given range of blocks no
 ;				longer have useful content, and the device can unmap the
 ;				previous contents.  For example, a solid state disk may reuse
 ;				the underlying storage for other blocks.
 ;
 ;				This routine is only supported for journaled volumes.  The extent
 ;				being freed is passed to the journal code, and the extent will
 ;				be unmapped after the current transaction is written to disk.
 ;
 ; Input Arguments:
 ;	hfsmp			- The volume containing the allocation blocks.
 ;	startingBlock	- The first allocation block of the extent being freed.
 ;	numBlocks		- The number of allocation blocks of the extent being freed.
 ;________________________________________________________________________________
 */
static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	u_int64_t device_sz;
	int err = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (ALLOC_DEBUG) {
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
		}
	}

	if (hfsmp->jnl != NULL) {
		device_sz = hfsmp->hfs_logical_bytes;
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		/* Validate that the trim is in a valid range of bytes */
		if ((offset >= device_sz) || ((offset + length) > device_sz)) {
			printf("hfs_unmap_free_ext: ignoring trim @ off %lld len %lld \n", offset, length);
			err = EINVAL;
		}

		if (err == 0) {
			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
			if (err) {
				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
}



/*
 ;________________________________________________________________________________
 ;
 ; Routine:		hfs_track_unmap_blocks
 ;
 ; Function:	Make note of a range of allocation blocks that should be
 ;				unmapped (trimmed).  That is, the given range of blocks no
 ;				longer have useful content, and the device can unmap the
 ;				previous contents.  For example, a solid state disk may reuse
 ;				the underlying storage for other blocks.
 ;
 ;				This routine is only supported for journaled volumes.
 ;
 ;              *****NOTE*****:
 ;              This function should *NOT* be used when the volume is fully
 ;              mounted.  This function is intended to support a bitmap iteration
 ;              at mount time to fully inform the SSD driver of the state of all blocks
 ;              at mount time, and assumes that there is no allocation/deallocation
 ;              interference during its iteration.
 ;
 ; Input Arguments:
 ;	hfsmp			- The volume containing the allocation blocks.
 ;	start           - The first allocation block of the extent being freed.
 ;	numBlocks		- The number of allocation blocks of the extent being freed.
 ;  list            - The list of currently tracked trim ranges.
 ;________________________________________________________________________________
 */
static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
                              u_int32_t numBlocks, struct jnl_trim_list *list) {

    u_int64_t offset;
    u_int64_t length;
    int error = 0;

    if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
        int extent_no = list->extent_count;
        offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
        length = (u_int64_t) numBlocks * hfsmp->blockSize;


        list->extents[extent_no].offset = offset;
        list->extents[extent_no].length = length;
        list->extent_count++;
        if (list->extent_count == list->allocated_count) {
            error = hfs_issue_unmap (hfsmp, list);
        }
    }

    return error;
}

/*
 ;________________________________________________________________________________
 ;
 ; Routine:		hfs_issue_unmap
 ;
 ; Function:	Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
 ;
 ; Input Arguments:
 ;	hfsmp			- The volume containing the allocation blocks.
 ;  list            - The list of currently tracked trim ranges.
 ;________________________________________________________________________________
 */

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) {
    dk_unmap_t unmap;
    int error = 0;

    if (list->extent_count > 0) {
        bzero(&unmap, sizeof(unmap));
        unmap.extents = list->extents;
        unmap.extentsCount = list->extent_count;

        /* Issue a TRIM and flush them out */
        error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());

        bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
        list->extent_count = 0;
    }
    return error;
}



/*
 ;________________________________________________________________________________
 ;
 ; Routine:		hfs_unmap_alloc_extent
 ;
 ; Function:		Make note that a range of allocation blocks, some of
 ;				which may have previously been passed to hfs_unmap_free_extent,
 ;				is now in use on the volume.  The given blocks will be removed
 ;				from any pending DKIOCUNMAP.
 ;
 ; Input Arguments:
 ;	hfsmp			- The volume containing the allocation blocks.
 ;	startingBlock	- The first allocation block of the extent being allocated.
 ;	numBlocks		- The number of allocation blocks being allocated.
 ;________________________________________________________________________________
 */
static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	int err = 0;	/* initialized: logged below even when there is no journal */

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (hfsmp->jnl != NULL) {
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
		if (err) {
			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
}


/*
;________________________________________________________________________________
;
; Routine:		hfs_trim_callback
;
; Function:		This function is called when a transaction that freed extents
;				(via hfs_unmap_free_extent/journal_trim_add_extent) has been
;				written to the on-disk journal.  This routine will add those
;				extents to the free extent cache so that they can be reused.
;
;				CAUTION: This routine is called while the journal's trim lock
;				is held shared, so that no other thread can reuse any portion
;				of those extents.  We must be very careful about which locks
;				we take from within this callback, to avoid deadlock.  The
;				call to add_free_extent_cache will end up taking the cache's
;				lock (just long enough to add these extents to the cache).
;
;				CAUTION: If the journal becomes invalid (eg., due to an I/O
;				error when trying to write to the journal), this callback
;				will stop getting called, even if extents got freed before
;				the journal became invalid!
;
; Input Arguments:
;	arg				- The hfsmount of the volume containing the extents.
;	extent_count	- The number of extents freed in the transaction.
;	extents			- An array of extents (byte ranges) that were freed.
;________________________________________________________________________________
*/
__private_extern__ void
hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
{
	uint32_t i;
	uint32_t startBlock, numBlocks;
	struct hfsmount *hfsmp = arg;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);

	for (i=0; i<extent_count; ++i) {
		/* Convert the byte range in *extents back to a range of allocation blocks. */
		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
		numBlocks = extents[i].length / hfsmp->blockSize;
		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
}


/*
 ;________________________________________________________________________________
 ;
 ; Routine:		UnmapBlocks
 ;
 ; Function:	Traverse the bitmap, and issue DKIOCUNMAPs to the underlying
 ;				device as needed so that the underlying disk device is as
 ;				up-to-date as possible with which blocks are unmapped.
 ;
 ; Input Arguments:
 ;	hfsmp			- The volume containing the allocation blocks.
 ;________________________________________________________________________________
 */

__private_extern__
u_int32_t UnmapBlocks (struct hfsmount *hfsmp) {
	u_int32_t blocks_scanned = 0;
	int error = 0;
    struct jnl_trim_list trimlist;

    /*
     *struct jnl_trim_list {
     uint32_t    allocated_count;
     uint32_t    extent_count;
     dk_extent_t *extents;
     };
    */
    bzero (&trimlist, sizeof(trimlist));
    if (CONFIG_HFS_TRIM) {
        int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
        void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
        if (extents == NULL) {
            return ENOMEM;
        }
        trimlist.extents = (dk_extent_t*)extents;
        trimlist.allocated_count = alloc_count;
        trimlist.extent_count = 0;



        while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
            error = hfs_alloc_scan_block (hfsmp, blocks_scanned, hfsmp->totalBlocks,
                                          &blocks_scanned, &trimlist);
            if (error) {
                printf("HFS: bitmap unmap scan error: %d\n", error);
                break;
            }
        }
        if (error == 0) {
            hfs_issue_unmap(hfsmp, &trimlist);
        }
        if (trimlist.extents) {
            kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
        }
	}
	return error;
}

/*
 ;________________________________________________________________________________
 ;
 ; Routine:	   BlockAllocate
 ;
 ; Function:   Allocate space on a volume.	If contiguous allocation is requested,
 ;			   at least the requested number of blocks will be allocated or an
 ;			   error will be returned.	If contiguous allocation is not forced,
 ;			   the space will be allocated from the first sufficiently large extent
 ;			   found at or after the requested starting allocation block.  If there
 ;			   is not enough room there, an allocation of fewer blocks than
 ;			   requested may be returned.
 ;
 ;			   If the requested starting block is 0 (for new file allocations),
 ;			   the volume's allocation block pointer will be used as a starting
 ;			   point.
 ;
 ; Input Arguments:
 ;	 vcb			 - Pointer to ExtendedVCB for the volume to allocate space on
 ;	 fcb			 - Pointer to FCB for the file for which storage is being allocated
 ;	 startingBlock	 - Preferred starting allocation block, 0 = no preference
 ;	 minBlocks	 	 - Number of blocks requested.	If the allocation is non-contiguous,
 ;					   fewer than this may actually be allocated
 ;	 maxBlocks	 	 - The maximum number of blocks to allocate.  If there is additional free
 ;					   space after minBlocks, then up to maxBlocks blocks should really
 ;					   be allocated.  (Used by ExtendFileC to round up allocations to a multiple
 ;					   of the file's clump size.)
 ;	 flags           - Flags to specify options like contiguous, use metadata zone,
 ;					   skip free block check, etc.
 ;
 ; Output:
 ;	 (result)		 - Error code, zero for successful allocation
 ;	 *startBlock	 - Actual starting allocation block
 ;	 *actualBlocks	 - Actual number of allocation blocks allocated
 ;
 ; Side effects:
 ;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
 ;________________________________________________________________________________
 */
OSErr BlockAllocate (
	ExtendedVCB		*vcb,				/* which volume to allocate space on */
	u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
	u_int32_t		minBlocks,		/* desired number of blocks to allocate */
	u_int32_t		maxBlocks,		/* maximum number of blocks to allocate */
	u_int32_t		flags,			/* option flags */
	u_int32_t		*actualStartBlock,	/* actual first block of allocation */
	u_int32_t		*actualNumBlocks)	/* number of blocks actually allocated; if forceContiguous */
						/* was zero, then this may represent fewer than minBlocks */
{
	u_int32_t  freeBlocks;
	OSErr			err;
	Boolean			updateAllocPtr = false;		//	true if nextAllocation needs to be updated
	struct hfsmount	*hfsmp;
	Boolean useMetaZone;
	Boolean forceContiguous;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}

	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	//TODO: Figure out when we need to re-enable the RB-Tree.


	//TODO: Make sure we use allocLimit when appropriate.

	/*
	 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
	 * in conjunction with the Generate Tree function.   If the red-black tree does not currently contain
	 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
	 * we find what we need.  We'll update the tree fields when we're done, indicating that we've advanced the
	 * high water mark for the tree.
	 */

	//
	//	Initialize outputs in case we get an error
	//
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);


	/* Skip free block check if blocks are being allocated for relocating
	 * data during truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful.  This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		//	If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	//
	//	If caller didn't specify a starting block number, then use the volume's
	//	next block to allocate from.
	//
	if (startingBlock == 0) {
		HFS_MOUNT_LOCK(vcb, TRUE);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		}
		else {
			startingBlock = vcb->nextAllocation;
		}
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		updateAllocPtr = true;
	}


	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	//	If the request must be contiguous, then find a sequence of free blocks
	//	that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
		                          useMetaZone, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
		    (*actualStartBlock > startingBlock) &&
		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
	    	     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
#if CONFIG_HFS_ALLOC_RBTREE
		/*
		 * If the RB-Tree Allocator is live, just go straight for a
		 * BlockAllocateAny call and return the result.  Otherwise,
		 * resort to the bitmap scanner.
		 */
		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
			/* Start by trying to allocate from the starting block forward */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
								   maxBlocks, useMetaZone, actualStartBlock,
								   actualNumBlocks);

			/*
			 * Because the RB-Tree is live, the previous call to BlockAllocateAny
			 * will use the rbtree variant.  As a result, it will automatically search the
			 * metadata zone for a valid extent if needed.  If we get a return value of
			 * noErr, we found a valid extent and we can skip to the end.  If the error indicates
			 * the disk is full, that's an equally valid return code and we can skip to the end, too.
			 */
			if (err == noErr || err == dskFulErr) {
				goto Exit;
			}
			else {
				//TODO: only tear down tree if the tree is finished building.
				//Make sure to handle the ENOSPC condition properly.  We shouldn't error out in that case.
				/* Tear down tree if we encounter an error */
				if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
					hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
					DestroyTrees(hfsmp);
					ResetVCBFreeExtCache(hfsmp);
				}
				else {
					goto Exit;
				}
				// fall through to the normal allocation since the rb-tree allocation failed.
			}
		}
#endif

		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
			                       maxBlocks, useMetaZone, actualStartBlock,
			                       actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * We may be out of space in the normal zone; go up to the starting block from
			 * the start of the volume.
			 */
			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
			                       useMetaZone, actualStartBlock,
			                       actualNumBlocks);
		}
	}

Exit:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (*actualNumBlocks != 0) {
		//
		//	If we used the volume's roving allocation pointer, then we need to update it.
		//	Adding in the length of the current allocation might reduce the next allocate
		//	call by avoiding a re-scan of the already allocated space.  However, the clump
		//	just allocated can quite conceivably end up being truncated or released when
		//	the file is closed or its EOF changed.  Leaving the allocation pointer at the
		//	start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		HFS_MOUNT_LOCK(vcb, TRUE);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		if (updateAllocPtr &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/*
		 * Update the number of free blocks on the volume
		 *
		 * Skip updating the free blocks count if the blocks are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		HFS_MOUNT_UNLOCK(vcb, TRUE);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}

	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
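
/*
 * Illustrative call pattern for BlockAllocate (a sketch, not code from a
 * real caller; transaction setup and locking are elided).  A caller such
 * as ExtendFileC might first force a contiguous allocation and fall back
 * to a non-contiguous request if the volume is too fragmented:
 *
 *	u_int32_t start, count;
 *	OSErr result = BlockAllocate(vcb, 0, minBlocks, maxBlocks,
 *	                             HFS_ALLOC_FORCECONTIG, &start, &count);
 *	if (result == dskFulErr) {
 *		// Retry without the contiguity requirement; count may then
 *		// come back smaller than minBlocks.
 *		result = BlockAllocate(vcb, 0, minBlocks, maxBlocks, 0,
 *		                       &start, &count);
 *	}
 */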


/*
;________________________________________________________________________________
;
; Routine:	   BlockDeallocate
;
; Function:    Update the bitmap to deallocate a run of disk allocation blocks
;
; Input Arguments:
;	 vcb		- Pointer to ExtendedVCB for the volume to free space on
;	 firstBlock	- First allocation block to be freed
;	 numBlocks	- Number of allocation blocks to free up (must be > 0!)
;
; Output:
;	 (result)	- Result code
;
; Side effects:
;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
;	 The Allocator's red-black trees may also be modified as a result.
;________________________________________________________________________________
*/

OSErr BlockDeallocate (
	ExtendedVCB		*vcb,			//	Which volume to deallocate space on
	u_int32_t		firstBlock,		//	First block in range to deallocate
	u_int32_t		numBlocks, 		//	Number of contiguous blocks to deallocate
	u_int32_t 		flags)
{
	OSErr			err;
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	//
	//	If no blocks to deallocate, then exit early
	//
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}


	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}

		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}
	}




	/*
	 * If we're using the red-black tree code, then try to free the
	 * blocks by marking them in the red-black tree first.  If the tree
	 * is not active for whatever reason (or we're not using the
	 * R/B Tree code at all), then go straight for the BlockMarkFree
	 * function.
	 *
	 * Remember that we can get into this function if the tree isn't finished
	 * building.  In that case, check to see if the block we're de-allocating is
	 * past the high watermark
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		/*
		 * BlockMarkFreeRBTree deals with the case where we are resizing the
		 * that is currently controlled by the r/b tree.
		 */

		//TODO: Update multiplexing code for the half-finished case.
		err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
	}
	else {
		err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
	}

#else
	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
#endif
	if (err)
		goto Exit;

	//
	//	Update the volume's free block count, and mark the VCB as dirty.
	//
	HFS_MOUNT_LOCK(vcb, TRUE);

	/*
	 * Do not update the free block count if the HFS_ALLOC_SKIPFREEBLKS
	 * flag is specified; it is set when a volume is being truncated.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);

		/*
		 * In the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache).  Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}

	MarkVCBDirty(vcb);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}


u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
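
/*
 * freebitcount[n] is the number of zero (free) bits in the 4-bit value n.
 * Worked example: for bitmap byte 0xF0 (high nibble fully allocated, low
 * nibble fully free), MetaZoneFreeBlocks below counts
 *
 *	freebitcount[0xF0 & 0x0F] + freebitcount[(0xF0 >> 4) & 0x0F]
 *	  = freebitcount[0x0] + freebitcount[0xF]
 *	  = 4 + 0
 *	  = 4 free blocks.
 */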

u_int32_t
MetaZoneFreeBlocks(ExtendedVCB *vcb)
{
	u_int32_t freeblocks;
	u_int32_t *currCache;
	uintptr_t blockRef;
	u_int32_t bit;
	u_int32_t lastbit;
	int bytesleft;
	int bytesperblock;
	u_int8_t byte;
	u_int8_t *buffer;

	blockRef = 0;
	bytesleft = freeblocks = 0;
	buffer = NULL;
	bit = VCBTOHFS(vcb)->hfs_metazone_start;
	if (bit == 1)
		bit = 0;

	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
	bytesperblock = vcb->vcbVBMIOSize;

	/*
	 *  Count all the bits from bit to lastbit.
	 */
	while (bit < lastbit) {
		/*
		 *  Get next bitmap block.
		 */
		if (bytesleft == 0) {
			if (blockRef) {
				(void) ReleaseBitmapBlock(vcb, blockRef, false);
				blockRef = 0;
			}
			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
				return (0);
			}
			buffer = (u_int8_t *)currCache;
			bytesleft = bytesperblock;
		}
		byte = *buffer++;
		freeblocks += freebitcount[byte & 0x0F];
		freeblocks += freebitcount[(byte >> 4) & 0x0F];
		bit += kBitsPerByte;
		--bytesleft;
	}
	if (blockRef)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	return (freeblocks);
}


/*
 * Obtain the next allocation block (bit) that's
 * outside the metadata allocation zone.
 */
static u_int32_t NextBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit)
{
	struct  hfsmount *hfsmp = VCBTOHFS(vcb);

	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
		return (bit);
	/*
	 * Skip over metadata allocation zone.
	 */
	if ((bit >= hfsmp->hfs_metazone_start) &&
	    (bit <= hfsmp->hfs_metazone_end)) {
		bit = hfsmp->hfs_metazone_end + 1;
	}
	return (bit);
}


/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;			block (bit).  Return a pointer to the bitmap block.
;
; Inputs:
;	vcb			--	Pointer to ExtendedVCB
;	bit			--	Allocation block whose bitmap block is desired
;
; Outputs:
;	buffer		--	Pointer to bitmap block corresponding to "bit"
;	blockRef
;_______________________________________________________________________
*/
static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef)
{
	OSErr			err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	if (vcb->vcbSigWord == kHFSPlusSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */

	} else /* hfs */ {
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
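
/*
 * Worked example for the block calculation above: with a vcbVBMIOSize of
 * 4096 bytes (used here purely for illustration), each bitmap block covers
 * 4096 * kBitsPerByte = 32768 allocation blocks, so allocation block (bit)
 * 40000 is found in bitmap block 40000 / 32768 = 1.
 */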


/*
;_______________________________________________________________________
;
; Routine:	ReleaseBitmapBlock
;
; Function:	Release a bitmap block.
;
; Inputs:
;	vcb
;	blockRef
;	dirty
;_______________________________________________________________________
*/
static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty)
{
	struct buf *bp = (struct buf *)blockRef;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);

	if (blockRef == 0) {
		if (dirty)
			panic("hfs: ReleaseBitmapBlock: missing bp");
		return (0);
	}

	if (bp) {
		if (dirty) {
			// XXXdbg
			struct hfsmount *hfsmp = VCBTOHFS(vcb);

			if (hfsmp->jnl) {
				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
			} else {
				buf_bdwrite(bp);
			}
		} else {
			buf_brelse(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return (0);
}

/*
 * ReleaseScanBitmapBlock is used to release struct bufs that were
 * created for use by bitmap scanning code.  We want to force
 * them to be purged out of the buffer cache ASAP, so we'll release them differently
 * than in the ReleaseBitmapBlock case.  Additionally, we know that we're only reading
 * the blocks, so we will never dirty them as part of the tree building scan.
 */

static OSErr ReleaseScanBitmapBlock(struct buf *bp) {

	if (bp == NULL) {
		return (0);
	}

	/* Mark the buffer invalid if it isn't locked, then release it */
	if ((buf_flags(bp) & B_LOCKED) == 0) {
		buf_markinvalid(bp);
	}
	buf_brelse(bp);

	return (0);
}
1465
1466/*
1467_______________________________________________________________________
1468
1469Routine:	BlockAllocateContig
1470
1471Function:	Allocate a contiguous group of allocation blocks.  The
1472			allocation is all-or-nothing.  The caller guarantees that
1473			there are enough free blocks (though they may not be
1474			contiguous, in which case this call will fail).
1475
1476Inputs:
1477	vcb				Pointer to volume where space is to be allocated
1478	startingBlock	Preferred first block for allocation
1479	minBlocks		Minimum number of contiguous blocks to allocate
1480	maxBlocks		Maximum number of contiguous blocks to allocate
1481	useMetaZone
1482
1483Outputs:
1484	actualStartBlock	First block of range allocated, or 0 if error
1485	actualNumBlocks		Number of blocks allocated, or 0 if error
1486_______________________________________________________________________
1487*/
static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone,
				actualStartBlock, actualNumBlocks, 1);
	}
#endif
	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
}

/*
 * Variant of BlockAllocateContig that uses the original bitmap-searching logic
 */

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr	err;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	//
	//	Find a contiguous group of blocks at least minBlocks long.
	//	Determine the number of contiguous blocks available (up
	//	to maxBlocks).
	//

	/*
	 * NOTE: If the only contiguous free extent of at least minBlocks
	 * crosses startingBlock (i.e. starts before, ends after), then we
	 * won't find it. Earlier versions *did* find this case by letting
	 * the second search look past startingBlock by minBlocks.  But
	 * with the free extent cache, this can lead to duplicate entries
	 * in the cache, causing the same blocks to be allocated twice.
	 */
	err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
	                          maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	if (err == dskFulErr && startingBlock != 0) {
		/*
		 * Constrain the endingBlock so we don't bother looking for ranges
		 * that would overlap those found in the previous call.
		 */
		err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
		                          useMetaZone, actualStartBlock, actualNumBlocks);
	}
	//
	//	Now mark those blocks allocated.
	//
	if (err == noErr)
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
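
/*
 * Illustrative sketch (not part of the original source): the two calls above
 * cover the bitmap as two non-overlapping windows, [startingBlock, allocLimit)
 * followed by [1, startingBlock), so the same free extent can never be
 * reported twice.  The hypothetical find_range() below stands in for
 * BlockFindContiguous with the same in/out contract.
 */
#if 0	/* sketch only -- never compiled */
static OSErr two_window_search(u_int32_t start, u_int32_t limit,
                               u_int32_t min, u_int32_t max,
                               u_int32_t *outStart, u_int32_t *outCount)
{
	/* Window 1: from the caller's preferred start up to the limit. */
	OSErr err = find_range(start, limit, min, max, outStart, outCount);

	/* Window 2: wrap to the front of the volume, stopping at 'start'
	 * so the two windows never overlap. */
	if (err == dskFulErr && start != 0)
		err = find_range(1, start, min, max, outStart, outCount);
	return err;
}
#endif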

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Variant of BlockAllocateContig that uses the newer red-black tree library
 * in order to manage free space extents.  This will search the red-black tree
 * and return results in the same fashion as BlockAllocateContigBitmap.
 *
 * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
 * as well as BlockAllocateContig.  In order to determine when we should vend contiguous chunks over
 * locality-based searches, we use the forceContig argument to determine who called us.
 */

static OSErr BlockAllocateContigRBTree(
						  ExtendedVCB		*vcb,
						  u_int32_t		startingBlock,
						  u_int32_t		minBlocks,
						  u_int32_t		maxBlocks,
						  Boolean			useMetaZone,
						  u_int32_t		*actualStartBlock,
						  u_int32_t		*actualNumBlocks,
						  u_int32_t 	forceContig)
{
	OSErr	err;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t tempnode;

	bzero (&tempnode, sizeof(extent_node_t));

	/* Begin search at the end of the file, via startingBlock */
	memset (&search_sentinel, 0, sizeof(extent_node_t));
	search_sentinel.offset = startingBlock;

	*actualStartBlock = 0;
	*actualNumBlocks = 0;

	/*
	 * Find the first available extent that satisfies the allocation by searching
	 * from the starting point and moving forward
	 */
	node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);

	if (node) {
		*actualStartBlock = node->offset;
		*actualNumBlocks = node->length;
	}

	/* If we managed to grab at least minBlocks of space, then we're done. */

	if (*actualNumBlocks >= minBlocks) {
		if (*actualNumBlocks > maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}

		/* Check to see if blocks are already marked as in-use */
		if (ALLOC_DEBUG) {
			REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
			if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
				printf("bad node: %p, offset %d, length %d\n", node, node->offset,node->length);
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
					   *actualStartBlock, *actualNumBlocks);
			}
		}

		/*
		 * BlockMarkAllocatedRBTree is responsible for removing the nodes
		 * from the red-black tree after the bitmap has been updated on-disk.
		 */
		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
		if (err == noErr) {

			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						   *actualStartBlock, *actualNumBlocks);
				}
				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
			}

			return err;
		}
	}

	/*
	 * We may have failed to grow at the end of the file.  We'll try to find
	 * appropriate free extents, searching by size in the normal allocation zone.
	 *
	 * However, if we're allocating on behalf of a sparse device that hasn't explicitly
	 * requested a contiguous chunk, then we try to search by offset, even if it
	 * means fragmenting the file.  We want all available entries starting
	 * from the front of the disk to avoid creating new bandfiles.  As a result,
	 * we'll start by searching the offset tree rather than the normal length
	 * tree. Note that this function can be invoked from BlockAllocateAny, in
	 * which the minimum block size is 1 block, making it easy to succeed.
	 */
	search_sentinel.offset = hfsmp->hfs_metazone_end;
	search_sentinel.length = minBlocks;

	if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) {
		/* just start with the first offset node */
		node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
	}
	else {
		/*
		 * Otherwise, start from the end of the metadata zone or our next allocation pointer,
		 * and try to find the first chunk of size >= min.
		 */
		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);

		if (node == NULL) {
			extent_node_t *metaend_node;
			/*
			 * Maybe there's a free extent coalesced with the space still in the metadata
			 * zone.  If there is, find it and allocate from the middle of it, starting at
			 * the end of the metadata zone.
			 *
			 * If search_prev yields a result that is not offset == metazone_end, then that
			 * means no node existed at that offset.  If the previous node's offset + length crosses
			 * the metazone boundary, then allocate from there.  If it is too small to
			 * cross the metazone boundary, then it is of no importance and we'd have to
			 * report ENOSPC.
			 */
			metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);

			if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) {
				u_int32_t node_end = metaend_node->offset + metaend_node->length;
				if (node_end > hfsmp->hfs_metazone_end) {
					u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end;
					if (modified_length >= minBlocks) {
						/*
						 * Then we can allocate it.  Fill in the contents into tempnode,
						 * and BlockMarkAllocatedRBTree below will take care of the rest.
						 */
						tempnode.offset = hfsmp->hfs_metazone_end;
						tempnode.length = MIN(minBlocks, node_end - tempnode.offset);
						node = &tempnode;
					}
				}
			}
		}
	}
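
	/*
	 * Worked example (illustrative numbers, not from the original source):
	 * suppose hfs_metazone_end = 1000 and search_prev returns a free node at
	 * offset 900 with length 300.  Then node_end = 1200 crosses the metazone
	 * boundary, and modified_length = 1200 - 1000 = 200.  With minBlocks = 150
	 * we allocate from tempnode.offset = 1000 for MIN(150, 200) = 150 blocks,
	 * i.e. only the portion of the extent lying outside the metadata zone.
	 */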

	/* If we can't find anything useful, search the metadata zone as a last resort. */

	if ((!node) && useMetaZone) {
		search_sentinel.offset = 0;
		search_sentinel.length = minBlocks;
		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
	}

	/* If we found something useful, then go ahead and update the bitmap */
	if ((node) && (node->length >= minBlocks)) {
		*actualStartBlock = node->offset;
		if (node->length >= maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}
		else {
			*actualNumBlocks = node->length;
		}

		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);

		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						   *actualStartBlock, *actualNumBlocks);
				}
				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
			}
		}
	}
	else {
		int destroy_trees = 0;
		/*
		 * TODO: Add High-water mark check here.  If we couldn't find anything useful,
		 * when do we tear down the tree?  Or should the logic be in BlockAllocateContig??
		 */
		if (destroy_trees) {
			DestroyTrees(VCBTOHFS(vcb));
			/* Reset the Free Ext Cache since we'll be using it now. */
			ResetVCBFreeExtCache(VCBTOHFS(vcb));
		}

		if (ALLOC_DEBUG) {
			printf("HFS allocator: No space on FS (%s). Node %p Start %d Min %d, Max %d, Tree still alive.\n",
				   hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks);

			/* Dump the list ? */
			extent_tree_offset_print(&hfsmp->offset_tree);

			printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n",
				   hfsmp->vcbVN, minBlocks, maxBlocks);
		}
		err = dskFulErr;
	}

	if (err == noErr) {
		if (ALLOC_DEBUG) {
			if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
				panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	return err;

}
#endif


/*
_______________________________________________________________________

Routine:	BlockAllocateAny

Function:	Allocate one or more allocation blocks.  If there are fewer
			free blocks than requested, all free blocks will be
			allocated.  The caller guarantees that there is at least
			one free block.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block for allocation
	endingBlock		Last block to check + 1
	maxBlocks		Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/

/*
 * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree
 * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic.
 */

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	}
#endif
	return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);

}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * BlockAllocateAnyRBTree finds one or more allocation blocks by using
 * the red-black allocation tree to figure out where the free ranges are.
 * This function is typically used as a last resort because we were unable to
 * find the right ranges.  Outputs are the same as BlockAllocateAnyBitmap.
 */
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;

	/*
	 * BlockAllocateContig
	 */
	/* If we're using the red-black tree, try searching at the specified offsets. */
	err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone,
									actualStartBlock, actualNumBlocks, 0);
	return err;

}
#endif

/*
 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
 * where the free allocation blocks are.  Inputs and outputs are the same as for
 * BlockAllocateAny and BlockAllocateAnyRBTree.
 */

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	block;			//	current block number
	register u_int32_t	currentWord;	//	Value of current word within bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t  *buffer = NULL;
	u_int32_t  *currCache = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	Boolean dirty = false;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else {
				err = dskFulErr;
				goto Exit;
			}
		}
	}

	//	Since this routine doesn't wrap around the end of the bitmap,
	//	cap maxBlocks at the size of the range being searched.
	if (maxBlocks > (endingBlock - startingBlock)) {
		maxBlocks = endingBlock - startingBlock;
	}

	//
	//	Pre-read the first bitmap block
	//
	err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
	if (err != noErr) goto Exit;
	buffer = currCache;

	//
	//	Set up the current position within the block
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		buffer += wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
		currentWord = SWAP_BE32 (*buffer);
		bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
	}
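
	/*
	 * Worked example (illustrative numbers only): with vcbVBMIOSize = 4096
	 * bytes, bitsPerBlock = 32768 and wordsPerBlock = 1024.  For
	 * startingBlock = 70000: 70000 & 32767 = 4464, so wordIndexInBlock =
	 * 4464 / 32 = 139, and the within-word bit index is 70000 & 31 = 16,
	 * giving bitMask = 0x80000000 >> 16 = 0x00008000.  In other words,
	 * block 70000 lives at bit 16 of word 139 of the third bitmap I/O block.
	 */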

	//
	//	Find the first unallocated block
	//
	block=startingBlock;
	while (block < endingBlock) {
		if ((currentWord & bitMask) == 0)
			break;

		//	Next bit
		++block;
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					block = NextBitmapBlock(vcb, block);
				}
				if (block >= endingBlock) {
					err = dskFulErr;
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;

				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

	//	Did we get to the end of the bitmap before finding a free block?
	//	If so, then couldn't allocate anything.
	if (block >= endingBlock) {
		err = dskFulErr;
		goto Exit;
	}

	//	Return the first block in the allocated range
	*actualStartBlock = block;
	dirty = true;

	//	If we could get the desired number of blocks before hitting endingBlock,
	//	then adjust endingBlock so we won't keep looking.  Ideally, the comparison
	//	would be (block + maxBlocks) < endingBlock, but that could overflow.  The
	//	comparison below yields identical results, but without overflow.
	if (block < (endingBlock-maxBlocks)) {
		endingBlock = block + maxBlocks;	//	if we get this far, we've found enough
	}
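
	//	Worked example of the overflow concern (illustrative numbers only):
	//	with 32-bit math, block = 0xFFFFFF00 and maxBlocks = 0x200 make
	//	(block + maxBlocks) wrap to 0x100, which would compare as "less than"
	//	almost any endingBlock even though the range extends past it.  Since
	//	maxBlocks was capped at endingBlock - startingBlock above,
	//	endingBlock - maxBlocks cannot underflow, so the subtraction form
	//	is safe.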

	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	Allocate all of the consecutive blocks
	//
	while ((currentWord & bitMask) == 0) {
		//	Allocate this block
		currentWord |= bitMask;

		//	Move to the next block.  If no more, then exit.
		++block;
		if (block == endingBlock)
			break;

		//	Next bit
		bitMask >>= 1;
		if (bitMask == 0) {
			*buffer = SWAP_BE32 (currentWord);					//	update value in bitmap

			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, true);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, block);
					if (nextBlock != block) {
						goto Exit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;

				// XXXdbg
				if (hfsmp->jnl) {
					journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
				}

				wordsLeft = wordsPerBlock;
			}

			currentWord = SWAP_BE32 (*buffer);
		}
	}
	*buffer = SWAP_BE32 (currentWord);							//	update the last change

Exit:
	if (err == noErr) {
		*actualNumBlocks = block - *actualStartBlock;

		// sanity check
		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
			panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}

		/*
		 * Beware!
		 * Because this function directly manipulates the bitmap to mark the
		 * blocks it came across as allocated, we must inform the journal (and
		 * subsequently, the journal's trim list) that we are allocating these
		 * blocks, just like in BlockMarkAllocatedInternal.  hfs_unmap_alloc_extent
		 * and the functions it calls will serialize behind the journal trim list lock
		 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to
		 * us manipulating the trim list, or we get there first and successfully remove
		 * these bitmap blocks before the TRIM happens.
		 */
		hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks);
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	if (currCache)
		(void) ReleaseBitmapBlock(vcb, blockRef, dirty);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}


/*
_______________________________________________________________________

Routine:	BlockAllocateKnown

Function:	Try to allocate space from known free space in the free
			extent cache.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	maxBlocks		Maximum number of contiguous blocks to allocate

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error

Returns:
	dskFulErr		Free extent cache is empty
_______________________________________________________________________
*/

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	u_int32_t		foundBlocks;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);

	HFS_MOUNT_LOCK(vcb, TRUE);
	lck_spin_lock(&vcb->vcbFreeExtLock);
	if ((hfs_isrbtree_active(vcb) == true) ||
		vcb->vcbFreeExtCnt == 0 ||
	    vcb->vcbFreeExt[0].blockCount == 0) {
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
		return dskFulErr;
	}
	lck_spin_unlock(&vcb->vcbFreeExtLock);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	lck_spin_lock(&vcb->vcbFreeExtLock);

	//	Just grab up to maxBlocks of the first (largest) free extent.
	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
	foundBlocks = vcb->vcbFreeExt[0].blockCount;
	if (foundBlocks > maxBlocks)
		foundBlocks = maxBlocks;
	*actualNumBlocks = foundBlocks;

	lck_spin_unlock(&vcb->vcbFreeExtLock);

	remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);

	// sanity check
	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
	{
		printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
		err = EIO;
	}
	else
	{
		//
		//	Now mark the found extent in the bitmap
		//
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
	}

	sanity_check_free_ext(vcb, 0);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
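
/*
 * Illustrative example (hypothetical numbers): if the largest cached free
 * extent is { startBlock = 5000, blockCount = 400 } and the caller asks for
 * maxBlocks = 100, the code above returns blocks 5000-5099, and
 * remove_free_extent_cache() in effect trims the cached entry down to the
 * remaining tail { 5100, 300 } before the bitmap is updated.
 */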

/*
 * BlockMarkAllocated
 *
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 */

OSErr BlockMarkAllocated(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;

	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp)) {
		int err;

		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/*
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree
			 */
			goto justbitmap;
		}

		err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks);
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						   startingBlock, numBlocks);
				}
				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC);
			}
		}
		return err;

	}
justbitmap:
#endif

	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);

}


/*
_______________________________________________________________________

Routine:	BlockMarkAllocatedInternal

Function:	Mark a contiguous group of blocks as allocated (set in the
			bitmap).  It assumes those bits are currently marked
			deallocated (clear in the bitmap).  Note that this function
			must be called regardless of whether or not the bitmap or
			tree-based allocator is used, as all allocations must correctly
			be marked on-disk.  If the tree-based approach is running, then
			this will be done before the node is removed from the tree.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	First block number to mark as allocated
	numBlocks		Number of blocks to mark as allocated
_______________________________________________________________________
*/
static
OSErr BlockMarkAllocatedInternal (
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	OSErr			err;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t		firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t		numBits;		//	Number of bits in word to allocate
	u_int32_t		*buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	// XXXdbg
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;
	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	If the first block to allocate doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to allocate

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}
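
	//	Worked example of the partial-word masks (illustrative numbers only):
	//	startingBlock = 100, numBlocks = 3.  firstBit = 100 % 32 = 4, so
	//	bitMask = 0xFFFFFFFF >> 4 = 0x0FFFFFFF and numBits = 28.  Since
	//	numBits > numBlocks, numBits becomes 3 and the mask is trimmed with
	//	~(0xFFFFFFFF >> 7) = 0xFE000000, leaving bitMask = 0x0E000000:
	//	exactly bits 4-6 of word 3 (100 / 32), i.e. blocks 100-102.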

	//
	//	Allocate whole words (32 blocks) at a time.
	//

	bitMask = kAllBitsSetInWord;					//	put this in a register for 68K
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if (*currentWord != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord = SWAP_BE32 (bitMask);
		numBlocks -= kBitsPerWord;

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Allocate any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);			//	set the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This is a wrapper function around BlockMarkAllocatedInternal.  This function is
 * called when the RB Tree-based allocator needs to mark a block as in-use.
 * This function should take the locks that would not normally be
 * necessary for the normal bitmap allocator, and then call the function.  Once
 * the on-disk data structures are updated properly, then this will remove the
 * appropriate node from the tree.
 */

static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks)
{
	OSErr err;
	struct hfsmount *hfsmp  = VCBTOHFS(vcb);
	int rb_err = 0;

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
				   startingBlock, numBlocks);
		}
		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
	}

	err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks);

	if (err == noErr) {

		if (ALLOC_DEBUG) {
			if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n",
					   startingBlock, numBlocks);
			}
		}

		/*
		 * Mark the blocks in the offset tree.
		 */
		rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock);
		if (rb_err) {
			if (ALLOC_DEBUG) {
				printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err);
			}

			/*
			 * We may be called from the BlockMarkAllocated interface, in which case, they would
			 * not be picking extents from their start. Do a check here, find if the specified
			 * extent is free, and if it is, then find the containing node.
			 */
			extent_node_t *node = NULL;
			extent_node_t search_sentinel;
			search_sentinel.offset = startingBlock;

			node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);

			if (node) {
				rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock);
			}

			if (ALLOC_DEBUG) {
				if (rb_err) {
					printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err);
				}
			}
		}
		if (ALLOC_DEBUG) {
			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
		}
	}

	/*
	 * If we encountered a red-black tree error, for now, we immediately back off and force
	 * destruction of rb-tree.  Set the persistent error-detected bit in the mount point.
	 * That will ensure that even if we reach a low-water-mark in the future we will still
	 * not allow the rb-tree to be used.  On next mount, we will force a re-construction from
	 * on-disk state.  As a fallback, we will now resort to the bitmap-scanning behavior.
	 */
	if (rb_err) {
		/* Mark RB-Trees with error */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
		DestroyTrees(hfsmp);
		/* Reset the Free Ext Cache since we'll be using it now. */
		ResetVCBFreeExtCache(hfsmp);
		printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n");
	}

	return err;
}
#endif


/*
 * BlockMarkFree
 *
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as freed.  It will mark the blocks in the red-black tree if appropriate.  We need to do
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 *
 */
OSErr BlockMarkFree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp)) {
		int err;

		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/*
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree
			 */
			goto justbitmap;
		}

		err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks);
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
				if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n",
						   startingBlock, numBlocks);
				}
				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE);
			}
		}
		return err;
	}
justbitmap:
#endif
	return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);

}

/*
 * BlockMarkFreeUnused
 *
 * Scan the bitmap block beyond end of current file system for bits
 * that are marked as used.  If any of the bits are marked as used,
 * this function marks them free.
 *
 * Note:  This was specifically written to mark all bits beyond
 * end of current file system during hfs_extendfs(), which makes
 * sure that all the new blocks added to the file system are
 * marked as free.   We expect that all the blocks beyond end of
 * current file system are always marked as free, but there might
 * be cases where they are marked as used.  This function assumes
 * that the number of blocks marked as used incorrectly is
 * relatively small, otherwise it can overflow the journal
 * transaction size on certain file system configurations
 * (example, large unused bitmap with relatively small journal).
 *
 * Input:
 * 	startingBlock: First block of the range to mark unused
 * 	numBlocks: Number of blocks in the range to mark unused
 *
 * Returns: zero on success, non-zero on error.
 */
OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t	numBlocks)
{
	int error = 0;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	u_int32_t curNumBlocks;
	u_int32_t  bitsPerBlock;
	u_int32_t lastBit;

	/* Use the optimal bitmap I/O size instead of bitmap block size */
	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;

	/*
	 * First clear any non bitmap allocation block aligned bits
	 *
	 * Calculate the first bit in the bitmap block next to
	 * the bitmap block containing the bit for startingBlock.
	 * Using this value, we calculate the total number of
	 * bits to be marked unused from startingBlock to the
	 * end of bitmap block containing startingBlock.
	 */
	lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
	curNumBlocks = lastBit - startingBlock;
	if (curNumBlocks > numBlocks) {
		curNumBlocks = numBlocks;
	}
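
	/*
	 * Worked example (illustrative numbers only): with vcbVBMIOSize = 4096,
	 * bitsPerBlock = 32768.  For startingBlock = 70000, lastBit rounds up to
	 * the next I/O-block boundary: ((70000 + 32767) / 32768) * 32768 = 98304,
	 * so this first pass frees curNumBlocks = 98304 - 70000 = 28304 blocks.
	 * The loop below then walks the remainder one full I/O block (32768
	 * bits) at a time.
	 */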
	error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
	if (error) {
		return error;
	}
	startingBlock += curNumBlocks;
	numBlocks -= curNumBlocks;

	/*
	 * Check a full bitmap block for any 'used' bit.  If any bit is used,
	 * mark all the bits only in that bitmap block as free.  This ensures
	 * that we do not write unmodified bitmap blocks and do not
	 * overwhelm the journal.
	 *
	 * The code starts by checking full bitmap block at a time, and
	 * marks entire bitmap block as free only if any bit in that bitmap
	 * block is marked as used.  In the end, it handles the last bitmap
	 * block which might be partially full by only checking till the
	 * caller-specified last bit and if any bit is set, only mark that
	 * range as free.
	 */
	while (numBlocks) {
		if (numBlocks >= bitsPerBlock) {
			curNumBlocks = bitsPerBlock;
		} else {
			curNumBlocks = numBlocks;
		}
		if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
			error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
			if (error) {
				return error;
			}
		}
		startingBlock += curNumBlocks;
		numBlocks -= curNumBlocks;
	}

	return error;
}

/*
_______________________________________________________________________

Routine:	BlockMarkFreeInternal

Function:	Mark a contiguous group of blocks as free (clear in the
			bitmap).  It assumes those bits are currently marked
			allocated (set in the bitmap).

Inputs:
	vcb				Pointer to volume where space is to be freed
	startingBlock	First block number to mark as freed
	numBlocks		Number of blocks to mark as freed
	do_validate 	If true, validate the blocks being deallocated:
					check that they are within totalBlocks for the
					current volume and that they were actually
					allocated before marking them free.
_______________________________________________________________________
*/
static
OSErr BlockMarkFreeInternal(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock_in,
	register u_int32_t	numBlocks_in,
	Boolean 		do_validate)
{
	OSErr		err;
	u_int32_t	startingBlock = startingBlock_in;
	u_int32_t	numBlocks = numBlocks_in;
	uint32_t	unmapStart = startingBlock_in;
	uint32_t	unmapCount = numBlocks_in;
	uint32_t	wordIndexInBlock;
	u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t	currentBit;		//	Bit index within word of current bit to allocate
	u_int32_t	numBits;		//	Number of bits in word to allocate
	u_int32_t	*buffer = NULL;
	uintptr_t	blockRef;
	u_int32_t	bitsPerBlock;
	u_int32_t	wordsPerBlock;
	// XXXdbg
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);

	/*
	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
	 * need to be able to free blocks being relocated during hfs_truncatefs.
	 */
	if ((do_validate == true) &&
	    (startingBlock + numBlocks > vcb->totalBlocks)) {
		if (ALLOC_DEBUG) {
			panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
		}

		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		err = EIO;
		goto Exit;
	}

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;
	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	Figure out how many bits and words per bitmap block.
	//
	bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;

	//
	// Look for a range of free blocks immediately before startingBlock
	// (up to the start of the current bitmap block).  Set unmapStart to
	// the first free block.
	//
	currentWord = buffer + wordIndexInBlock;
	currentBit = startingBlock % kBitsPerWord;
	bitMask = kHighBitInWordMask >> currentBit;
	while (true) {
		// Move currentWord/bitMask back by one bit
		bitMask <<= 1;
		if (bitMask == 0) {
			if (--currentWord < buffer)
				break;
			bitMask = kLowBitInWordMask;
		}

		if (*currentWord & SWAP_BE32(bitMask))
			break;	// Found an allocated block.  Stop searching.
		--unmapStart;
		++unmapCount;
	}
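
	/*
	 * Illustrative note (not from the original source): the backward scan
	 * above widens the eventual TRIM range, not the range of bits cleared.
	 * For example, if blocks 95-99 are already free and we are freeing
	 * blocks 100-109, unmapStart backs up to 95 and unmapCount grows to 15,
	 * so the later hfs_unmap_free_extent() call can issue one DKIOCUNMAP
	 * covering the whole coalesced free run 95-109.
	 */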

	//
	//	If the first block to free doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	currentWord = buffer + wordIndexInBlock;
	wordsLeft = wordsPerBlock - wordIndexInBlock;
	currentBit = startingBlock % kBitsPerWord;
	if (currentBit != 0) {
		bitMask = kAllBitsSetInWord >> currentBit;	//	turn off all bits before currentBit
		numBits = kBitsPerWord - currentBit;		//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));	//	turn off bits after last
		}
		if ((do_validate == true) &&
		    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to free

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Free whole words (32 blocks) at a time.
	//

	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((do_validate == true) &&
		    (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
			goto Corruption;
		}
		*currentWord = 0;							//	clear the entire word
		numBlocks -= kBitsPerWord;

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Free any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((do_validate == true) &&
		    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);			//	clear the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

	//
	// Look for a range of free blocks immediately after the range we just freed
	// (up to the end of the current bitmap block).
	//
	wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
	wordsLeft = wordsPerBlock - wordIndexInBlock;
	currentWord = buffer + wordIndexInBlock;
	currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
	bitMask = kHighBitInWordMask >> currentBit;
	while (true) {
		// Move currentWord/bitMask/wordsLeft forward one bit
		bitMask >>= 1;
		if (bitMask == 0) {
			if (--wordsLeft == 0)
				break;
			++currentWord;
			bitMask = kHighBitInWordMask;
		}

		if (*currentWord & SWAP_BE32(bitMask))
			break;	// Found an allocated block.  Stop searching.
		++unmapCount;
	}

Exit:

	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (err == noErr) {
		hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;

Corruption:
#if DEBUG_BUILD
	panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
#else
	printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
	hfs_mark_volume_inconsistent(vcb);
	err = EIO;
	goto Exit;
#endif
}


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This is a wrapper function around BlockMarkFreeInternal.  This function is
 * called when the RB Tree-based allocator needs to mark a block as no longer
 * in use. This function should take the locks that would not normally be
 * necessary for the normal bitmap deallocator, and then call the function.  Once
 * the on-disk data structures are updated properly, then this will update an
 * existing rb-tree node if possible, or else create a new one.
 */

OSErr BlockMarkFreeRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	OSErr err;
	struct hfsmount *hfsmp  = VCBTOHFS(vcb);
	int rb_err = 0;

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
		if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n",
				   startingBlock, numBlocks);
		}
		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
	}

	err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);

	if (err == noErr) {

		/*
		 * During a filesystem truncation, we may need to relocate files out of the
		 * portion of the bitmap that is no longer controlled by the r/b tree.
		 * In this case, just update the bitmap and do not attempt to manipulate the tree.
		 */
		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			goto free_error;
		}

		extent_node_t *newnode;

		if (ALLOC_DEBUG) {
			/*
			 * Validate that the blocks in question are not allocated in the bitmap, and that they're
			 * not in the offset tree, since it should be tracking free extents, rather than allocated
			 * extents
			 */
			if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n",
					   startingBlock, numBlocks);
			}
		}

		if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) {
			if (startingBlock >= hfsmp->offset_block_end) {
				/*
				 * If the tree generation code has not yet finished scanning the
				 * bitmap region containing this extent, do nothing.  If the start
				 * of the range to be deallocated is greater than the current high
				 * watermark on the offset tree, just bail out and let the scanner catch up with us.
				 */
				rb_err = 0;
				goto free_error;
			}
		}

		newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock);
		if (newnode == NULL) {
			rb_err = 1;
			goto free_error;
		}

		if (ALLOC_DEBUG) {
			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
		}

	}

free_error:
	/*
	 * We follow the same principle as in BlockMarkAllocatedRB.
	 * If we encounter an error in adding the extents to the rb-tree, then immediately
	 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags
	 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild.
	 */
	if (rb_err) {
		/* Mark RB-Trees with error */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
		DestroyTrees(hfsmp);
		/* Reset the Free Ext Cache since we'll be using it now. */
		ResetVCBFreeExtCache(hfsmp);
		printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n");
	}

	return err;

}
#endif

/*
_______________________________________________________________________

Routine:	BlockFindContiguous

Function:	Find a contiguous range of blocks that are free (bits
			clear in the bitmap).  If a contiguous range of the
			minimum size can't be found, an error will be returned.
			This is only needed to support the bitmap-scanning logic,
			as the red-black tree should be able to do this by internally
			searching its tree.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block of range
	endingBlock		Last possible block in range + 1
	minBlocks		Minimum number of blocks needed.  Must be > 0.
	maxBlocks		Maximum (ideal) number of blocks desired
	useMetaZone		OK to dip into metadata allocation zone

Outputs:
	actualStartBlock	First block of range found, or 0 if error
	actualNumBlocks		Number of blocks found, or 0 if error

Returns:
	noErr			Found at least minBlocks contiguous
	dskFulErr		No contiguous space found, or all less than minBlocks
_______________________________________________________________________
*/

static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	currentBlock;		//	Block we're currently looking at.
	u_int32_t			firstBlock;			//	First free block in current extent.
	u_int32_t			stopBlock;			//	If we get to this block, stop searching for first free block.
	u_int32_t			foundBlocks;		//	Number of contiguous free blocks in current extent.
	u_int32_t			*buffer = NULL;
	register u_int32_t	*currentWord;
	register u_int32_t	bitMask;
	register u_int32_t	wordsLeft;
	register u_int32_t	tempWord;
	uintptr_t  blockRef;
	u_int32_t  wordsPerBlock;
	u_int32_t  updated_free_extent = 0;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else
				goto DiskFull;
		}
	}

	if ((endingBlock - startingBlock) < minBlocks)
	{
		//	The set of blocks we're checking is smaller than the minimum number
		//	of blocks, so we couldn't possibly find a good range.
		goto DiskFull;
	}

	stopBlock = endingBlock - minBlocks + 1;
	currentBlock = startingBlock;
	firstBlock = 0;

	/*
	 * Skip over metadata blocks.
	 */
	if (!useMetaZone)
		currentBlock = NextBitmapBlock(vcb, currentBlock);

	//
	//	Pre-read the first bitmap block.
	//
	err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
	if ( err != noErr ) goto ErrorExit;

	//
	//	Figure out where currentBlock is within the buffer.
	//
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

	wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);	// Current index into buffer
	currentWord = buffer + wordsLeft;
	wordsLeft = wordsPerBlock - wordsLeft;
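
	//	Worked example of the index math (illustrative numbers only): with
	//	wordsPerBlock = 1024 and currentBlock = 70000, the word index is
	//	(70000 / 32) & 1023 = 2187 & 1023 = 139, so currentWord points at
	//	word 139 of the cached bitmap block and wordsLeft = 1024 - 139 = 885
	//	words remain before the next ReadBitmapBlock is needed.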

	do
	{
		foundBlocks = 0;

		//============================================================
		//	Look for a free block, skipping over allocated blocks.
		//============================================================

		//
		//	Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (tempWord & bitMask)
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUnused;

			//	Didn't find any unused bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < stopBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					currentBlock = NextBitmapBlock(vcb, currentBlock);
					if (currentBlock >= stopBlock) {
						goto LoopExit;
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are clear.  Adding 1 to a word that
			//	is all ones (0xFFFFFFFF) wraps to zero, so the sum is
			//	non-zero exactly when at least one bit is clear.
			if ((tempWord = SWAP_BE32(*currentWord)) + 1)	//	non-zero if any bits were clear
			{
				//	Figure out which bit is clear
				bitMask = kHighBitInWordMask;
				while (tempWord & bitMask)
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		//	Found the free bit; break out to FoundUnused.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
		}

FoundUnused:
		//	Make sure the unused bit is early enough to use
		if (currentBlock >= stopBlock)
		{
			break;
		}

		//	Remember the start of the extent
		firstBlock = currentBlock;

		//============================================================
		//	Count the number of contiguous free blocks.
		//============================================================

		//
		//	Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (bitMask && !(tempWord & bitMask))
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUsed;

			//	Didn't find any used bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < endingBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, currentBlock);
					if (nextBlock != currentBlock) {
						goto LoopExit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are set
			if ((tempWord = SWAP_BE32(*currentWord)) != 0)
			{
				//	Figure out which bit is set
				bitMask = kHighBitInWordMask;
				while (!(tempWord & bitMask))
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		//	Found the used bit; break out to FoundUsed.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;

			//	If we found at least maxBlocks, we can quit early.
			if ((currentBlock - firstBlock) >= maxBlocks)
				break;
		}

FoundUsed:
		//	Make sure we didn't run out of bitmap looking for a used block.
		//	If so, pin to the end of the bitmap.
		if (currentBlock > endingBlock)
			currentBlock = endingBlock;

		//	Figure out how many contiguous free blocks there were.
		//	Pin the answer to maxBlocks.
		foundBlocks = currentBlock - firstBlock;
		if (foundBlocks > maxBlocks)
			foundBlocks = maxBlocks;
		if (foundBlocks >= minBlocks)
			break;		//	Found what we needed!
		/* We did not find the total number of blocks we were looking for, but
		 * let's add this free block run to our free extent cache list
		 */
		updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);

	} while (currentBlock < stopBlock);
LoopExit:

	//	Return the outputs.
	if (foundBlocks < minBlocks)
	{
DiskFull:
		err = dskFulErr;
ErrorExit:
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	else
	{
		err = noErr;
		*actualStartBlock = firstBlock;
		*actualNumBlocks = foundBlocks;
		/*
		 * Sanity check for overflow
		 */
		if ((firstBlock + foundBlocks) > vcb->allocLimit) {
			panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
				vcb->vcbVN, startingBlock, endingBlock, currentBlock,
				firstBlock, stopBlock, minBlocks, foundBlocks);
		}
	}

	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
		int i;
		u_int32_t min_start = vcb->totalBlocks;

		// set the nextAllocation pointer to the smallest free block number
		// we've seen so on the next mount we won't rescan unnecessarily
		lck_spin_lock(&vcb->vcbFreeExtLock);
		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
			if (vcb->vcbFreeExt[i].startBlock < min_start) {
				min_start = vcb->vcbFreeExt[i].startBlock;
			}
		}
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		if (min_start != vcb->totalBlocks) {
			if (min_start < vcb->nextAllocation) {
				vcb->nextAllocation = min_start;
			}
			if (min_start < vcb->sparseAllocation) {
				vcb->sparseAllocation = min_start;
			}
		}
	}

	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
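
/*
 * Illustrative sketch (not compiled): the word-at-a-time free-block test
 * used in the whole-word scan loops above.  A bitmap word with all 32 bits
 * set (every block allocated) is 0xFFFFFFFF, and adding 1 wraps it to 0,
 * so (word + 1) is non-zero exactly when at least one bit is clear.  The
 * function name below is hypothetical and exists only for this example.
 */
#if 0
static int example_word_has_free_block(u_int32_t word)
{
	return (word + 1) != 0;		/* non-zero iff some block in this word is free */
}
#endif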


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Wrapper function around hfs_isrbtree_allocated.  This just takes the start offset,
 * the number of blocks, and whether the caller expects the blocks to be free
 * or allocated.  This function is designed to be used primarily with the debug #ifdef
 * enabled, so it results in a panic if anything unexpected occurs.
 *
 * shouldBeFree will be nonzero if the caller expects the zone to be free.
 */

void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks,
								 u_int32_t numBlocks, int shouldBeFree) {
	int alloc;
	extent_node_t *node1 = NULL;
	u_int32_t off1 = 0;
	u_int32_t len1 = 0;
	alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1);

	if (node1) {
		off1 = node1->offset;
		len1 = node1->length;
	}

	if (shouldBeFree) {
		/*
		 * If the region should be free, then we expect to see extents in the tree
		 * matching this start and length.  Alloc != 0 means some portion of the extent
		 * specified was allocated.
		 */
		if (alloc != 0){
			panic ("HFS check_rbtree_extents: Node (%p) does not exist! "
				   "node1 off (%d),len(%d), start(%d) end(%d)\n",
				   node1, off1, len1, startBlocks, numBlocks);
		}
	}
	else {
		/*
		 * Otherwise, this means that the region should be allocated, and if we find
		 * an extent matching it, that's bad.
		 */
		if (alloc == 0){
			panic ("HFS check_rbtree_extents: Node (%p) exists! "
				   "node1 off (%d),len(%d), start(%d) end(%d)\n",
				   node1, off1, len1, startBlocks, numBlocks);
		}
	}
}
#endif

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Exhaustive validation search.  This function iterates over all allocation blocks and
 * compares their status in the red-black tree vs. the allocation bitmap.  If the two are out of sync
 * then it will panic.  Bitmap lock must be held while this function is run.
 *
 * Because this function requires a red-black tree search to validate every allocation block, it is
 * very expensive and should ONLY be run in debug mode, and even then, infrequently.
 *
 * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
 *
 */
void
hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){

	u_int32_t current;
	extent_node_t* node1;

	hfs_checktreelinks (hfsmp);

	for (current = start; current < end; current++) {
		node1 = NULL;
		int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
		int bitmap = hfs_isallocated(hfsmp, current, 1);

		if (bitmap != rbtree){
			panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n",
				  current, bitmap, rbtree);
		}
	}
}

/*
 * Exhaustive Red-Black Tree Linked List verification routine.
 *
 * This function iterates through the red-black tree's nodes, and then verifies that the linked list
 * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
 * The bitmap lock must be held while this function is run.
 */

void
hfs_checktreelinks (struct hfsmount *hfsmp) {
	extent_tree_offset_t *tree = &hfsmp->offset_tree;

	extent_node_t *current = NULL;
	extent_node_t *next = NULL;
	extent_node_t *treenext;

	current = extent_tree_off_first (tree);

	while (current) {
		next = current->offset_next;
		treenext = extent_tree_off_next (tree, current);
		if (next != treenext) {
			panic("hfs_checktreelinks: mismatch for node (%p), next: %p , treenext %p !\n", current, next, treenext);
		}
		current = treenext;
	}
}

#endif


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Test to see if any free blocks exist at a given offset.
 * If there exists a free-extent node at (or before) the specified offset, it
 * will be returned via ret_node.
 *
 * A NULL *ret_node indicates that only allocated blocks exist at that offset.
 *
 * Allocation file lock must be held.
 *
 * Returns:
 *	1 if blocks in the range are allocated.
 *	0 if all blocks in the range are free.
 */

static int
hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock,
						u_int32_t numBlocks, extent_node_t **ret_node) {

	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t *nextnode = NULL;

	/*
	 * With only one tree, we just have to validate that there are entries
	 * in the R/B tree at the specified offset if the range really is free.
	 */
	search_sentinel.offset = startBlock;
	search_sentinel.length = numBlocks;

	node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
	if (node) {

		*ret_node = node;
		nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
		if (nextnode != node->offset_next) {
			panic ("hfs_isrbtree_allocated: Next pointers out of sync!\n");
		}

		/*
		 * Check to see if it is a superset of our target range. Because we started
		 * with the offset or some offset prior to it, then we know the node's offset is
		 * at least <= startBlock.  So, if the end of the node is greater than the end of
		 * our target range, then the whole range is free.
		 */

		if ((node->offset + node->length) >= (startBlock + numBlocks)) {
			if (node->offset > startBlock) {
				panic ("hfs_isrbtree_allocated: bad node ordering!");
			}
			return 0;
		}
	}
	/*
	 * We got here if either our node search resulted in a node whose extent
	 * was strictly before our target offset, or we couldn't find a previous node
	 * at all (the beginning of the volume).  If the former, then we can infer that
	 * at least one block in the target range is allocated since the next node's offset
	 * must be greater than startBlock.
	 *
	 * Either way, this means that the target range is unavailable to allocate, so
	 * just return 1.
	 */
	return 1;
}
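
/*
 * Illustrative worked example for hfs_isrbtree_allocated (not compiled):
 * searching for startBlock 50, numBlocks 10 (blocks 50-59).
 *
 *	A free node (offset 40, length 30) covers blocks 40-69:
 *		40 + 30 >= 50 + 10, so the whole range is free -> return 0.
 *	A free node (offset 40, length 15) covers blocks 40-54:
 *		40 + 15 < 50 + 10, so some block in 55-59 is allocated -> return 1.
 */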


#endif

/*
 * Count number of bits set in the given 32-bit unsigned number
 *
 * Returns:
 * 	Number of bits set
 */
static int num_bits_set(u_int32_t num)
{
	int count;

	for (count = 0; num; count++) {
		num &= num - 1;
	}

	return count;
}
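
/*
 * Illustrative sketch (not compiled): num_bits_set() is Kernighan's
 * population count -- each 'num &= num - 1' clears the lowest set bit, so
 * the loop iterates once per set bit rather than once per bit position.
 * The function name below is hypothetical and exists only for this example.
 */
#if 0
static void example_popcount_usage(void)
{
	int a = num_bits_set(0x00000000);	/* 0 iterations, returns 0  */
	int b = num_bits_set(0x80000001);	/* 2 iterations, returns 2  */
	int c = num_bits_set(0xFFFFFFFF);	/* 32 iterations, returns 32 */
}
#endif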

/*
 * For a given range of blocks, find the total number of blocks
 * allocated.  If 'stop_on_first' is true, it stops as soon as it
 * encounters the first allocated block.  This option is useful
 * to determine if any block is allocated or not.
 *
 * Inputs:
 * 	startingBlock	First allocation block number of the range to be scanned.
 * 	numBlocks	Total number of blocks that need to be scanned.
 * 	stop_on_first	Stop the search after the first allocated block is found.
 *
 * Output:
 * 	allocCount	Total number of allocation blocks allocated in the given range.
 *
 * 			On error, it is the number of allocated blocks found
 * 			before the function got an error.
 *
 * 			If 'stop_on_first' is set,
 * 				allocCount = 1 if any allocated block was found.
 * 				allocCount = 0 if no allocated block was found.
 *
 * Returns:
 * 	0 on success, non-zero on failure.
 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  wordsLeft;      // Number of words left in this bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
	u_int32_t  numBits;        // Number of bits in word to allocate
	u_int32_t  *buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	u_int32_t  blockCount = 0;
	int  error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;
		numBits = kBitsPerWord - firstBit;
		if (numBits > numBlocks) {
			numBits = numBlocks;
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}
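
/*
 * Illustrative sketch (not compiled): building the partial-word masks used
 * in hfs_isallocated_internal above.  kAllBitsSetInWord >> firstBit keeps
 * bit positions firstBit..31, and the second shift clips off any bits past
 * the last block of interest.  example_range_mask() is a hypothetical name
 * used only for this example.
 */
#if 0
static u_int32_t example_range_mask(u_int32_t firstBit, u_int32_t numBits)
{
	u_int32_t mask = kAllBitsSetInWord >> firstBit;		/* bits firstBit..31 */
	if (firstBit + numBits < kBitsPerWord)
		mask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	/* clip the tail */
	return mask;	/* e.g. firstBit=4, numBits=8 -> 0x0FF00000 */
}
#endif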

/*
 * Count total number of blocks that are allocated in the given
 * range from the bitmap.  This is used to preflight total blocks
 * that need to be relocated during volume resize.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns:
 * 	0 on success, non-zero on failure.
 * 	On failure, allocCount is zero.
 */
int
hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, u_int32_t *allocCount)
{
	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
}

/*
 * Test to see if any blocks in a range are allocated.
 *
 * Note:  On error, this function returns 1, which means that
 * one or more blocks in the range are allocated.  This function
 * is primarily used for volume resize and we do not want
 * to report to the caller that the blocks are free when we
 * were not able to deterministically find it out.  So on error,
 * we always report that the blocks are allocated.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns
 *	0 if all blocks in the range are free.
 *	1 if blocks in the range are allocated, or there was an error.
 */
int
hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	int error;
	u_int32_t allocCount;

	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
	if (error) {
		/* On error, we always say that the blocks are allocated
		 * so that volume resize does not return false success.
		 */
		return 1;
	} else {
		/* The function was deterministically able to find out
		 * if there was any block allocated or not.  In that case,
		 * the value in allocCount is good enough to be returned
		 * back to the caller.
		 */
		return allocCount;
	}
}

/*
 * Check to see if the red-black tree is live.  Allocation file lock must be held
 * shared or exclusive to call this function. Note that we may call this even if
 * HFS is built without activating the red-black tree code.
 */
__private_extern__
int
hfs_isrbtree_active(struct hfsmount *hfsmp){

	//TODO: Update this function to deal with a truncate/resize coming in when the tree
	//isn't fully finished.  maybe we need to check the flags for something other than ENABLED?

#if CONFIG_HFS_ALLOC_RBTREE
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
	}
	if (hfsmp){

		if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
			return 1;
		}
	}
#else
	#pragma unused (hfsmp)
#endif
	/* If the RB Tree code is not enabled, then just always return 0 */
	return 0;
}


/*
 * This function scans the specified bitmap block and acts on it as necessary.
 * We may add it to the list of blocks to be UNMAP/TRIM'd or add it to allocator
 * data structures.  This function is not #if'd to the CONFIG_RB case because
 * we want to use it unconditionally at mount time if on a UNMAP-capable device.
 *
 * Additionally, we may want an allocating thread to invoke this if the tree
 * does not have enough extents to satisfy an allocation request.
 *
 * startbit		- the allocation block (bit) at which we need to start our scan.  For
 *				instance, we may need to start the normal allocation scan in the
 *				middle of an existing bitmap block.
 * endBit		- the allocation block where we should end this search (inclusive).
 * bitToScan	- output argument for this function to specify the next bit to scan.
 * list			- trim list in which to accumulate the free ranges this scan discovers.
 *
 * Returns:
 *		0 on success
 *		nonzero on failure.
 */

static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
                                u_int32_t endBit, u_int32_t *bitToScan,
                                struct jnl_trim_list *list) {

	int error;
	u_int32_t curAllocBlock;
	struct buf *blockRef = NULL;
	u_int32_t *buffer = NULL;
	u_int32_t wordIndexInBlock;
	u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize;
	u_int32_t wordsPerBlock = blockSize / kBytesPerWord;
	u_int32_t offset = 0;
	u_int32_t size = 0;

	/*
	 * Read the appropriate block from the bitmap file.  ReadBitmapBlock
	 * figures out which actual on-disk block corresponds to the bit we're
	 * looking at.
	 */
	error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef);
	if (error) {
		return error;
	}

	/* curAllocBlock represents the logical block we're analyzing. */
	curAllocBlock = startbit;

	/*  Figure out which word curAllocBlock corresponds to in the block we read  */
	wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;

	/* Scan a word at a time */
	while (wordIndexInBlock < wordsPerBlock) {
		u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
		u_int32_t curBit;

		/* curBit starts at its position within the word, since the scan may begin mid-word */
		for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {

			u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
			if (ALLOC_DEBUG) {
				u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer);
				if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) {
					panic("hfs_alloc_scan: curAllocBit %u, curBit (%d), word (0x%x), is_allocated (0x%x)  res(0x%x) \n",
						  curAllocBlock, curBit, currentWord, is_allocated, res);
				}
			}
			/*
			 * If curBit is not allocated, keep track of the start of the free range.
			 * Increment a running tally on how many free blocks in a row we've seen.
			 */
			if (!is_allocated) {
				size++;
				if (offset == 0) {
					offset = curAllocBlock;
				}
			}
			else {
				/*
				 * If we hit an allocated block, insert the extent that tracked the range
				 * we saw, and reset our tally counter.
				 */
				if (size != 0) {
#if CONFIG_HFS_ALLOC_RBTREE
					extent_tree_free_space(&hfsmp->offset_tree, size, offset);
#endif
					hfs_track_unmap_blocks (hfsmp, offset, size, list);
					size = 0;
					offset = 0;
				}
			}
			curAllocBlock++;
			/*
			 * Exit early if the next bit we'd analyze would take us beyond the end of the
			 * range that we're supposed to scan.
			 */
			if (curAllocBlock >= endBit) {
				goto DoneScanning;
			}
		}
		wordIndexInBlock++;
	}
DoneScanning:

	/* We may have been tracking a range of free blocks that hasn't been inserted yet. */
	if (size != 0) {
#if CONFIG_HFS_ALLOC_RBTREE
		extent_tree_free_space(&hfsmp->offset_tree, size, offset);
#endif
		hfs_track_unmap_blocks (hfsmp, offset, size, list);
	}
	/*
	 * curAllocBlock represents the next block we need to scan while we're in this
	 * function.
	 */
	*bitToScan = curAllocBlock;

	ReleaseScanBitmapBlock(blockRef);

	return 0;
}
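
/*
 * Illustrative worked example for hfs_alloc_scan_block (not compiled):
 * given bitmap bits 1 1 0 0 0 1 starting at allocation block 8, the scan
 * sees set bits at blocks 8-9, starts a free run at block 10 (offset = 10),
 * grows it through block 12 (size = 3), then hits the set bit at block 13
 * and emits the extent (offset 10, size 3) before resetting both counters.
 */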


/*
 * This function is basically the same as hfs_isallocated, except it's designed for
 * use with the red-black tree validation code.  It assumes we're only checking whether
 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
 *
 * This should not be called in general purpose scanning code.
 */
int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {

	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
	u_int32_t  numBits;        // Number of bits in word to allocate
	u_int32_t  bitsPerBlock;
	uintptr_t  blockRef;
	u_int32_t  wordsPerBlock;
	u_int32_t  numBlocks = 1;
	u_int32_t  *buffer = NULL;

	int  inuse = 0;
	int error;


	if (bp_buf) {
		/* just use passed-in buffer if avail. */
		buffer = bp_buf;
	}
	else {
		/*
		 * Pre-read the bitmap block containing the first word of allocation
		 */
		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
		if (error)
			return (error);
	}

	/*
	 * Initialize currentWord.
	 */
	u_int32_t wordIndexInBlock;

	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
	currentWord = buffer + wordIndexInBlock;

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	bitMask = kAllBitsSetInWord >> firstBit;
	numBits = kBitsPerWord - firstBit;
	if (numBits > numBlocks) {
		numBits = numBlocks;
		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
	}
	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
		inuse = 1;
		goto Exit;
	}
	numBlocks -= numBits;
	++currentWord;

Exit:
	if(bp_buf == NULL) {
		if (buffer) {
			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
		}
	}
	return (inuse);
}

#if CONFIG_HFS_ALLOC_RBTREE

/*
 * Extern function that is called from mount and upgrade mount routines
 * that enable us to initialize the tree.
 */

__private_extern__
u_int32_t InitTree(struct hfsmount *hfsmp) {
	extent_tree_init (&(hfsmp->offset_tree));
	return 0;
}


/*
 * This function builds the trees specified in its arguments. It uses
 * buf_meta_breads to scan through the bitmap and re-build the tree state.
 * It is very important to use buf_meta_bread because we need to ensure that we
 * read the most current version of the blocks that we're scanning.  If we used
 * cluster_io, then journaled transactions could still be sitting in RAM since they are
 * written to disk in the proper location asynchronously.
 *
 * Because this could still be running when mount has finished, we need to check,
 * after every allocation block that we work on, whether an unmount or some other
 * operation that would cause us to tear down has come in (think downgrade mount).
 * If an unmount has come in, then abort whatever we're doing and return -1
 * to indicate we hit an error.  If we don't do this, we'd hold up unmount for
 * a very long time.
 *
 * This function assumes that the bitmap lock is acquired exclusively before being
 * called.  It will drop the lock and then re-acquire it during operation, but
 * will always return with the lock held.
 */
__private_extern__
u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {

	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);

	u_int32_t *cur_block_eof;
	int error = 0;

	int USE_FINE_GRAINED_LOCKING = 0;

	/* Initialize the block counter while we hold the bitmap lock */
	cur_block_eof = &hfsmp->offset_block_end;

	/*
	 * This loop advances over all allocation bitmap blocks of the current region
	 * to scan them and add the results into the red-black tree.  We use the mount point
	 * variable offset_block_end as our loop counter.  This gives us flexibility
	 * because we can release the allocation bitmap lock and allow a thread that wants
	 * to make an allocation to grab the lock and do some scanning on our behalf while we're
	 * waiting to re-acquire the lock.  Then, the allocating thread will only do as much bitmap
	 * scanning as needed to fulfill its allocation.
	 *
	 * If the other thread does IO for us, then it will update the offset_block_end
	 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
	 * scanning.  So when we re-grab the lock, our current EOF/loop will immediately skip us to the next
	 * block that needs scanning.
	 */

	while (*cur_block_eof < endBlock) {

		/*
		 * If the filesystem is being resized before the bitmap has been fully scanned, we'll
		 * update our endBlock to match the current allocation limit in the hfsmp struct.
		 * The allocLimit field would only be updated while holding the bitmap lock, so we won't
		 * be executing this code at the same time that the resize is going on.
		 */
		if ((initialscan) && (endBlock != hfsmp->allocLimit)) {

			/* If we're past the new/modified allocLimit, then just stop immediately.*/
			if (*cur_block_eof >= hfsmp->allocLimit ) {
				break;
			}
			endBlock = hfsmp->allocLimit;
		}

		/*
		 * TODO: fix unmount stuff!
		 * See rdar://7391404
		 *
		 * Once the RB allocator is checked in, we'll want to augment it to not hold the
		 * allocation bitmap lock for the entire duration of the tree scan.  For a first check-in
		 * it's ok to do that but we can't leave it like that forever.
		 *
		 * The gist of the new algorithm will work as follows:
		 * if an unmount is in flight and has been detected:
		 *		abort tree-build.
		 *		unset tree-in-progress bit.
		 *		wakeup unmount thread
		 *		unlock allocation bitmap lock, fail out.
		 *
		 * The corresponding code in the unmount side should already be in place.
		 */

		/* No trim list is maintained during tree generation; passing NULL for the list argument (assumption). */
		error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof, NULL);

		//TODO: Fix this below!
		if (USE_FINE_GRAINED_LOCKING){
			hfs_systemfile_unlock(hfsmp, *flags);
			*flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		}
		//TODO: Infer that if *flags == 0, we don't actually need to lock/unlock.
	}

	return error;
}

/*
 * This function destroys the specified rb-trees associated with the mount point.
 */
__private_extern__
void DestroyTrees(struct hfsmount *hfsmp) {

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
		printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
		hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end );
	}

	/*
	 * extent_tree_destroy will start with the first entry in the tree (by offset), then
	 * iterate through the tree quickly using its embedded linked list.  This results in tree
	 * destruction in O(n) time.
	 */

	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
		extent_tree_destroy(&hfsmp->offset_tree);

		/* Mark Trees as disabled */
		hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;
	}

	return;
}

#endif

/*
 * This function resets all of the data structures relevant to the
 * free extent cache stored in the hfsmount struct.
 *
 * If we are using the red-black tree code then we need to account for the fact that
 * we may encounter situations where we need to jettison the tree.  If that is the
 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
 * the free ext cache is zeroed before we start using it.
 *
 * We also reset and disable the cache when allocLimit is updated... which
 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
 * It is independent of the type of allocator being used currently.
 */
void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
{
	int bytes;
	void *freeExt;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* reset Free Extent Count */
	hfsmp->vcbFreeExtCnt = 0;

	/* reset the actual array */
	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
	freeExt = (void*)(hfsmp->vcbFreeExt);

	bzero (freeExt, bytes);

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return;
}

/*
 * This function is used to inform the allocator if we have to effectively shrink
 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
 *
 * The bitmap lock must be held when calling this function.  This function also modifies the
 * allocLimit field in the hfs mount point structure in the general case.
 *
 * In the shrinking case, we'll have to remove all free extents from the red-black
 * tree past the specified offset new_end_block.  In the growth case, we'll have to force
 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
 *
 * new_end_block represents the total number of blocks available for allocation in the resized
 * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
 * will be out of the (0, n-1) range that is indexable in the bitmap.
 *
 * Returns	0 on success
 *			errno on failure
 */
__private_extern__
u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {

	/*
	 * Update allocLimit to the argument specified and reset the free extent
	 * cache.  Nothing else needs to happen unless the red/black tree is enabled.
	 */
	hfsmp->allocLimit = new_end_block;

	/* Invalidate the free extent cache completely so that
	 * it does not have any extents beyond end of current
	 * volume.
	 */
	ResetVCBFreeExtCache(hfsmp);

#if CONFIG_HFS_ALLOC_RBTREE
	/* Shrinking the existing filesystem */
	if ((new_end_block < hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		extent_node_t search_sentinel;
		extent_node_t *node = NULL;
		/* Remover points to the current item to free/remove from the tree */
		extent_node_t *remover = NULL;

		/* Begin search at the specified offset */
		memset (&search_sentinel, 0, sizeof(extent_node_t));
		search_sentinel.offset = new_end_block;

		/*
		 * Find the first available extent that satisfies the allocation by searching
		 * from the starting point or 1 earlier.  We may need to split apart an existing node
		 * if it straddles the new alloc limit.
		 */
		node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
		if (node) {
			/* If it's an exact match, then just remove them all from this point forward */
			if (node->offset == new_end_block) {
				/*
				 * Find the previous entry and update its next pointer to NULL
				 * since this entry is biting the dust.  Update remover to node.
				 */
				extent_node_t *prev = NULL;
				prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
				if (prev) {
					prev->offset_next = NULL;
				}
				remover = node;
			}
			else {
				/* See if we need to split this node */
				if ((node->offset + node->length) > new_end_block) {
					/*
					 * Update node to reflect its new size up until new_end_block.
					 */
					remover = node->offset_next;
					node->length = new_end_block - node->offset;
					/* node is becoming the last free extent in the volume.  */
					node->offset_next = NULL;
				}
				else {
					if (node->offset_next == NULL) {
						/*
						 * 'node' points to the last free extent in the volume.
						 * Coincidentally, it is also before the new cut-off point at which
						 * we will stop representing bitmap values in the tree.  Just bail out now.
						 */
						return 0;
					}
					/*
					 * Otherwise, point our temp variable 'remover' to the node where
					 * we'll need to start yanking things out of the tree, and make 'node'
					 * the last element in the tree in the linked list.
					 */
					remover = node->offset_next;
					if (remover->offset <= new_end_block) {
						panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
					}
					node->offset_next = NULL;
				}
			}

			/*
			 * Remover is our "temp" pointer that points to the current node to remove from
			 * the offset tree.  We'll simply iterate through the tree linked list, removing the current
			 * element from the tree, freeing them as we come across them.
			 */
			while (remover) {
				extent_node_t *next = remover->offset_next;
				extent_tree_remove_node (&hfsmp->offset_tree, remover);
				free_node (remover);
				remover = next;
			}

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block-1);
			}

			/*
			 * Don't forget to shrink offset_block_end after a successful truncation.
			 * new_end_block should represent the number of blocks available on the
			 * truncated volume.
			 */

			hfsmp->offset_block_end = new_end_block;

			return 0;
		}
		else {
			if (ALLOC_DEBUG) {
				panic ("UpdateAllocLimit: no prev!");
			}
			return ENOSPC;
		}
	}
	/* Growing the existing filesystem */
	else if ((new_end_block > hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		int flags = 0;
		int retval = 0;

		if (ALLOC_DEBUG) {
			printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
			hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
		}


		retval = GenerateTree (hfsmp, new_end_block, &flags, 0);

		/*
		 * Don't forget to update offset_block_end after a successful tree extension.
		 */
		if (retval == 0) {

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after growth\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block);
			}

			hfsmp->offset_block_end = new_end_block;
		}

		return retval;
	}
	/* Otherwise, do nothing and fall through to the code below. */
	printf ("error : off_block_end: %u, alloclimit: %u, new_end_block: %u\n",
			hfsmp->offset_block_end, hfsmp->allocLimit, new_end_block);
#endif

	return 0;

}
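
/*
 * Illustrative worked example for the shrink path above (not compiled):
 * truncating allocLimit from 1000 to 800 while the tree holds a free node
 * (offset 700, length 250).  The node straddles the new cut-off, so it is
 * trimmed to (offset 700, length 100) and becomes the last free extent in
 * the linked list; every node at or beyond block 800 is then unlinked from
 * the tree and freed.
 */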


/*
 * Remove an extent from the list of free extents.
 *
 * This is a low-level routine.  It does not handle overlaps or splitting;
 * that is the responsibility of the caller.  The extent at the given index
 * is removed, and any following extents in the list are shifted up.
 *
 * Inputs:
 *	index - Index of the extent to remove
 *
 * Result:
 *	None (the free extent count is decremented; an out-of-range index
 *	panics on debug builds, and is logged and ignored otherwise).
 */
static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
{
	if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
		if (ALLOC_DEBUG)
			panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
		else
			printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
		return;
	}
	int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
	if (shift_count > 0) {
		memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
	}
	hfsmp->vcbFreeExtCnt--;
}
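
/*
 * Illustrative worked example (not compiled): removing index 1 from a
 * four-entry cache.  shift_count = 4 - 1 - 1 = 2, so the entries at
 * indices 2 and 3 slide down over index 1 with a single memmove, and
 * vcbFreeExtCnt drops from 4 to 3.
 */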


/*
 * Add an extent to the list of free extents.
 *
 * This is a low-level routine.  It does not handle overlaps or coalescing;
 * that is the responsibility of the caller.  This routine *does* make
 * sure that the extent it is adding is inserted in the correct location.
 * If the list is full, this routine will handle either removing the last
 * extent in the list to make room for the new extent, or ignoring the
 * new extent if it is "worse" than the last extent in the list.
 *
 * Inputs:
 *	startBlock - Start of extent to add
 *	blockCount - Number of blocks in extent to add
 *
 * Result:
 *	The index at which the extent was inserted, or kMaxFreeExtents
 *	if the extent was not inserted (the list was full, and the extent
 *	being added was "worse" than everything in the list).
 */
static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	uint32_t i;

	/* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
	if (ALLOC_DEBUG) {
		uint32_t endBlock = startBlock + blockCount;
		for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
			if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
				startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
					continue;
			}
			panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
				hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
		}
	}

	/* Figure out what index the new extent should be inserted at. */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* The list is sorted by increasing offset. */
			if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
				break;
			}
		} else {
			/* The list is sorted by decreasing size. */
			if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
				break;
			}
		}
	}

	/* When we get here, i is the index where the extent should be inserted. */
	if (i == kMaxFreeExtents) {
		/*
		 * The new extent is worse than anything already in the list,
		 * and the list is full, so just ignore the extent to be added.
		 */
		return i;
	}

	/*
	 * Grow the list (if possible) to make room for an insert.
	 */
	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
		hfsmp->vcbFreeExtCnt++;

	/*
	 * If we'll be keeping any extents after the insert position, then shift them.
	 */
	int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
	if (shift_count > 0) {
		memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
	}

	/* Finally, store the new extent at its correct position. */
	hfsmp->vcbFreeExt[i].startBlock = startBlock;
	hfsmp->vcbFreeExt[i].blockCount = blockCount;
	return i;
}
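
/*
 * Illustrative worked example (not compiled), with hypothetical extents.
 * On a normal device the cache is sorted by descending blockCount, so the
 * largest known free extent is always at index 0:
 *
 *	before: (len 500) (len 200) (len 90)
 *	insert len 250 -> (len 500) (len 250) (len 200) (len 90)
 *
 * On a sparse device it is sorted by ascending startBlock instead:
 *
 *	before: (start 10) (start 400) (start 900)
 *	insert start 120 -> (start 10) (start 120) (start 400) (start 900)
 */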


/*
 * Remove an entry from free extent cache after it has been allocated.
 *
 * This is a high-level routine.  It handles removing a portion of a
 * cached extent, potentially splitting it into two (if the cache was
 * already full, throwing away the extent that would sort last).  It
 * also handles removing an extent that overlaps multiple extents in
 * the cache.
 *
 * Inputs:
 *	hfsmp		- mount point structure
 *	startBlock	- starting block of the extent to be removed.
 *	blockCount	- number of blocks of the extent to be removed.
 */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	u_int32_t i, insertedIndex;
	u_int32_t currentStart, currentEnd, endBlock;
	int extentsRemoved = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	/* If red-black tree is enabled, no free extent cache is necessary */
	if (hfs_isrbtree_active(hfsmp) == true) {
		return;
	}
#endif

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	endBlock = startBlock + blockCount;

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Iterate over all of the extents in the free extent cache, removing or
	 * updating any entries that overlap with the input extent.
	 */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentStart = hfsmp->vcbFreeExt[i].startBlock;
		currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;

		/*
		 * If the current extent is entirely before or entirely after the
		 * extent to be removed, then we keep it as-is.
		 */
		if (currentEnd <= startBlock || currentStart >= endBlock) {
			continue;
		}

		/*
		 * If the extent being removed entirely contains the current extent,
		 * then remove the current extent.
		 */
		if (startBlock <= currentStart && endBlock >= currentEnd) {
			remove_free_extent_list(hfsmp, i);

			/*
			 * We just removed the extent at index i.  The extent at
			 * index i+1 just got shifted to index i.  So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
			++extentsRemoved;
			continue;
		}

		/*
		 * If the extent being removed is strictly "in the middle" of the
		 * current extent, then we need to split the current extent into
		 * two discontiguous extents (the "head" and "tail").  The good
		 * news is that we don't need to examine any other extents in
		 * the list.
		 */
		if (startBlock > currentStart && endBlock < currentEnd) {
			remove_free_extent_list(hfsmp, i);
			add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
			add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
			break;
		}

		/*
		 * The only remaining possibility is that the extent to be removed
		 * overlaps the start or end (but not both!) of the current extent.
		 * So we need to replace the current extent with a shorter one.
		 *
		 * The only tricky part is that the updated extent might be at a
		 * different index than the original extent.  If the updated extent
		 * was inserted after the current extent, then we need to re-examine
		 * the entry at index i, since it now contains the extent that was
		 * previously at index i+1.  If the updated extent was inserted
		 * before or at the same index as the removed extent, then the
		 * following extents haven't changed position.
		 */
		remove_free_extent_list(hfsmp, i);
		if (startBlock > currentStart) {
			/* Remove the tail of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
		} else {
			/* Remove the head of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
		}
		if (insertedIndex > i) {
			--i;	/* Undo the "++i" in the loop, so we examine the entry at index i again. */
		}
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);

	return;
}
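
/*
 * Illustrative worked example for the "middle" split case above (not
 * compiled): the cache holds (start 100, count 100), i.e. blocks 100-199,
 * and blocks 140-159 are allocated out of it (startBlock 140, blockCount 20).
 * The original entry is removed and replaced by a head and a tail:
 *
 *	head: (start 100, count 40)	blocks 100-139
 *	tail: (start 160, count 40)	blocks 160-199
 */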


/*
 * Add an entry to free extent cache after it has been deallocated.
 *
 * This is a high-level routine.  It will merge overlapping or contiguous
 * extents into a single, larger extent.
 *
 * If the extent provided has blocks beyond current allocLimit, it is
 * clipped to allocLimit (so that we won't accidentally find and allocate
 * space beyond allocLimit).
 *
 * Inputs:
 *	hfsmp		- mount point structure
 *	startBlock	- starting block of the extent to be added.
 *	blockCount	- number of blocks of the extent to be added.
 *
 * Returns:
 *	true		- if the extent was added successfully to the list
 *	false		- if the extent was not added to the list, maybe because
 *			  the extent was beyond allocLimit, or was not a good
 *			  candidate to be put in the cache.
 */
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	Boolean retval = false;
	uint32_t endBlock;
	uint32_t currentEnd;
	uint32_t i;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	/*
	 * If using the red-black tree allocator, then there's no need to special-case
	 * the sparse device.  We'll simply add the region we've recently freed
	 * to the red-black tree, where it will get sorted by offset and length.  The only special
	 * casing will need to be done on the allocation side, where we may favor free extents
	 * based on offset even if it will cause fragmentation.  This may be true, for example, if
	 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp) == true) {
		goto out_not_locked;
	}
#endif

	/* No need to add extent that is beyond current allocLimit */
	if (startBlock >= hfsmp->allocLimit) {
		goto out_not_locked;
	}

	/* If end of the free extent is beyond current allocLimit, clip the extent */
	if ((startBlock + blockCount) > hfsmp->allocLimit) {
		blockCount = hfsmp->allocLimit - startBlock;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Make a pass through the free extent cache, looking for known extents that
	 * overlap or are contiguous with the extent to be added.  We'll remove those
	 * extents from the cache, and incorporate them into the new extent to be added.
	 */
	endBlock = startBlock + blockCount;
	for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
		if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
			/* Extent i does not overlap and is not contiguous, so keep it. */
			continue;
		} else {
			/* We need to remove extent i and combine it with the input extent. */
			if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
				startBlock = hfsmp->vcbFreeExt[i].startBlock;
			if (currentEnd > endBlock)
				endBlock = currentEnd;

			remove_free_extent_list(hfsmp, i);
			/*
			 * We just removed the extent at index i.  The extent at
			 * index i+1 just got shifted to index i.  So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
		}
	}
	if (add_free_extent_list(hfsmp, startBlock, endBlock - startBlock) < kMaxFreeExtents)
		retval = true;	/* the (possibly merged) extent made it into the cache */

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

out_not_locked:
	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);

	return retval;
}
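
/*
 * Illustrative worked example of the coalescing pass above (not compiled):
 * the cache holds (start 100, count 50) and (start 200, count 30), and
 * blocks 150-199 are deallocated (startBlock 150, blockCount 50):
 *
 *	(100,50) touches the new extent -> merged, startBlock becomes 100
 *	(200,30) touches the new extent -> merged, endBlock becomes 230
 *	result: one cache entry (start 100, count 130), covering blocks 100-229
 */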

/* Debug function to check if the free extent cache is good or not */
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
{
	u_int32_t i, j;

	/* Do not do anything if debug is not on, or if we're using the red-black tree */
	if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
		return;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
		panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);

	/*
	 * Iterate the Free extent cache and ensure no entries are bogus or refer to
	 * allocated blocks.
	 */
	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
		u_int32_t start, nblocks;

		start   = hfsmp->vcbFreeExt[i].startBlock;
		nblocks = hfsmp->vcbFreeExt[i].blockCount;

		//printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);

		/* Check if any of the blocks in free extent cache are allocated.
		 * This should not be enabled always because it might take
		 * very long for large extents that get added to the list.
		 *
		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
		 * because it is going to do I/O.  Note that the free extent
		 * cache could change.  That's a risk we take when using this
		 * debugging code.  (Another alternative would be to try to
		 * detect when the free extent cache changed, and perhaps
		 * restart if the list changed while we dropped the lock.)
		 */
		if (check_allocated) {
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (hfs_isallocated(hfsmp, start, nblocks)) {
				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
					  hfsmp, i, start, nblocks);
			}
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
		}

		/* Check if any part of the extent is beyond allocLimit */
		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
					hfsmp, i, start, nblocks, hfsmp->allocLimit);
		}

		/* Check if there are any duplicate start blocks */
		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
			if (start == hfsmp->vcbFreeExt[j].startBlock) {
				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicates\n",
				      hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
				      hfsmp->vcbFreeExt[j].blockCount);
			}
		}

		/* Check if the entries are out of order */
		if ((i+1) != hfsmp->vcbFreeExtCnt) {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				/* sparse devices are sorted by starting block number (ascending) */
				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
						hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
						hfsmp->vcbFreeExt[i+1].blockCount);
				}
			} else {
				/* normally sorted by block count (descending) */
				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
						hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
						hfsmp->vcbFreeExt[i+1].blockCount);
				}
			}
		}
	}
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}