1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29	File:		VolumeAllocation.c
30
31	Contains:	Routines for accessing and modifying the volume bitmap.
32
33	Version:	HFS Plus 1.0
34
35 * Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.
36
37*/
38
39/*
40Public routines:
41	BlockAllocate
42					Allocate space on a volume.  Can allocate space contiguously.
43					If not contiguous, then allocation may be less than what was
44					asked for.  Returns the starting block number, and number of
45					blocks.  (Will only do a single extent???)
46	BlockDeallocate
47					Deallocate a contiguous run of allocation blocks.
48
49	BlockMarkAllocated
50					Exported wrapper to mark blocks as in-use.  This will correctly determine
51					whether or not the red-black tree is enabled and call the appropriate function
52					if applicable.
53	BlockMarkFree
54					Exported wrapper to mark blocks as freed.  This will correctly determine whether or
55					not the red-black tree is enabled and call the appropriate function if applicable.
56
57
58	ResetVCBFreeExtCache
59					Since the red-black tree obviates the need to maintain the free extent cache, we do
60					not update it if the tree is also live.  As a result, if we ever need to destroy the trees
61					we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
62					bitmap scanning allocator.
63					We also reset and disable the free extent cache when volume resizing is
64					in flight.
65
66	UpdateAllocLimit
67					Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
68					allocations from occupying space in the region we are modifying during a filesystem resize.
69					At other times, it should be consistent with the total number of allocation blocks in the
70					filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
71					know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
72					number of items in the tree that we can allocate from.
73
74	ScanUnmapBlocks
75					Traverse the entire allocation bitmap.  Potentially issue DKIOCUNMAPs to the device as it
76					tracks unallocated ranges when iterating the volume bitmap.  Additionally, build up the in-core
77					summary table of the allocation bitmap.
78
79Internal routines:
80	BlockMarkFreeInternal
81					Mark a contiguous range of blocks as free.  The corresponding
82					bits in the volume bitmap will be cleared.  This will actually do the work
83					of modifying the bitmap for us.
84
85	BlockMarkAllocatedInternal
86					Mark a contiguous range of blocks as allocated.  The cor-
87					responding bits in the volume bitmap are set.  Also tests to see
88					if any of the blocks were previously unallocated.
89	BlockFindContiguous
90					Find a contiguous range of blocks of a given size.  The caller
91					specifies where to begin the search (by block number).  The
92					block number of the first block in the range is returned.  This is only
93					called by the bitmap scanning logic as the red-black tree should be able
94					to do this internally by searching its tree.
95	BlockAllocateAny
96					Find and allocate a contiguous range of blocks up to a given size.  The
97					first range of contiguous free blocks found are allocated, even if there
98					are fewer blocks than requested (and even if a contiguous range of blocks
99					of the given size exists elsewhere).
100	BlockAllocateAnyBitmap
101					Finds a range of blocks per the above requirements without using the
102					Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
103					any valid range of free space needed.
104	BlockAllocateContig
105					Find and allocate a contiguous range of blocks of a given size.  If
106					a contiguous range of free blocks of the given size isn't found, then
107					the allocation fails (i.e. it is "all or nothing").
108	BlockAllocateKnown
109					Try to allocate space from known free space in the volume's
110					free extent cache.
111	ReadBitmapBlock
112					Given an allocation block number, read the bitmap block that
113					contains that allocation block into a caller-supplied buffer.
114
115	ReleaseBitmapBlock
116					Release a bitmap block back into the buffer cache.
117
118	ReadBitmapRange
119					Given an allocation block number, read a range of bitmap that
120					must begin at that allocation block into a caller supplied buffer.
121
122	ReleaseBitmapRange
123					Release and invalidate a buf_t corresponding to the bitmap
124					back into the UBC in order to prevent coherency issues.
125
126	remove_free_extent_cache
127					Remove an extent from the free extent cache.  Handles overlaps
128					with multiple extents in the cache, and handles splitting an
129					extent in the cache if the extent to be removed is in the middle
130					of a cached extent.
131
132	add_free_extent_cache
133					Add an extent to the free extent cache.  It will merge the
134					input extent with extents already in the cache.
135	CheckUnmappedBytes
136					Check whether or not the current transaction
137					has allocated blocks that were recently freed. This may have data safety implications.
138
139
140
141Debug/Test Routines
142	hfs_isallocated
143					Test to see if any blocks in a range are allocated.  Journal or
144					allocation file lock must be held.
145
146	hfs_isallocated_scan
147					Test to see if any blocks in a range are allocated.  Releases and
148					invalidates the block used when finished.
149
150Optimization Routines
151	hfs_alloc_scan_block
152					Given a starting allocation block number, figures out which physical block contains that
153					allocation block's bit, and scans it from the starting bit until either the ending bit or
154					the end of the block.  Free space extents are inserted into the appropriate red-black tree.
155
156*/
157
158#include "../../hfs_macos_defs.h"
159
160#include <sys/types.h>
161#include <sys/buf.h>
162#include <sys/systm.h>
163#include <sys/sysctl.h>
164#include <sys/disk.h>
165#include <sys/ubc.h>
166#include <sys/uio.h>
167#include <kern/kalloc.h>
168#include <sys/malloc.h>
169
170/* For VM Page size */
171#include <libkern/libkern.h>
172
173#include "../../hfs.h"
174#include "../../hfs_dbg.h"
175#include "../../hfs_format.h"
176#include "../../hfs_endian.h"
177#include "../../hfs_macos_defs.h"
178#include "../headers/FileMgrInternal.h"
179#include "../../hfs_kdebug.h"
180
181/* Headers for unmap-on-mount support */
182#include <vfs/vfs_journal.h>
183#include <sys/disk.h>
184
185#ifndef CONFIG_HFS_TRIM
186#define CONFIG_HFS_TRIM 0
187#endif
188
189/*
190 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
191 * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
192 * disabled by default because there can be a lot of these events,
193 * and we don't want to overwhelm the kernel debug buffer.  If you
194 * want to watch these events in particular, just set the sysctl.)
195 */
196static int hfs_kdebug_allocation = 0;
197SYSCTL_DECL(_vfs_generic);
198SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
199SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
200SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
201enum {
202	/*
203	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
204	 * BlockDeallocate, including the internal BlockAllocateXxx
205	 * routines so we can see how an allocation was satisfied.
206	 *
207	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
208	 * free extent cache.
209	 *
210	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
211	 *
212	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
213	 * or clearing bits, scanning the bitmap).
214	 */
215	HFSDBG_ALLOC_ENABLED		= 1,
216	HFSDBG_EXT_CACHE_ENABLED	= 2,
217	HFSDBG_UNMAP_ENABLED		= 4,
218	HFSDBG_BITMAP_ENABLED		= 8
219};
220
221enum {
222	kBytesPerWord			=	4,
223	kBitsPerByte			=	8,
224	kBitsPerWord			=	32,
225
226	kBitsWithinWordMask		=	kBitsPerWord-1
227};
228
229#define kLowBitInWordMask	0x00000001ul
230#define kHighBitInWordMask	0x80000000ul
231#define kAllBitsSetInWord	0xFFFFFFFFul
232
233#define HFS_MIN_SUMMARY_BLOCKSIZE 4096
234
235#define ALLOC_DEBUG 0
236
237static OSErr ReadBitmapBlock(
238		ExtendedVCB		*vcb,
239		u_int32_t		bit,
240		u_int32_t		**buffer,
241		uintptr_t		*blockRef);
242
243static OSErr ReleaseBitmapBlock(
244		ExtendedVCB		*vcb,
245		uintptr_t		blockRef,
246		Boolean			dirty);
247
248static OSErr BlockAllocateAny(
249		ExtendedVCB		*vcb,
250		u_int32_t		startingBlock,
251		u_int32_t		endingBlock,
252		u_int32_t		maxBlocks,
253		u_int32_t		flags,
254		Boolean			trustSummary,
255		u_int32_t		*actualStartBlock,
256		u_int32_t		*actualNumBlocks);
257
258static OSErr BlockAllocateAnyBitmap(
259		ExtendedVCB		*vcb,
260		u_int32_t		startingBlock,
261		u_int32_t		endingBlock,
262		u_int32_t		maxBlocks,
263		u_int32_t		flags,
264		u_int32_t		*actualStartBlock,
265		u_int32_t		*actualNumBlocks);
266
267static OSErr BlockAllocateContig(
268		ExtendedVCB		*vcb,
269		u_int32_t		startingBlock,
270		u_int32_t		minBlocks,
271		u_int32_t		maxBlocks,
272		u_int32_t		flags,
273		u_int32_t		*actualStartBlock,
274		u_int32_t		*actualNumBlocks);
275
276static OSErr BlockFindContiguous(
277		ExtendedVCB		*vcb,
278		u_int32_t		startingBlock,
279		u_int32_t		endingBlock,
280		u_int32_t		minBlocks,
281		u_int32_t		maxBlocks,
282		Boolean			useMetaZone,
283		Boolean			trustSummary,
284		u_int32_t		*actualStartBlock,
285		u_int32_t		*actualNumBlocks);
286
287static OSErr BlockAllocateKnown(
288		ExtendedVCB		*vcb,
289		u_int32_t		maxBlocks,
290		u_int32_t		*actualStartBlock,
291		u_int32_t		*actualNumBlocks);
292
293static OSErr BlockMarkAllocatedInternal (
294		ExtendedVCB		*vcb,
295		u_int32_t		startingBlock,
296		register u_int32_t	numBlocks);
297
298static OSErr BlockMarkFreeInternal(
299		ExtendedVCB	*vcb,
300		u_int32_t	startingBlock,
301		u_int32_t	numBlocks,
302		Boolean 	do_validate);
303
304
305static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
306		uint32_t **buffer, struct buf **blockRef);
307
308static OSErr ReleaseScanBitmapRange( struct buf *bp );
309
310static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
311		u_int32_t numBlocks, struct jnl_trim_list *list);
312
313static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
314
315static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
316		u_int32_t startbit,
317		u_int32_t *bitToScan,
318		struct jnl_trim_list *list);
319
320static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
321static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
322
323/* Bitmap Re-use Detection */
static inline int extents_overlap (uint32_t start1, uint32_t len1,
		uint32_t start2, uint32_t len2) {
	/*
	 * Treat each extent as the half-open range [start, start+len).
	 * The two ranges are disjoint exactly when one ends at or before
	 * the point where the other begins; otherwise they intersect.
	 */
	int first_ends_before_second = ((start1 + len1) <= start2);
	int second_ends_before_first = ((start2 + len2) <= start1);

	return !(first_ends_before_second || second_ends_before_first);
}
328
329
330int hfs_isallocated_scan (struct hfsmount *hfsmp,
331		u_int32_t startingBlock,
332		u_int32_t *bp_buf);
333
334/* Summary Table Functions */
335static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
336static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
337static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
338static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
339static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
340static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
341static int hfs_rebuild_summary (struct hfsmount *hfsmp);
342
343#if 0
344static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
345#endif
346
347/* Used in external mount code to initialize the summary table */
348int hfs_init_summary (struct hfsmount *hfsmp);
349
350#if ALLOC_DEBUG
351void hfs_validate_summary (struct hfsmount *hfsmp);
352#endif
353
354
355/* Functions for manipulating free extent cache */
356static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
357static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
358static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
359
360#if ALLOC_DEBUG
361/*
362 * Validation Routine to verify that the TRIM list maintained by the journal
363 * is in good shape relative to what we think the bitmap should have.  We should
364 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
365 * we panic.
366 */
367int trim_validate_bitmap (struct hfsmount *hfsmp);
368int trim_validate_bitmap (struct hfsmount *hfsmp) {
369	u_int64_t blockno_offset;
370	u_int64_t numblocks;
371	int i;
372	int count;
373	u_int32_t startblk;
374	u_int32_t blks;
375	int err = 0;
376	uint32_t alloccount = 0;
377
378	if (hfsmp->jnl) {
379		struct journal *jnl = (struct journal*)hfsmp->jnl;
380		if (jnl->active_tr) {
381			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
382			count = trim->extent_count;
383			for (i = 0; i < count; i++) {
384				blockno_offset = trim->extents[i].offset;
385				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
386				blockno_offset = blockno_offset / hfsmp->blockSize;
387				numblocks = trim->extents[i].length / hfsmp->blockSize;
388
389				startblk = (u_int32_t)blockno_offset;
390				blks = (u_int32_t) numblocks;
391				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
392
393				if (err == 0 && alloccount != 0) {
394					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
395				}
396			}
397		}
398	}
399	return 0;
400}
401
402#endif
403
404
405/*
406 ;________________________________________________________________________________
407 ;
408 ; Routine:		hfs_unmap_free_extent
409 ;
410 ; Function:		Make note of a range of allocation blocks that should be
411 ;				unmapped (trimmed).  That is, the given range of blocks no
412 ;				longer have useful content, and the device can unmap the
413 ;				previous contents.  For example, a solid state disk may reuse
414 ;				the underlying storage for other blocks.
415 ;
416 ;				This routine is only supported for journaled volumes.  The extent
417 ;				being freed is passed to the journal code, and the extent will
418 ;				be unmapped after the current transaction is written to disk.
419 ;
420 ; Input Arguments:
421 ;	hfsmp			- The volume containing the allocation blocks.
422 ;	startingBlock	- The first allocation block of the extent being freed.
423 ;	numBlocks		- The number of allocation blocks of the extent being freed.
424 ;________________________________________________________________________________
425 */
426static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
427{
428	u_int64_t offset;
429	u_int64_t length;
430	u_int64_t device_sz;
431	int err = 0;
432
433	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
434		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
435
436	if (ALLOC_DEBUG) {
437		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
438			panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
439		}
440	}
441
442	if (hfsmp->jnl != NULL) {
443		device_sz = hfsmp->hfs_logical_bytes;
444		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
445		length = (u_int64_t) numBlocks * hfsmp->blockSize;
446
447		/* Validate that the trim is in a valid range of bytes */
448		if ((offset >= device_sz) || ((offset + length) > device_sz)) {
449			printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
450			err = EINVAL;
451		}
452
453		if (err == 0) {
454			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
455			if (err) {
456				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
457			}
458		}
459	}
460
461	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
462		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
463}
464
465/*
466 ;________________________________________________________________________________
467 ;
468 ; Routine:		hfs_track_unmap_blocks
469 ;
470 ; Function:	Make note of a range of allocation blocks that should be
471 ;				unmapped (trimmed).  That is, the given range of blocks no
472 ;				longer have useful content, and the device can unmap the
473 ;				previous contents.  For example, a solid state disk may reuse
474 ;				the underlying storage for other blocks.
475 ;
476 ;				This routine is only supported for journaled volumes.
477 ;
478 ;              *****NOTE*****:
479 ;              This function should *NOT* be used when the volume is fully
480 ;              mounted.  This function is intended to support a bitmap iteration
481 ;              at mount time to fully inform the SSD driver of the state of all blocks
482 ;              at mount time, and assumes that there is no allocation/deallocation
483 ;              interference during its iteration.
484 ;
485 ; Input Arguments:
486 ;	hfsmp			- The volume containing the allocation blocks.
487 ;	offset          - The first allocation block of the extent being freed.
488 ;	numBlocks		- The number of allocation blocks of the extent being freed.
489 ;  list            - The list of currently tracked trim ranges.
490 ;________________________________________________________________________________
491 */
492static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
493		u_int32_t numBlocks, struct jnl_trim_list *list) {
494
495	u_int64_t offset;
496	u_int64_t length;
497	int error = 0;
498
499	if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
500		int extent_no = list->extent_count;
501		offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
502		length = (u_int64_t) numBlocks * hfsmp->blockSize;
503
504
505		list->extents[extent_no].offset = offset;
506		list->extents[extent_no].length = length;
507		list->extent_count++;
508		if (list->extent_count == list->allocated_count) {
509			error = hfs_issue_unmap (hfsmp, list);
510		}
511	}
512
513	return error;
514}
515
516/*
517 ;________________________________________________________________________________
518 ;
519 ; Routine:		hfs_issue_unmap
520 ;
521 ; Function:	Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
522 ;
523 ; Input Arguments:
524 ;	hfsmp			- The volume containing the allocation blocks.
525 ;  list            - The list of currently tracked trim ranges.
526 ;________________________________________________________________________________
527 */
528
529static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
530{
531	dk_unmap_t unmap;
532	int error = 0;
533
534	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
535		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
536	}
537
538	if (list->extent_count > 0) {
539		bzero(&unmap, sizeof(unmap));
540		unmap.extents = list->extents;
541		unmap.extentsCount = list->extent_count;
542
543		if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
544			KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
545		}
546
547#if CONFIG_PROTECT
548		/*
549		 * If we have not yet completed the first scan through the bitmap, then
550		 * optionally inform the block driver below us that this is an initialization
551		 * TRIM scan, if it can deal with this information.
552		 */
553		if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
554			unmap.options |= _DK_UNMAP_INITIALIZE;
555		}
556#endif
557		/* Issue a TRIM and flush them out */
558		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
559
560		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
561		bzero (&unmap, sizeof(unmap));
562		list->extent_count = 0;
563	}
564
565	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
566		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
567	}
568
569	return error;
570}
571
572/*
573 ;________________________________________________________________________________
574 ;
575 ; Routine:		hfs_unmap_alloc_extent
576 ;
577 ; Function:		Make note of a range of allocation blocks, some of
578 ;				which may have previously been passed to hfs_unmap_free_extent,
579 ;				is now in use on the volume.  The given blocks will be removed
580 ;				from any pending DKIOCUNMAP.
581 ;
582 ; Input Arguments:
583 ;	hfsmp			- The volume containing the allocation blocks.
584 ;	startingBlock	- The first allocation block of the extent being allocated.
585 ;	numBlocks		- The number of allocation blocks being allocated.
586 ;________________________________________________________________________________
587 */
588
589static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
590{
591	u_int64_t offset;
592	u_int64_t length;
593	int err;
594
595	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
596		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
597
598	if (hfsmp->jnl != NULL) {
599		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
600		length = (u_int64_t) numBlocks * hfsmp->blockSize;
601
602		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
603		if (err) {
604			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
605		}
606	}
607
608	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
609		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
610}
611
612
613/*
614;________________________________________________________________________________
615;
616; Routine:		hfs_trim_callback
617;
618; Function:		This function is called when a transaction that freed extents
619;				(via hfs_unmap_free_extent/journal_trim_add_extent) has been
620;				written to the on-disk journal.  This routine will add those
621;				extents to the free extent cache so that they can be reused.
622;
623;				CAUTION: This routine is called while the journal's trim lock
624;				is held shared, so that no other thread can reuse any portion
625;				of those extents.  We must be very careful about which locks
626;				we take from within this callback, to avoid deadlock.  The
627;				call to add_free_extent_cache will end up taking the cache's
628;				lock (just long enough to add these extents to the cache).
629;
630;				CAUTION: If the journal becomes invalid (eg., due to an I/O
631;				error when trying to write to the journal), this callback
632;				will stop getting called, even if extents got freed before
633;				the journal became invalid!
634;
635; Input Arguments:
636;	arg				- The hfsmount of the volume containing the extents.
637;	extent_count	- The number of extents freed in the transaction.
638;	extents			- An array of extents (byte ranges) that were freed.
639;________________________________________________________________________________
640*/
641
642__private_extern__ void
643hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
644{
645	uint32_t i;
646	uint32_t startBlock, numBlocks;
647	struct hfsmount *hfsmp = arg;
648
649	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
650		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
651
652	for (i=0; i<extent_count; ++i) {
653		/* Convert the byte range in *extents back to a range of allocation blocks. */
654		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
655		numBlocks = extents[i].length / hfsmp->blockSize;
656		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
657	}
658
659	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
660		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
661}
662
663
664/*
665   ;________________________________________________________________________________
666   ;
667   ; Routine:		CheckUnmappedBytes
668   ;
669   ; Function:	From the specified inputs, determine if the extent in question overlaps
670   ;				space that was recently freed, where the recently freed space may still be
671   ;				lingering in an uncommitted journal transaction.  This may have data safety
672   ;				implications.  The intended use is to decide whether or not to force a journal flush
673   ;				before allowing file data I/O to be issued.  If we did not do this
674   ;				then it would be possible to issue the file I/O ahead of the
675   ;				journal, resulting in data being overwritten if the transaction either
676   ;				is not committed or cannot be replayed.
677   ;
678   ;		NOTE: This function assumes that the journal and catalog/extent locks are held.
679   ;
680   ; Input Arguments:
681   ;	hfsmp			- The volume containing the allocation blocks.
682   ;	foffset			- start of the extent in question (in allocation blocks)
683   ;	numbytes		- number of blocks in the extent.
684   ;  recently_freed:	- output pointer containing whether or not the blocks were freed recently
685   ;  overlap_end 		- end of the overlap between the argument extent and the trim list (in allocation blocks)
686   ;
687   ; Output:
688   ;
689   ; 		Returns 0 if we could determine extent validity for this (or a previous transaction)
690   ; 		Returns errno if there was an error
691   ;
692   ;		If returned 0, then recently freed will contain a boolean that indicates
693   ;		that it was recently freed.
694   ;________________________________________________________________________________
695 */
696
697u_int32_t
698CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
699	uint64_t device_offset;
700	uint64_t numbytes;
701	uint32_t err = 0;
702	uint64_t lba_overlap_end;
703
704	if (hfsmp->jnl != NULL) {
705		/*
706		 * Convert the allocation block # and the number of blocks into device-relative
707		 * offsets so that they can be compared using the TRIM list.
708		 */
709		uint64_t device_sz = hfsmp->hfs_logical_bytes;
710		device_offset = blockno * ((uint64_t)hfsmp->blockSize);
711		device_offset += hfsmp->hfsPlusIOPosOffset;
712		numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
713
714		/*
715		 * Since we check that the device_offset isn't too large, it's safe to subtract it
716		 * from the size in the second check.
717		 */
718		if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
719			return EINVAL;
720		}
721
722		/* Ask the journal if this extent overlaps with any pending TRIMs */
723		if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
724			*recently_freed = 1;
725
726			/* Convert lba_overlap_end back into allocation blocks */
727			uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
728			end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
729			*overlap_end = (uint32_t) end_offset;
730		}
731		else {
732			*recently_freed = 0;
733		}
734		err = 0;
735	}
736	else {
737		/* There may not be a journal.  In that case, always return success.  */
738		*recently_freed = 0;
739	}
740	return err;
741
742}
743
744
745/*
746 ;________________________________________________________________________________
747 ;
748 ; Routine:		ScanUnmapBlocks
749 ;
750 ; Function:	Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
751 ;				device as needed so that the underlying disk device is as
752 ;				up-to-date as possible with which blocks are unmapped.
753 ;				Additionally build up the summary table as needed.
754 ;
755 ;				This function reads the bitmap in large block size
756 ; 				(up to 1MB) unlike the runtime which reads the bitmap
757 ; 				in 4K block size.  So if this function is being called
758 ;				after the volume is mounted and actively modified, the
759 ;				caller needs to invalidate all of the existing buffers
760 ;				associated with the bitmap vnode before calling this
761 ; 				function.  If the buffers are not invalidated, it can
762 ;				cause buf_t collision and potential data corruption.
763 ;
764 ; Input Arguments:
765 ;	hfsmp			- The volume containing the allocation blocks.
766 ;________________________________________________________________________________
767 */
768
769__private_extern__
770u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
771{
772	u_int32_t blocks_scanned = 0;
773	int error = 0;
774	struct jnl_trim_list trimlist;
775
776	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
777		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
778	}
779
780	/*
781	 *struct jnl_trim_list {
782	 uint32_t    allocated_count;
783	 uint32_t    extent_count;
784	 dk_extent_t *extents;
785	 };
786	 */
787
788	/*
789	 * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
790	 * which is now enabled for most architectures.  Instead, any trim related
791	 * work should be tied to whether the underlying storage media supports
792	 * UNMAP, as any solid state device would on desktop or embedded.
793	 *
794	 * We do this because we may want to scan the full bitmap on desktop
795	 * for spinning media for the purposes of building up the
796	 * summary table.
797	 *
798	 * We also avoid sending TRIMs down to the underlying media if the mount is read-only.
799	 */
800
801	if ((hfsmp->hfs_flags & HFS_UNMAP) &&
802			((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
803		/* If the underlying device supports unmap and the mount is read-write, initialize */
804		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
805		void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
806		if (extents == NULL) {
807			return ENOMEM;
808		}
809		bzero (&trimlist, sizeof(trimlist));
810		trimlist.extents = (dk_extent_t*)extents;
811		trimlist.allocated_count = alloc_count;
812		trimlist.extent_count = 0;
813	}
814
815	while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
816
817		error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
818
819		if (error) {
820			printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
821			break;
822		}
823	}
824
825	if ((hfsmp->hfs_flags & HFS_UNMAP) &&
826			((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
827		if (error == 0) {
828			hfs_issue_unmap(hfsmp, &trimlist);
829		}
830		if (trimlist.extents) {
831			kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
832		}
833	}
834
835	/*
836	 * This is in an #if block because hfs_validate_summary prototype and function body
837	 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
838	 */
839#if ALLOC_DEBUG
840	sanity_check_free_ext(hfsmp, 1);
841	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
842		/* Validate the summary table too! */
843		hfs_validate_summary(hfsmp);
844		printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
845	}
846#endif
847
848	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
849		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
850	}
851
852	return error;
853}
854
855/*
856 ;________________________________________________________________________________
857 ;
858 ; Routine:	   BlockAllocate
859 ;
860 ; Function:   Allocate space on a volume.	If contiguous allocation is requested,
861 ;			   at least the requested number of bytes will be allocated or an
862 ;			   error will be returned.	If contiguous allocation is not forced,
863 ;			   the space will be allocated with the first largest extent available
864 ;			   at the requested starting allocation block.  If there is not enough
865 ;			   room there, a block allocation of less than the requested size will be
866 ;			   allocated.
867 ;
868 ;			   If the requested starting block is 0 (for new file allocations),
869 ;			   the volume's allocation block pointer will be used as a starting
870 ;			   point.
871 ;
872 ; Input Arguments:
873 ;	 vcb			 - Pointer to ExtendedVCB for the volume to allocate space on
874 ;	 fcb			 - Pointer to FCB for the file for which storage is being allocated
875 ;	 startingBlock	 - Preferred starting allocation block, 0 = no preference
876 ;	 minBlocks	 	 - Number of blocks requested.	If the allocation is non-contiguous,
877 ;					   less than this may actually be allocated
878 ;	 maxBlocks	 	 - The maximum number of blocks to allocate.  If there is additional free
879 ;					   space after bytesRequested, then up to maxBlocks bytes should really
880 ;					   be allocated.  (Used by ExtendFileC to round up allocations to a multiple
881 ;					   of the file's clump size.)
882 ;	 flags           - Flags to specify options like contiguous, use metadata zone,
883 ;					   skip free block check, etc.
884 ;
885 ; Output:
886 ;	 (result)		 - Error code, zero for successful allocation
887 ;	 *startBlock	 - Actual starting allocation block
 ;	 *actualNumBlocks - Actual number of allocation blocks allocated
889 ;
890 ; Side effects:
891 ;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
892 ;________________________________________________________________________________
893 */
OSErr BlockAllocate (
		ExtendedVCB		*vcb,				/* which volume to allocate space on */
		u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
		u_int32_t		minBlocks,		/* desired number of blocks to allocate */
		u_int32_t		maxBlocks,		/* maximum number of blocks to allocate */
		u_int32_t		flags,			/* option flags */
		u_int32_t		*actualStartBlock,	/* actual first block of allocation */
		u_int32_t		*actualNumBlocks)
/*
 *  actualNumBlocks is the number of blocks actually allocated;
 * if forceContiguous was zero, then this may represent fewer than minBlocks
 */
{
	u_int32_t  freeBlocks;
	OSErr			err;
	Boolean			updateAllocPtr = false;		//	true if nextAllocation needs to be updated
	struct hfsmount	*hfsmp;
	Boolean useMetaZone;
	Boolean forceContiguous;
	Boolean forceFlush;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	/* Decode the option flags into local booleans for readability below. */
	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}

	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	if (flags & HFS_ALLOC_FLUSHTXN) {
		forceFlush = true;
	}
	else {
		forceFlush = false;
	}


	//
	//	Initialize outputs in case we get an error
	//
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);


	/* Skip free block check if blocks are being allocated for relocating
	 * data during truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful.  This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		//	If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	//
	//	If caller didn't specify a starting block number, then use the volume's
	//	next block to allocate from.
	//
	if (startingBlock == 0) {
		hfs_lock_mount (hfsmp);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		}
		else {
			startingBlock = vcb->nextAllocation;
		}
		hfs_unlock_mount(hfsmp);
		updateAllocPtr = true;
	}


	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	//	If the request must be contiguous, then find a sequence of free blocks
	//	that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
				flags, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
				(*actualStartBlock > startingBlock) &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will be cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		/* Defer HFS_ALLOC_FLUSHTXN to the last-resort pass further below. */
		if (forceFlush) {
			flags &= ~HFS_ALLOC_FLUSHTXN;
		}

		/*
		 * BlockAllocateKnown only examines the free extent cache; anything in there will
		 * have been committed to stable storage already.
		 */
		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);

		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.  We 'trust' the summary bitmap in this call, if it tells us
			 * that it could not find any free space.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, flags, true,
					actualStartBlock, actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * Vary the behavior here if the summary table is on or off.
			 * If it is on, then we don't trust it it if we get into this case and
			 * basically do a full scan for maximum coverage.
			 * If it is off, then we trust the above and go up until the startingBlock.
			 */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
						flags, false,
						actualStartBlock, actualNumBlocks);
			}
			else {
				err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
						flags, false,
						actualStartBlock, actualNumBlocks);
			}

			/*
		     * Last Resort: Find/use blocks that may require a journal flush.
	 		 */
			if (err == dskFulErr && forceFlush) {
				flags |= HFS_ALLOC_FLUSHTXN;
				err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
						flags, false,
						actualStartBlock, actualNumBlocks);
			}
		}
	}

Exit:
	/*
	 * CoreStorage-backed volumes: ask the device to map (back with real
	 * storage) the range we just allocated; any blocks the device could not
	 * map are handed back to the bitmap via BlockDeallocate below.
	 */
	if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) {
		errno_t ec;
		_dk_cs_map_t cm;
		uint64_t mapped_blocks;

		cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset;
		cm.cm_extent.length = (uint64_t)*actualNumBlocks * hfsmp->blockSize;
		cm.cm_bytes_mapped = 0;
		ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current());
		if (ec != 0 && ec != ENOSPC) {
			printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec);
			err = ec;
			goto Exit_CS;
		}
		mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize;
		/* CoreStorage returned more blocks than requested */
		if (mapped_blocks > *actualNumBlocks) {
			printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n",
					mapped_blocks, *actualNumBlocks);
		}
		if (*actualNumBlocks > mapped_blocks) {
			/* A contiguous request that mapped too few blocks gets nothing. */
			if (forceContiguous && mapped_blocks < minBlocks) {
				mapped_blocks = 0;
			}
		}
		/*
		 * NOTE(review): if mapped_blocks > *actualNumBlocks (the printf case
		 * above), this unsigned subtraction wraps to a huge value.  It looks
		 * like CS is assumed never to over-map — confirm with the ioctl
		 * contract before relying on this.
		 */
		uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks;
		uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks;
		if (numBlocksToFree > 0) {
			err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags);
			if (err != noErr) {
				printf ("BlockDeallocate failed (err=%d)\n", err);
				goto Exit_CS;
			}
		}
		*actualNumBlocks = mapped_blocks;
		if (*actualNumBlocks == 0 && err == noErr) {
			err = dskFulErr;
		}
	}
Exit_CS:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (*actualNumBlocks != 0) {
		//
		//	If we used the volume's roving allocation pointer, then we need to update it.
		//	Adding in the length of the current allocation might reduce the next allocate
		//	call by avoiding a re-scan of the already allocated space.  However, the clump
		//	just allocated can quite conceivably end up being truncated or released when
		//	the file is closed or its EOF changed.  Leaving the allocation pointer at the
		//	start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		hfs_lock_mount (hfsmp);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		/* Consume this allocation out of the recently-freed count (clamped at 0). */
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		/* Only move the roving pointer when the allocation is outside the metadata zone. */
		if (updateAllocPtr &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/*
		 * Update the number of free blocks on the volume
		 *
		 * Skip updating the free blocks count if the block are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		hfs_unlock_mount(hfsmp);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}

	/* Debug-only sanity checks: never vend blocks beyond the volume/resize limit. */
	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
1205
1206
1207/*
1208;________________________________________________________________________________
1209;
1210; Routine:	   BlockDeallocate
1211;
1212; Function:    Update the bitmap to deallocate a run of disk allocation blocks
1213;
1214; Input Arguments:
1215;	 vcb		- Pointer to ExtendedVCB for the volume to free space on
1216;	 firstBlock	- First allocation block to be freed
1217;	 numBlocks	- Number of allocation blocks to free up (must be > 0!)
1218;
1219; Output:
1220;	 (result)	- Result code
1221;
1222; Side effects:
1223;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
1224;	 The Allocator's red-black trees may also be modified as a result.
1225;________________________________________________________________________________
1226*/
1227
OSErr BlockDeallocate (
		ExtendedVCB		*vcb,			//	Which volume to deallocate space on
		u_int32_t		firstBlock,		//	First block in range to deallocate
		u_int32_t		numBlocks, 		//	Number of contiguous blocks to deallocate
		u_int32_t 		flags)
{
	OSErr			err;
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	//
	//	If no blocks to deallocate, then exit early
	//
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}


	/* Debug-only sanity checks: the range must lie within the volume. */
	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}

		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}
	}

	/*
	 * If we're using the summary bitmap, then try to mark the bits
	 * as potentially usable/free before actually deallocating them.
	 * It is better to be slightly speculative here for correctness.
	 */

	(void) hfs_release_summary (hfsmp, firstBlock, numBlocks);

	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);

	if (err) {
		goto Exit;
	}

	//
	//	Update the volume's free block count, and mark the VCB as dirty.
	//
	hfs_lock_mount(hfsmp);
	/*
	 * Do not update the free block count.  This flag is specified
	 * when a volume is being truncated.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	/* If we just freed the blocks at the roving pointer, roll it back. */
	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);

		/*
		 * If the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache).  Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}

	MarkVCBDirty(vcb);
	hfs_unlock_mount(hfsmp);

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
1322
1323
/*
 * Number of zero bits in each possible 4-bit nibble value.  In the volume
 * bitmap a zero bit means a free allocation block, so freebitcount[n] is
 * the count of free blocks represented by nibble value n (e.g. 0x0 -> 4
 * free, 0xF -> 0 free).  Used by MetaZoneFreeBlocks below.
 */
u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
1328
1329u_int32_t
1330MetaZoneFreeBlocks(ExtendedVCB *vcb)
1331{
1332	u_int32_t freeblocks;
1333	u_int32_t *currCache;
1334	uintptr_t blockRef;
1335	u_int32_t bit;
1336	u_int32_t lastbit;
1337	int bytesleft;
1338	int bytesperblock;
1339	u_int8_t byte;
1340	u_int8_t *buffer;
1341
1342	blockRef = 0;
1343	bytesleft = freeblocks = 0;
1344	buffer = NULL;
1345	bit = VCBTOHFS(vcb)->hfs_metazone_start;
1346	if (bit == 1)
1347		bit = 0;
1348
1349	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1350	bytesperblock = vcb->vcbVBMIOSize;
1351
1352	/*
1353	 *  Count all the bits from bit to lastbit.
1354	 */
1355	while (bit < lastbit) {
1356		/*
1357		 *  Get next bitmap block.
1358		 */
1359		if (bytesleft == 0) {
1360			if (blockRef) {
1361				(void) ReleaseBitmapBlock(vcb, blockRef, false);
1362				blockRef = 0;
1363			}
1364			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
1365				return (0);
1366			}
1367			buffer = (u_int8_t *)currCache;
1368			bytesleft = bytesperblock;
1369		}
1370		byte = *buffer++;
1371		freeblocks += freebitcount[byte & 0x0F];
1372		freeblocks += freebitcount[(byte >> 4) & 0x0F];
1373		bit += kBitsPerByte;
1374		--bytesleft;
1375	}
1376	if (blockRef)
1377		(void) ReleaseBitmapBlock(vcb, blockRef, false);
1378
1379	return (freeblocks);
1380}
1381
1382
1383/*
1384 * Obtain the next allocation block (bit) that's
1385 * outside the metadata allocation zone.
1386 */
1387static u_int32_t NextBitmapBlock(
1388		ExtendedVCB		*vcb,
1389		u_int32_t		bit)
1390{
1391	struct  hfsmount *hfsmp = VCBTOHFS(vcb);
1392
1393	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1394		return (bit);
1395	/*
1396	 * Skip over metadata allocation zone.
1397	 */
1398	if ((bit >= hfsmp->hfs_metazone_start) &&
1399			(bit <= hfsmp->hfs_metazone_end)) {
1400		bit = hfsmp->hfs_metazone_end + 1;
1401	}
1402	return (bit);
1403}
1404
1405
1406/*
1407;_______________________________________________________________________
1408;
1409; Routine:	ReadBitmapBlock
1410;
1411; Function:	Read in a bitmap block corresponding to a given allocation
1412;			block (bit).  Return a pointer to the bitmap block.
1413;
1414; Inputs:
1415;	vcb			--	Pointer to ExtendedVCB
1416;	bit			--	Allocation block whose bitmap block is desired
1417;
1418; Outputs:
;	buffer		--	Pointer to bitmap block corresponding to "block"
1420;	blockRef
1421;_______________________________________________________________________
1422*/
static OSErr ReadBitmapBlock(
		ExtendedVCB		*vcb,
		u_int32_t		bit,
		u_int32_t		**buffer,
		uintptr_t		*blockRef)
{
	OSErr			err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	/* Convert the allocation-block bit into a bitmap-file logical block. */
	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	/* HFS+ / HFSX */
	if (vcb->vcbSigWord != kHFSSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */
	}
#if CONFIG_HFS_STD
	else {
		/* HFS Standard */
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}
#endif

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	/* On success, hand back the buf (as an opaque ref) and its data pointer;
	 * on failure, release the buf and NULL the outputs so the caller cannot
	 * use a stale reference.  The buf must later be released via
	 * ReleaseBitmapBlock. */
	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
1476
1477
1478/*
1479;_______________________________________________________________________
1480;
1481; Routine:	ReadBitmapRange
1482;
1483; Function:	Read in a range of the bitmap starting at the given offset.
1484;			Use the supplied size to determine the amount of I/O to generate
1485;			against the bitmap file. Return a pointer to the bitmap block.
1486;
1487; Inputs:
1488;	hfsmp		--	Pointer to hfs mount
1489;	offset		--	byte offset into the bitmap file
1490;	size		--  How much I/O to generate against the bitmap file.
1491;
1492; Outputs:
;	buffer		--	Pointer to bitmap block data corresponding to "block"
1494;	blockRef	--  struct 'buf' pointer which MUST be released in a subsequent call.
1495;_______________________________________________________________________
1496*/
1497static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
1498		uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
1499{
1500
1501	OSErr			err;
1502	struct buf *bp = NULL;
1503	struct vnode *vp = NULL;
1504	daddr64_t block;
1505
1506	/* This function isn't supported for HFS standard */
1507	if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
1508		return EINVAL;
1509	}
1510
1511	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1512		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
1513	}
1514
1515	/*
1516	 * volume bitmap blocks are protected by the allocation file lock
1517	 */
1518	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1519
1520	vp = hfsmp->hfs_allocation_vp;	/* use allocation file vnode */
1521
1522	/*
1523	 * The byte offset argument must be converted into bitmap-relative logical
1524	 * block numbers before using it in buf_meta_bread.
1525	 *
1526	 * buf_meta_bread (and the things it calls) will eventually try to
1527	 * reconstruct the byte offset into the file by multiplying the logical
1528	 * block number passed in below by the vcbVBMIOSize field in the mount
1529	 * point.  So we prepare for that by converting the byte offset back into
1530	 * logical blocks in terms of VBMIOSize units.
1531	 *
1532	 * The amount of I/O requested and the byte offset should be computed
1533	 * based on the helper function in the frame that called us, so we can
1534	 * get away with just doing a simple divide here.
1535	 */
1536	block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
1537
1538	err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
1539
1540	if (bp) {
1541		if (err) {
1542			buf_brelse(bp);
1543			*blockRef = 0;
1544			*buffer = NULL;
1545		} else {
1546			*blockRef = bp;
1547			*buffer = (u_int32_t *)buf_dataptr(bp);
1548		}
1549	}
1550
1551	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1552		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
1553	}
1554
1555	return err;
1556}
1557
1558
1559/*
1560;_______________________________________________________________________
1561;
1562; Routine:	ReleaseBitmapBlock
1563;
; Function:	Release a bitmap block.
1565;
1566; Inputs:
1567;	vcb
1568;	blockRef
1569;	dirty
1570;_______________________________________________________________________
1571*/
1572static OSErr ReleaseBitmapBlock(
1573		ExtendedVCB		*vcb,
1574		uintptr_t		blockRef,
1575		Boolean			dirty)
1576{
1577	struct buf *bp = (struct buf *)blockRef;
1578
1579	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1580		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
1581
1582	if (blockRef == 0) {
1583		if (dirty)
1584			panic("hfs: ReleaseBitmapBlock: missing bp");
1585		return (0);
1586	}
1587
1588	if (bp) {
1589		if (dirty) {
1590			// XXXdbg
1591			struct hfsmount *hfsmp = VCBTOHFS(vcb);
1592
1593			if (hfsmp->jnl) {
1594				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
1595			} else {
1596				buf_bdwrite(bp);
1597			}
1598		} else {
1599			buf_brelse(bp);
1600		}
1601	}
1602
1603	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1604		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
1605
1606	return (0);
1607}
1608
1609/*
1610 * ReleaseScanBitmapRange
1611 *
1612 * This is used to release struct bufs that were created for use by
1613 * bitmap scanning code.  Because they may be of sizes different than the
1614 * typical runtime manipulation code, we want to force them to be purged out
1615 * of the buffer cache ASAP, so we'll release them differently than in the
1616 * ReleaseBitmapBlock case.
1617 *
1618 * Additionally, because we know that we're only reading the blocks and that they
1619 * should have been clean prior to reading them, we will never
1620 * issue a write to them (thus dirtying them).
1621 */
1622
1623static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
1624
1625	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1626		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, 0, 0, 0, 0, 0);
1627	}
1628
1629	if (bp) {
1630		/* Mark the buffer invalid if it isn't locked, then release it */
1631		if ((buf_flags(bp) & B_LOCKED) == 0) {
1632			buf_markinvalid(bp);
1633		}
1634		buf_brelse(bp);
1635	}
1636
1637	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1638		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1639	}
1640
1641	return (0);
1642}
1643
1644/*
1645_______________________________________________________________________
1646
1647Routine:	BlockAllocateContig
1648
1649Function:	Allocate a contiguous group of allocation blocks.  The
1650			allocation is all-or-nothing.  The caller guarantees that
1651			there are enough free blocks (though they may not be
1652			contiguous, in which case this call will fail).
1653
1654Inputs:
1655	vcb				Pointer to volume where space is to be allocated
1656	startingBlock	Preferred first block for allocation
1657	minBlocks		Minimum number of contiguous blocks to allocate
1658	maxBlocks		Maximum number of contiguous blocks to allocate
1659	flags
1660
1661Outputs:
1662	actualStartBlock	First block of range allocated, or 0 if error
1663	actualNumBlocks		Number of blocks allocated, or 0 if error
1664_______________________________________________________________________
1665*/
static OSErr BlockAllocateContig(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr retval = noErr;
	uint32_t currentStart = startingBlock;

	uint32_t foundStart = 0; // values to emit to caller
	uint32_t foundCount = 0;

	uint32_t collision_start = 0;  // if we have to re-allocate a recently deleted extent, use this
	uint32_t collision_count = 0;

	int err;
	int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
	Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);

	int recently_deleted = 0;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	/*
	 * Loop until we either find a usable extent (foundStart/foundCount set
	 * and not rejected below), or the search reports an error/disk-full.
	 */
	while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {

		/* Try and find something that works. */
		do {
			/*
			 * NOTE: If the only contiguous free extent of at least minBlocks
			 * crosses startingBlock (i.e. starts before, ends after), then we
			 * won't find it. Earlier versions *did* find this case by letting
			 * the second search look past startingBlock by minBlocks.  But
			 * with the free extent cache, this can lead to duplicate entries
			 * in the cache, causing the same blocks to be allocated twice.
			 */
			retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
					maxBlocks, useMetaZone, true, &foundStart, &foundCount);

			if (retval == dskFulErr && currentStart != 0) {
				/*
				 * We constrain the endingBlock so we don't bother looking for ranges
				 * that would overlap those found in the previous call, if the summary bitmap
				 * is not on for this volume.  If it is, then we assume that it was not trust
				 * -worthy and do a full scan.
				 */
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
							maxBlocks, useMetaZone, false, &foundStart, &foundCount);
				}
				else {
					retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
							maxBlocks, useMetaZone, false, &foundStart, &foundCount);
				}
			}
		} while (0);

		if (retval != noErr) {
			goto bailout;
		}

		/* Do we overlap with the recently found collision extent? */
		if (collision_start) {
			if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
				/*
				 * We've looped around, and the only thing we could use was the collision extent.
				 * Since we are allowed to use it, go ahead and do so now.
				 */
				if(allowReuse) {
					/*
					 * then we couldn't find anything except values which might have been
					 * recently deallocated. just return our cached value if we are allowed to.
					 */
					foundStart = collision_start;
					foundCount = collision_count;
					goto bailout;
				}
				else {
					/* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
					retval = dskFulErr;
					goto bailout;
				}
			}
		}

		/* OK, we know we must not have collided . See if this one is recently deleted */
		if (hfsmp->jnl) {
			recently_deleted = 0;
			uint32_t nextStart;
			err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
					(uint64_t) foundCount, &recently_deleted, &nextStart);
			if (err == 0) {
				if(recently_deleted != 0) {
					/*
					 * these blocks were recently deleted/deallocated.  Cache the extent, but
					 * but keep searching to see if we can find one that won't collide here.
					 */
					if (collision_start == 0) {
						collision_start = foundStart;
						collision_count = foundCount;
					}
					recently_deleted = 0;

					/*
					 * advance currentStart to the point just past the overlap we just found. Note that
					 * we will automatically loop around to start of the bitmap as needed.
					 */
					currentStart = nextStart;
					/* Unset foundStart/Count to allow it to loop around again. */
					foundStart = 0;
					foundCount = 0;
				}
			}
		} // end jnl/deleted case

		/*
		 * If we found something good, we'd break out of the loop at the top; foundCount
		 * and foundStart should be set.
		 */

	} // end while loop.

bailout:
	/* mark the blocks as in-use */
	if (retval == noErr) {
		*actualStartBlock = foundStart;
		*actualNumBlocks = foundCount;
		/*
		 * NOTE(review): the return value of BlockMarkAllocatedInternal is
		 * captured in 'err' but never propagated -- a failed bitmap update
		 * would still report noErr to the caller.  Presumably intentional
		 * (the internal routine panics/logs on serious failure) — confirm.
		 */
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);

		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) {
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0);
		}
	}

	return retval;

}
1807
1808
1809/*
1810_______________________________________________________________________
1811
1812Routine:	BlockAllocateAny
1813
1814Function:	Allocate one or more allocation blocks.  If there are fewer
1815			free blocks than requested, all free blocks will be
1816			allocated.  The caller guarantees that there is at least
1817			one free block.
1818
1819Inputs:
1820	vcb				Pointer to volume where space is to be allocated
1821	startingBlock	Preferred first block for allocation
1822	endingBlock		Last block to check + 1
1823	maxBlocks		Maximum number of contiguous blocks to allocate
1824	useMetaZone
1825
1826Outputs:
1827	actualStartBlock	First block of range allocated, or 0 if error
1828	actualNumBlocks		Number of blocks allocated, or 0 if error
1829_______________________________________________________________________
1830*/
1831
1832static OSErr BlockAllocateAny(
1833		ExtendedVCB		*vcb,
1834		u_int32_t		startingBlock,
1835		register u_int32_t	endingBlock,
1836		u_int32_t		maxBlocks,
1837		u_int32_t		flags,
1838		Boolean			trustSummary,
1839		u_int32_t		*actualStartBlock,
1840		u_int32_t		*actualNumBlocks)
1841{
1842
1843	/*
1844	 * If it is enabled, scan through the summary table to find the first free block.
1845	 *
1846	 * If it reports that there are not any free blocks, we could have a false
1847	 * positive, so in that case, use the input arguments as a pass through.
1848	 */
1849	uint32_t start_blk  = startingBlock;
1850	uint32_t end_blk = endingBlock;
1851	struct hfsmount *hfsmp;
1852	OSErr err;
1853
1854	hfsmp = (struct hfsmount*)vcb;
1855	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1856		uint32_t suggested_start;
1857
1858		/*
1859		 * If the summary table is enabled, scan through it to find the first free
1860		 * block.  If there was an error, or we couldn't find anything free in the
1861		 * summary table, then just leave the start_blk fields unmodified. We wouldn't
1862		 * have gotten to this point if the mount point made it look like there was possibly
1863		 * free space in the FS.
1864		 */
1865		err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
1866		if (err == 0) {
1867			start_blk = suggested_start;
1868		}
1869		else {
1870			/* Differentiate between ENOSPC and a more esoteric error in the above call. */
1871			if ((err == ENOSPC) && (trustSummary)) {
1872				/*
1873				 * The 'trustSummary' argument is for doing a full scan if we really
1874				 * really, need the space and we think it's somewhere but can't find it in the
1875				 * summary table. If it's true, then we trust the summary table and return
1876				 * dskFulErr if we couldn't find it above.
1877				 */
1878				return dskFulErr;
1879			}
1880			/*
1881			 * If either trustSummary was false or we got a different errno, then we
1882			 * want to fall through to the real bitmap single i/o code...
1883			 */
1884		}
1885	}
1886
1887	err =  BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
1888			flags, actualStartBlock, actualNumBlocks);
1889
1890	return err;
1891}
1892
1893
1894/*
1895 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
1896 * where the free allocation blocks are.  Inputs and outputs are the same as for
1897 * BlockAllocateAny and BlockAllocateAnyRBTree
1898 */
1899
static OSErr BlockAllocateAnyBitmap(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		register u_int32_t	endingBlock,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	block;			//	current block number
	register u_int32_t	currentWord;	//	Value of current word within bitmap block (big-endian on disk; swapped on load)
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t  *buffer = NULL;
	u_int32_t  *currCache = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	Boolean dirty = false;				// NOTE(review): set below but never read afterward — appears to be dead state
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	uint32_t summary_block_scan = 0;	// bitmap block being scanned end-to-end (0 = none); used to lazily update the summary table
	Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
	Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);

restartSearchAny:
	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else {
				err = dskFulErr;
				goto Exit;
			}
		}
	}

	//	Since this routine doesn't wrap around
	if (maxBlocks > (endingBlock - startingBlock)) {
		maxBlocks = endingBlock - startingBlock;
	}

	//
	//	Pre-read the first bitmap block
	//
	err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
	if (err != noErr) goto Exit;
	buffer = currCache;

	//
	//	Set up the current position within the block
	//
	{
		u_int32_t wordIndexInBlock;

		// Bitmap is read in vcbVBMIOSize chunks; compute bits/words per chunk.
		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		// Point 'buffer' at the word containing startingBlock's bit,
		// and 'bitMask' at that bit within the word.
		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		buffer += wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
		currentWord = SWAP_BE32 (*buffer);
		bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
	}

	/*
	 * While loop 1:
	 *		Find the first unallocated block starting at 'block'
	 */
	block=startingBlock;
	while (block < endingBlock) {
		if ((currentWord & bitMask) == 0)
			break;		// found a clear (free) bit

		//	Next bit
		++block;
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					/*
					 * If summary_block_scan is non-zero, then we must have
					 * pulled a bitmap file block into core, and scanned through
					 * the entire thing.  Because we're in this loop, we are
					 * implicitly trusting that the bitmap didn't have any knowledge
					 * about this particular block.  As a result, update the bitmap
					 * (lazily, now that we've scanned it) with our findings that
					 * this particular block is completely used up.
					 */
					if (summary_block_scan != 0) {
						uint32_t summary_bit;
						(void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
						hfs_set_summary (hfsmp, summary_bit, 1);
						summary_block_scan = 0;
					}
				}

				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					block = NextBitmapBlock(vcb, block);
				}
				if (block >= endingBlock) {
					err = dskFulErr;
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;
				summary_block_scan = block;	// this block will be scanned from its start
				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

	//	Did we get to the end of the bitmap before finding a free block?
	//	If so, then couldn't allocate anything.
	if (block >= endingBlock) {
		err = dskFulErr;
		goto Exit;
	}


	/*
	 * Don't move forward just yet.  Verify that either one of the following
	 * two conditions is true:
	 * 1) journaling is not enabled
	 * 2) block is not currently on any pending TRIM list.
	 */
	if (hfsmp->jnl != NULL && (forceFlush == false)) {
		int recently_deleted = 0;
		uint32_t nextblk;
		err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
		if ((err == 0) && (recently_deleted)) {

			/* release the bitmap block & unset currCache.  we may jump past it. */
			err = ReleaseBitmapBlock(vcb, blockRef, false);
			currCache = NULL;
			if (err != noErr) {
				goto Exit;
			}
			/* set our start to nextblk, and re-do the search. */
			startingBlock = nextblk;
			goto restartSearchAny;
		}
	}


	//	Return the first block in the allocated range
	*actualStartBlock = block;
	dirty = true;

	//	If we could get the desired number of blocks before hitting endingBlock,
	//	then adjust endingBlock so we won't keep looking.  Ideally, the comparison
	//	would be (block + maxBlocks) < endingBlock, but that could overflow.  The
	//	comparison below yields identical results, but without overflow.
	if (block < (endingBlock-maxBlocks)) {
		endingBlock = block + maxBlocks;	//	if we get this far, we've found enough
	}

	/*
	 * While loop 2:
	 *		Scan the bitmap, starting at 'currentWord' in the current
	 *		bitmap block.  Continue iterating through the bitmap until
	 * 		either we hit an allocated block, or until we have accumulated
	 *		maxBlocks worth of bitmap.
	 */

	/* Continue until we see an allocated block */
	while ((currentWord & bitMask) == 0) {
		//	Move to the next block.  If no more, then exit.
		++block;
		if (block == endingBlock) {
			break;
		}

		//	Next bit
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;

				/* We're only reading the bitmap here, so mark it as clean */
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) {
					goto Exit;
				}

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;
					nextBlock = NextBitmapBlock(vcb, block);
					if (nextBlock != block) {
						// err is still noErr here: we keep what we found so far
						goto Exit;  /* allocation gap, so stop */
					}
				}

				if (block >= endingBlock) {
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) {
					goto Exit;
				}
				buffer = currCache;
				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

Exit:
	if (currCache) {
		/* Release the bitmap reference prior to marking bits in-use */
		(void) ReleaseBitmapBlock(vcb, blockRef, false);
		currCache = NULL;
	}

	if (err == noErr) {
		// 'block' is one past the last free block found, so the count is the difference.
		*actualNumBlocks = block - *actualStartBlock;

		// sanity check
		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
			panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}

		/* Mark the bits found as in-use */
		err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks);
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
2169
2170
2171/*
2172_______________________________________________________________________
2173
2174Routine:	BlockAllocateKnown
2175
2176Function:	Try to allocate space from known free space in the free
2177			extent cache.
2178
2179Inputs:
2180	vcb				Pointer to volume where space is to be allocated
2181	maxBlocks		Maximum number of contiguous blocks to allocate
2182
2183Outputs:
2184	actualStartBlock	First block of range allocated, or 0 if error
2185	actualNumBlocks		Number of blocks allocated, or 0 if error
2186
2187Returns:
2188	dskFulErr		Free extent cache is empty
2189_______________________________________________________________________
2190*/
2191
2192static OSErr BlockAllocateKnown(
2193		ExtendedVCB		*vcb,
2194		u_int32_t		maxBlocks,
2195		u_int32_t		*actualStartBlock,
2196		u_int32_t		*actualNumBlocks)
2197{
2198	OSErr			err;
2199	u_int32_t		foundBlocks;
2200	struct hfsmount *hfsmp = VCBTOHFS(vcb);
2201
2202	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2203		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
2204
2205	hfs_lock_mount (hfsmp);
2206	lck_spin_lock(&vcb->vcbFreeExtLock);
2207	if ( vcb->vcbFreeExtCnt == 0 ||
2208			vcb->vcbFreeExt[0].blockCount == 0) {
2209		lck_spin_unlock(&vcb->vcbFreeExtLock);
2210		hfs_unlock_mount(hfsmp);
2211		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2212			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
2213		return dskFulErr;
2214	}
2215	lck_spin_unlock(&vcb->vcbFreeExtLock);
2216	hfs_unlock_mount(hfsmp);
2217
2218	lck_spin_lock(&vcb->vcbFreeExtLock);
2219
2220	//	Just grab up to maxBlocks of the first (largest) free exent.
2221	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2222	foundBlocks = vcb->vcbFreeExt[0].blockCount;
2223	if (foundBlocks > maxBlocks)
2224		foundBlocks = maxBlocks;
2225	*actualNumBlocks = foundBlocks;
2226
2227	lck_spin_unlock(&vcb->vcbFreeExtLock);
2228
2229	remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);
2230
2231	// sanity check
2232	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
2233	{
2234		printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2235		hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2236		*actualStartBlock = 0;
2237		*actualNumBlocks = 0;
2238		err = EIO;
2239	}
2240	else
2241	{
2242		//
2243		//	Now mark the found extent in the bitmap
2244		//
2245		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
2246	}
2247
2248	sanity_check_free_ext(vcb, 0);
2249
2250	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2251		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2252
2253	return err;
2254}
2255
2256/*
2257 * BlockMarkAllocated
2258 *
2259 * This is a wrapper function around the internal calls which will actually mark the blocks
2260 * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do
2261 * this logic here to avoid callers having to deal with whether or not the red-black tree
2262 * is enabled.
2263 */
2264
2265OSErr BlockMarkAllocated(
2266		ExtendedVCB		*vcb,
2267		u_int32_t		startingBlock,
2268		register u_int32_t	numBlocks)
2269{
2270	struct hfsmount *hfsmp;
2271
2272	hfsmp = VCBTOHFS(vcb);
2273
2274	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
2275
2276}
2277
2278
2279
2280/*
2281_______________________________________________________________________
2282
2283Routine:	BlockMarkAllocatedInternal
2284
2285Function:	Mark a contiguous group of blocks as allocated (set in the
2286			bitmap).  It assumes those bits are currently marked
2287			deallocated (clear in the bitmap).  Note that this function
2288			must be called regardless of whether or not the bitmap or
2289			tree-based allocator is used, as all allocations must correctly
2290			be marked on-disk.  If the tree-based approach is running, then
2291			this will be done before the node is removed from the tree.
2292
2293Inputs:
2294	vcb				Pointer to volume where space is to be allocated
2295	startingBlock	First block number to mark as allocated
2296	numBlocks		Number of blocks to mark as allocated
2297_______________________________________________________________________
2298*/
static
OSErr BlockMarkAllocatedInternal (
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		register u_int32_t	numBlocks)
{
	OSErr			err;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t		firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t		numBits;		//	Number of bits in word to allocate
	u_int32_t		*buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	// XXXdbg
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	int force_flush = 0;
	/*
	 * Since we are about to mark these bits as in-use
	 * in the bitmap, decide if we need to alert the caller
	 * that a journal flush might be appropriate. It's safe to
	 * poke at the journal pointer here since we MUST have
	 * called start_transaction by the time this function is invoked.
	 * If the journal is enabled, then it will have taken the requisite
	 * journal locks.  If it is not enabled, then we have taken
	 * a shared lock on the global lock.
	 */
	if (hfsmp->jnl) {
		uint32_t ignore;
		// If any of these blocks are on a pending unmap (TRIM) list,
		// force the journal to flush so the unmap completes first.
		err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
		if ((err == 0) && (force_flush)) {
			journal_request_immediate_flush (hfsmp->jnl);
		}
	}

	// Drop this extent from the pending unmap queue: it is live again.
	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;
	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;

		// Bitmap is processed in vcbVBMIOSize chunks.
		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	If the first block to allocate doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap (words are big-endian on disk)
		numBlocks -= numBits;						//	adjust number of blocks left to allocate

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Allocate whole words (32 blocks) at a time.
	//

	bitMask = kAllBitsSetInWord;					//	put this in a register for 68K
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if (*currentWord != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord = SWAP_BE32 (bitMask);
		numBlocks -= kBitsPerWord;

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Allocate any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);			//	set the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	// 'true' marks the released buffer dirty so the journaled changes are written.
	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
2477
2478
2479/*
2480 * BlockMarkFree
2481 *
2482 * This is a wrapper function around the internal calls which will actually mark the blocks
2483 * as freed.  It will mark the blocks in the red-black tree if appropriate.  We need to do
2484 * this logic here to avoid callers having to deal with whether or not the red-black tree
2485 * is enabled.
2486 *
2487 */
2488OSErr BlockMarkFree(
2489		ExtendedVCB		*vcb,
2490		u_int32_t		startingBlock,
2491		register u_int32_t	numBlocks)
2492{
2493	struct hfsmount *hfsmp;
2494	hfsmp = VCBTOHFS(vcb);
2495
2496	return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2497}
2498
2499
2500/*
2501 * BlockMarkFreeUnused
2502 *
2503 * Scan the bitmap block beyond end of current file system for bits
2504 * that are marked as used.  If any of the bits are marked as used,
2505 * this function marks them free.
2506 *
2507 * Note:  This was specifically written to mark all bits beyond
2508 * end of current file system during hfs_extendfs(), which makes
2509 * sure that all the new blocks added to the file system are
2510 * marked as free.   We expect that all the blocks beyond end of
2511 * current file system are always marked as free, but there might
 * be cases where they are marked as used.  This function assumes that
2513 * the number of blocks marked as used incorrectly are relatively
2514 * small, otherwise this can overflow journal transaction size
2515 * on certain file system configurations (example, large unused
2516 * bitmap with relatively small journal).
2517 *
2518 * Input:
2519 * 	startingBlock: First block of the range to mark unused
2520 * 	numBlocks: Number of blocks in the range to mark unused
2521 *
2522 * Returns: zero on success, non-zero on error.
2523 */
2524OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t	numBlocks)
2525{
2526	int error = 0;
2527	struct hfsmount *hfsmp = VCBTOHFS(vcb);
2528	u_int32_t curNumBlocks;
2529	u_int32_t  bitsPerBlock;
2530	u_int32_t lastBit;
2531
2532	/* Use the optimal bitmap I/O size instead of bitmap block size */
2533	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
2534
2535	/*
2536	 * First clear any non bitmap allocation block aligned bits
2537	 *
2538	 * Calculate the first bit in the bitmap block next to
2539	 * the bitmap block containing the bit for startingBlock.
2540	 * Using this value, we calculate the total number of
2541	 * bits to be marked unused from startingBlock to the
2542	 * end of bitmap block containing startingBlock.
2543	 */
2544	lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
2545	curNumBlocks = lastBit - startingBlock;
2546	if (curNumBlocks > numBlocks) {
2547		curNumBlocks = numBlocks;
2548	}
2549	error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2550	if (error) {
2551		return error;
2552	}
2553	startingBlock += curNumBlocks;
2554	numBlocks -= curNumBlocks;
2555
2556	/*
2557	 * Check a full bitmap block for any 'used' bit.  If any bit is used,
2558	 * mark all the bits only in that bitmap block as free.  This ensures
2559	 * that we do not write unmodified bitmap blocks and do not
2560	 * overwhelm the journal.
2561	 *
2562	 * The code starts by checking full bitmap block at a time, and
2563	 * marks entire bitmap block as free only if any bit in that bitmap
2564	 * block is marked as used.  In the end, it handles the last bitmap
2565	 * block which might be partially full by only checking till the
2566	 * caller-specified last bit and if any bit is set, only mark that
2567	 * range as free.
2568	 */
2569	while (numBlocks) {
2570		if (numBlocks >= bitsPerBlock) {
2571			curNumBlocks = bitsPerBlock;
2572		} else {
2573			curNumBlocks = numBlocks;
2574		}
2575		if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
2576			error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2577			if (error) {
2578				return error;
2579			}
2580		}
2581		startingBlock += curNumBlocks;
2582		numBlocks -= curNumBlocks;
2583	}
2584
2585	return error;
2586}
2587
2588/*
2589_______________________________________________________________________
2590
2591Routine:	BlockMarkFreeInternal
2592
2593Function:	Mark a contiguous group of blocks as free (clear in the
2594			bitmap).  It assumes those bits are currently marked
2595			allocated (set in the bitmap).
2596
2597Inputs:
2598	vcb				Pointer to volume where space is to be freed
2599	startingBlock	First block number to mark as freed
2600	numBlocks		Number of blocks to mark as freed
2601	do_validate 	If true, validate that the blocks being
2602					deallocated to check if they are within totalBlocks
2603					for current volume and whether they were allocated
2604					before they are marked free.
2605_______________________________________________________________________
2606*/
2607static
2608OSErr BlockMarkFreeInternal(
2609		ExtendedVCB		*vcb,
2610		u_int32_t		startingBlock_in,
2611		register u_int32_t	numBlocks_in,
2612		Boolean 		do_validate)
2613{
2614	OSErr		err;
2615	u_int32_t	startingBlock = startingBlock_in;
2616	u_int32_t	numBlocks = numBlocks_in;
2617	uint32_t	unmapStart = startingBlock_in;
2618	uint32_t	unmapCount = numBlocks_in;
2619	uint32_t	wordIndexInBlock;
2620	u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
2621	u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
2622	u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
2623	u_int32_t	currentBit;		//	Bit index within word of current bit to allocate
2624	u_int32_t	numBits;		//	Number of bits in word to allocate
2625	u_int32_t	*buffer = NULL;
2626	uintptr_t	blockRef;
2627	u_int32_t	bitsPerBlock;
2628	u_int32_t	wordsPerBlock;
2629	// XXXdbg
2630	struct hfsmount *hfsmp = VCBTOHFS(vcb);
2631
2632	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2633		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
2634
2635	/*
2636	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
2637	 * need to be able to free blocks being relocated during hfs_truncatefs.
2638	 */
2639	if ((do_validate == true) &&
2640			(startingBlock + numBlocks > vcb->totalBlocks)) {
2641		if (ALLOC_DEBUG) {
2642			panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
2643		}
2644
2645		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
2646		hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2647		err = EIO;
2648		goto Exit;
2649	}
2650
2651	//
2652	//	Pre-read the bitmap block containing the first word of allocation
2653	//
2654
2655	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2656	if (err != noErr) goto Exit;
2657	// XXXdbg
2658	if (hfsmp->jnl) {
2659		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2660	}
2661
2662	//
2663	//	Figure out how many bits and words per bitmap block.
2664	//
2665	bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
2666	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2667	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2668
2669	//
2670	// Look for a range of free blocks immediately before startingBlock
2671	// (up to the start of the current bitmap block).  Set unmapStart to
2672	// the first free block.
2673	//
2674	currentWord = buffer + wordIndexInBlock;
2675	currentBit = startingBlock % kBitsPerWord;
2676	bitMask = kHighBitInWordMask >> currentBit;
2677	while (true) {
2678		// Move currentWord/bitMask back by one bit
2679		bitMask <<= 1;
2680		if (bitMask == 0) {
2681			if (--currentWord < buffer)
2682				break;
2683			bitMask = kLowBitInWordMask;
2684		}
2685
2686		if (*currentWord & SWAP_BE32(bitMask))
2687			break;	// Found an allocated block.  Stop searching.
2688		--unmapStart;
2689		++unmapCount;
2690	}
2691
2692	//
2693	//	If the first block to free doesn't start on a word
2694	//	boundary in the bitmap, then treat that first word
2695	//	specially.
2696	//
2697
2698	currentWord = buffer + wordIndexInBlock;
2699	wordsLeft = wordsPerBlock - wordIndexInBlock;
2700	currentBit = startingBlock % kBitsPerWord;
2701	if (currentBit != 0) {
2702		bitMask = kAllBitsSetInWord >> currentBit;	//	turn off all bits before currentBit
2703		numBits = kBitsPerWord - currentBit;		//	number of remaining bits in this word
2704		if (numBits > numBlocks) {
2705			numBits = numBlocks;					//	entire allocation is inside this one word
2706			bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));	//	turn off bits after last
2707		}
2708		if ((do_validate == true) &&
2709				(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2710			goto Corruption;
2711		}
2712		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
2713		numBlocks -= numBits;						//	adjust number of blocks left to free
2714
2715		++currentWord;								//	move to next word
2716		--wordsLeft;								//	one less word left in this block
2717	}
2718
2719	//
2720	//	Free whole words (32 blocks) at a time.
2721	//
2722
2723	while (numBlocks >= kBitsPerWord) {
2724		if (wordsLeft == 0) {
2725			//	Read in the next bitmap block
2726			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block
2727
2728			buffer = NULL;
2729			err = ReleaseBitmapBlock(vcb, blockRef, true);
2730			if (err != noErr) goto Exit;
2731
2732			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2733			if (err != noErr) goto Exit;
2734
2735			// XXXdbg
2736			if (hfsmp->jnl) {
2737				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2738			}
2739
2740			//	Readjust currentWord and wordsLeft
2741			currentWord = buffer;
2742			wordsLeft = wordsPerBlock;
2743		}
2744		if ((do_validate == true) &&
2745				(*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
2746			goto Corruption;
2747		}
2748		*currentWord = 0;							//	clear the entire word
2749		numBlocks -= kBitsPerWord;
2750
2751		++currentWord;								//	move to next word
2752		--wordsLeft;									//	one less word left in this block
2753	}
2754
2755	//
2756	//	Free any remaining blocks.
2757	//
2758
2759	if (numBlocks != 0) {
2760		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
2761		if (wordsLeft == 0) {
2762			//	Read in the next bitmap block
2763			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block
2764
2765			buffer = NULL;
2766			err = ReleaseBitmapBlock(vcb, blockRef, true);
2767			if (err != noErr) goto Exit;
2768
2769			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2770			if (err != noErr) goto Exit;
2771
2772			// XXXdbg
2773			if (hfsmp->jnl) {
2774				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2775			}
2776
2777			//	Readjust currentWord and wordsLeft
2778			currentWord = buffer;
2779			wordsLeft = wordsPerBlock;
2780		}
2781		if ((do_validate == true) &&
2782				(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2783			goto Corruption;
2784		}
2785		*currentWord &= SWAP_BE32 (~bitMask);			//	clear the bits in the bitmap
2786
2787		//	No need to update currentWord or wordsLeft
2788	}
2789
2790	//
2791	// Look for a range of free blocks immediately after the range we just freed
2792	// (up to the end of the current bitmap block).
2793	//
2794	wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
2795	wordsLeft = wordsPerBlock - wordIndexInBlock;
2796	currentWord = buffer + wordIndexInBlock;
2797	currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
2798	bitMask = kHighBitInWordMask >> currentBit;
2799	while (true) {
2800		// Move currentWord/bitMask/wordsLeft forward one bit
2801		bitMask >>= 1;
2802		if (bitMask == 0) {
2803			if (--wordsLeft == 0)
2804				break;
2805			++currentWord;
2806			bitMask = kHighBitInWordMask;
2807		}
2808
2809		if (*currentWord & SWAP_BE32(bitMask))
2810			break;	// Found an allocated block.  Stop searching.
2811		++unmapCount;
2812	}
2813
2814Exit:
2815
2816	if (buffer)
2817		(void)ReleaseBitmapBlock(vcb, blockRef, true);
2818
2819	if (err == noErr) {
2820		hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
2821	}
2822
2823	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2824		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2825
2826	return err;
2827
2828Corruption:
2829#if DEBUG_BUILD
2830	panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
2831#else
2832	printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
2833	hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2834	err = EIO;
2835	goto Exit;
2836#endif
2837}
2838
2839
2840/*
2841_______________________________________________________________________
2842
2843Routine:	BlockFindContiguous
2844
2845Function:	Find a contiguous range of blocks that are free (bits
2846			clear in the bitmap).  If a contiguous range of the
2847			minimum size can't be found, an error will be returned.
2848			This is only needed to support the bitmap-scanning logic,
2849			as the red-black tree should be able to do this by internally
2850			searching its tree.
2851
2852Inputs:
2853	vcb				Pointer to volume where space is to be allocated
2854	startingBlock	Preferred first block of range
2855	endingBlock		Last possible block in range + 1
2856	minBlocks		Minimum number of blocks needed.  Must be > 0.
2857	maxBlocks		Maximum (ideal) number of blocks desired
2858	useMetaZone	OK to dip into metadata allocation zone
2859
2860Outputs:
2861	actualStartBlock	First block of range found, or 0 if error
2862	actualNumBlocks		Number of blocks found, or 0 if error
2863
2864Returns:
2865	noErr			Found at least minBlocks contiguous
2866	dskFulErr		No contiguous space found, or all less than minBlocks
2867_______________________________________________________________________
2868*/
2869
static OSErr BlockFindContiguous(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		Boolean			trustSummary,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	currentBlock;		//	Block we're currently looking at.
	u_int32_t			firstBlock;			//	First free block in current extent.
	u_int32_t			stopBlock;			//	If we get to this block, stop searching for first free block.
	u_int32_t			foundBlocks;		//	Number of contiguous free blocks in current extent.
	u_int32_t			*buffer = NULL;		//	Mapped bitmap block currently being examined (NULL when released).
	register u_int32_t	*currentWord;		//	Word within 'buffer' being examined.
	register u_int32_t	bitMask;			//	Single-bit mask within the current word (big-endian bit order).
	register u_int32_t	wordsLeft;			//	Words remaining in 'buffer' before a new block must be read.
	register u_int32_t	tempWord;			//	Host-endian copy of *currentWord.
	uintptr_t  blockRef;					//	Opaque handle for releasing the bitmap block.
	u_int32_t  wordsPerBlock;
	u_int32_t  updated_free_extent = 0;		//	Non-zero if we added anything to the free extent cache.
	struct hfsmount *hfsmp = (struct hfsmount*) vcb;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else
				goto DiskFull;
		}
	}

	if ((endingBlock - startingBlock) < minBlocks)
	{
		//	The set of blocks we're checking is smaller than the minimum number
		//	of blocks, so we couldn't possibly find a good range.
		goto DiskFull;
	}

	//	A free extent must begin before stopBlock for at least minBlocks to fit
	//	between its start and endingBlock.
	stopBlock = endingBlock - minBlocks + 1;
	currentBlock = startingBlock;
	firstBlock = 0;

	/*
	 * Skip over metadata blocks.
	 */
	if (!useMetaZone)
		currentBlock = NextBitmapBlock(vcb, currentBlock);

	/*
	 * Use the summary table if we can.  Skip over any totally
	 * allocated blocks.  currentBlock should now point to the first
	 * block beyond the metadata zone if the metazone allocations are not
	 * allowed in this invocation.
	 */
	if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
		uint32_t suggestion;
		if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
			currentBlock = suggestion;
		}
	}


	//
	//	Pre-read the first bitmap block.
	//
	err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
	if ( err != noErr ) goto ErrorExit;

	//
	//	Figure out where currentBlock is within the buffer.
	//
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

	wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);	// Current index into buffer
	currentWord = buffer + wordsLeft;
	wordsLeft = wordsPerBlock - wordsLeft;

	/*
	 * This outer do-while loop is the main body of this function.  Its job is
	 * to search through the blocks (until we hit 'stopBlock'), and iterate
	 * through swaths of allocated bitmap until it finds free regions.
	 */

	do
	{
		foundBlocks = 0;
		//	Bitmap block most recently read and fully scanned as allocated;
		//	0 means "nothing pending" for the lazy summary-table update below.
		uint32_t summary_block_scan = 0;
		/*
		 * Inner while loop 1:
		 *		Look for free blocks, skipping over allocated ones.
		 *
		 * Initialization starts with checking the initial partial word
		 * if applicable.
		 */
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (tempWord & bitMask)
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUnused;

			//	Didn't find any unused bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < stopBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					/*
					 * If summary_block_scan is non-zero, then we must have
					 * pulled a bitmap file block into core, and scanned through
					 * the entire thing.  Because we're in this loop, we are
					 * implicitly trusting that the bitmap didn't have any knowledge
					 * about this particular block.  As a result, update the bitmap
					 * (lazily, now that we've scanned it) with our findings that
					 * this particular block is completely used up.
					 */
					if (summary_block_scan != 0) {
						uint32_t summary_bit;
						(void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
						hfs_set_summary (hfsmp, summary_bit, 1);
						summary_block_scan = 0;
					}
				}
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					currentBlock = NextBitmapBlock(vcb, currentBlock);
					if (currentBlock >= stopBlock) {
						goto LoopExit;
					}
				}

				/* Skip over fully allocated bitmap blocks if we can */
				if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
					uint32_t suggestion;
					if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
						if (suggestion < stopBlock) {
							currentBlock = suggestion;
						}
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				/*
				 * Set summary_block_scan to be the block we just read into the block cache.
				 *
				 * At this point, we've just read an allocation block worth of bitmap file
				 * into the buffer above, but we don't know if it is completely allocated or not.
				 * If we find that it is completely allocated/full then we will jump
				 * through this loop again and set the appropriate summary bit as fully allocated.
				 */
				summary_block_scan = currentBlock;
				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are clear
			if ((tempWord = SWAP_BE32(*currentWord)) + 1)	//	non-zero if any bits were clear
			{
				//	Figure out which bit is clear
				bitMask = kHighBitInWordMask;
				while (tempWord & bitMask)
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		//	Found the free bit; break out to FoundUnused.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
		}

FoundUnused:
		//	Make sure the unused bit is early enough to use
		if (currentBlock >= stopBlock)
		{
			break;
		}

		//	Remember the start of the extent
		firstBlock = currentBlock;


		/*
		 * Inner while loop 2:
		 *		We get here if we find a free block. Count the number
		 * 		of contiguous free blocks observed.
		 *
		 * Initialization starts with checking the initial partial word
		 * if applicable.
		 */
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (bitMask && !(tempWord & bitMask))
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUsed;

			//	Didn't find any used bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < endingBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					//	NOTE: unlike loop 1, a metadata gap here must end the
					//	extent (the free run cannot span the metadata zone).
					nextBlock = NextBitmapBlock(vcb, currentBlock);
					if (nextBlock != currentBlock) {
						goto LoopExit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are set
			if ((tempWord = SWAP_BE32(*currentWord)) != 0)
			{
				//	Figure out which bit is set
				bitMask = kHighBitInWordMask;
				while (!(tempWord & bitMask))
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		//	Found the used bit; break out to FoundUsed.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;

			//	If we found at least maxBlocks, we can quit early.
			if ((currentBlock - firstBlock) >= maxBlocks)
				break;
		}

FoundUsed:
		//	Make sure we didn't run out of bitmap looking for a used block.
		//	If so, pin to the end of the bitmap.
		if (currentBlock > endingBlock)
			currentBlock = endingBlock;

		//	Figure out how many contiguous free blocks there were.
		//	Pin the answer to maxBlocks.
		foundBlocks = currentBlock - firstBlock;
		if (foundBlocks > maxBlocks)
			foundBlocks = maxBlocks;
		if (foundBlocks >= minBlocks)
			break;		//	Found what we needed!

		/*
		 * We did not find the total blocks we were looking for, but
		 * add this free block run to our free extent cache list, if possible.
		 */
		if (hfsmp->jnl == NULL) {
			/* If there is no journal, go ahead and add to the free ext cache. */
			updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
		}
		else {
			/*
			 * If journaled, only add to the free extent cache if this block is not
			 * waiting for a TRIM to complete; that implies that the transaction that freed it
			 * has not yet been committed to stable storage.
			 */
			int recently_deleted = 0;
			uint32_t nextblock;
			err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
					(uint64_t)foundBlocks, &recently_deleted, &nextblock);
			if ((err) || (recently_deleted == 0))  {
				/* if we hit an error, or the blocks not recently freed, go ahead and insert it */
				updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
			}
			err = 0;
		}

	} while (currentBlock < stopBlock);
LoopExit:

	//	Return the outputs.
	//	NOTE: DiskFull/ErrorExit are deliberately placed inside the failure
	//	branch so early 'goto's from above fall into the zeroed-output path.
	if (foundBlocks < minBlocks)
	{
DiskFull:
		err = dskFulErr;
ErrorExit:
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	else
	{
		err = noErr;
		*actualStartBlock = firstBlock;
		*actualNumBlocks = foundBlocks;
		/*
		 * Sanity check for overflow
		 */
		if ((firstBlock + foundBlocks) > vcb->allocLimit) {
			panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
					vcb->vcbVN, startingBlock, endingBlock, currentBlock,
					firstBlock, stopBlock, minBlocks, foundBlocks);
		}
	}

	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
		int i;
		u_int32_t min_start = vcb->totalBlocks;

		// set the nextAllocation pointer to the smallest free block number
		// we've seen so on the next mount we won't rescan unnecessarily
		lck_spin_lock(&vcb->vcbFreeExtLock);
		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
			if (vcb->vcbFreeExt[i].startBlock < min_start) {
				min_start = vcb->vcbFreeExt[i].startBlock;
			}
		}
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		if (min_start != vcb->totalBlocks) {
			if (min_start < vcb->nextAllocation) {
				vcb->nextAllocation = min_start;
			}
			if (min_start < vcb->sparseAllocation) {
				vcb->sparseAllocation = min_start;
			}
		}
	}

	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
3274
3275
3276/*
3277 * Count number of bits set in the given 32-bit unsigned number
3278 *
3279 * Returns:
3280 * 	Number of bits set
3281 */
/*
 * Population count: number of bits set in a 32-bit unsigned value.
 *
 * Uses the classic Kernighan trick: clearing the lowest set bit with
 * num &= (num - 1) iterates exactly once per set bit.
 *
 * Returns: the number of set bits (0..32).
 */
static int num_bits_set(u_int32_t num)
{
	int total = 0;

	while (num != 0) {
		num &= (num - 1);	/* clear the lowest set bit */
		++total;
	}

	return total;
}
3292
3293/*
3294 * For a given range of blocks, find the total number of blocks
3295 * allocated.  If 'stop_on_first' is true, it stops as soon as it
3296 * encounters the first allocated block.  This option is useful
3297 * to determine if any block is allocated or not.
3298 *
3299 * Inputs:
3300 * 	startingBlock	First allocation block number of the range to be scanned.
3301 * 	numBlocks	Total number of blocks that need to be scanned.
3302 * 	stop_on_first	Stop the search after the first allocated block is found.
3303 *
3304 * Output:
3305 * 	allocCount	Total number of allocation blocks allocated in the given range.
3306 *
3307 * 			On error, it is the number of allocated blocks found
3308 * 			before the function got an error.
3309 *
3310 * 			If 'stop_on_first' is set,
3311 * 				allocCount = 1 if any allocated block was found.
3312 * 				allocCount = 0 if no allocated block was found.
3313 *
3314 * Returns:
3315 * 	0 on success, non-zero on failure.
3316 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  wordsLeft;      // Number of words left in this bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
	u_int32_t  numBits;        // Number of bits in word to allocate
	u_int32_t  *buffer = NULL; // Mapped bitmap block (NULL when not held)
	uintptr_t  blockRef;       // Opaque handle used to release the bitmap block
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	u_int32_t  blockCount = 0; // Running count of allocated blocks found so far
	int  error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		// Word offset of startingBlock within its vcbVBMIOSize-sized bitmap block.
		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	// mask off bits before firstBit
		numBits = kBitsPerWord - firstBit;		// bits remaining in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;			// range ends inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	// mask off bits after the range
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;	// any block number within the next bitmap block will do

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;	// NOTE: on error, blockCount holds the partial count so far

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	// mask covering the first numBlocks bits
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}
3452
3453/*
3454 * Count total number of blocks that are allocated in the given
3455 * range from the bitmap.  This is used to preflight total blocks
3456 * that need to be relocated during volume resize.
3457 *
3458 * The journal or allocation file lock must be held.
3459 *
3460 * Returns:
3461 * 	0 on success, non-zero on failure.
3462 * 	On failure, allocCount is zero.
3463 */
3464	int
3465hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
3466		u_int32_t numBlocks, u_int32_t *allocCount)
3467{
3468	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
3469}
3470
3471/*
3472 * Test to see if any blocks in a range are allocated.
3473 *
3474 * Note:  On error, this function returns 1, which means that
3475 * one or more blocks in the range are allocated.  This function
3476 * is primarily used for volume resize and we do not want
3477 * to report to the caller that the blocks are free when we
3478 * were not able to deterministically find it out.  So on error,
3479 * we always report that the blocks are allocated.
3480 *
3481 * The journal or allocation file lock must be held.
3482 *
3483 * Returns
3484 *	0 if all blocks in the range are free.
3485 *	1 if blocks in the range are allocated, or there was an error.
3486 */
3487	int
3488hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
3489{
3490	int error;
3491	u_int32_t allocCount;
3492
3493	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
3494	if (error) {
3495		/* On error, we always say that the blocks are allocated
3496		 * so that volume resize does not return false success.
3497		 */
3498		return 1;
3499	} else {
3500		/* The function was deterministically able to find out
3501		 * if there was any block allocated or not.  In that case,
3502		 * the value in allocCount is good enough to be returned
3503		 * back to the caller.
3504		 */
3505		return allocCount;
3506	}
3507}
3508
3509/*
3510 * CONFIG_HFS_RBTREE
3511 * Check to see if the red-black tree is live.  Allocation file lock must be held
3512 * shared or exclusive to call this function. Note that we may call this even if
3513 * HFS is built without activating the red-black tree code.
3514 */
3515__private_extern__
3516int
3517hfs_isrbtree_active(struct hfsmount *hfsmp){
3518
3519#pragma unused (hfsmp)
3520
3521	/* Just return 0 for now */
3522	return 0;
3523}
3524
3525
3526
3527/* Summary Table Functions */
3528/*
3529 * hfs_check_summary:
3530 *
3531 * This function should be used to query the summary table to see if we can
3532 * bypass a bitmap block or not when we're trying to find a free allocation block.
3533 *
3534 *
3535 * Inputs:
3536 * 		allocblock - allocation block number. Will be used to infer the correct summary bit.
3537 * 		hfsmp -- filesystem in question.
3538 *
3539 * Output Arg:
 *		*freeblocks - set to 1 if we believe there is at least one free block in this
 * 		vcbVBMIOSize page of the bitmap file.
3542 *
3543 *
3544 * Returns:
3545 * 		0 on success
3546 *		EINVAL on error
3547 *
3548 */
3549
3550static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
3551
3552	int err = EINVAL;
3553	if (hfsmp->vcbVBMIOSize) {
3554		if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3555			uint32_t index;
3556			if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
3557				*freeblocks = 0;
3558				return EINVAL;
3559			}
3560
3561			/* Ok, now that we have the bit index into the array, what byte is it in ? */
3562			uint32_t byteindex = index / kBitsPerByte;
3563			uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
3564			uint8_t bit_in_byte = index % kBitsPerByte;
3565
3566			if (current_byte & (1 << bit_in_byte)) {
3567				/*
3568				 * We do not believe there is anything free in the
3569				 * entire vcbVBMIOSize'd block.
3570				 */
3571				*freeblocks = 0;
3572			}
3573			else {
3574				/* Looks like there might be a free block here... */
3575				*freeblocks = 1;
3576			}
3577		}
3578		err = 0;
3579	}
3580
3581	return err;
3582}
3583
3584
3585#if 0
3586/*
3587 * hfs_get_next_summary
3588 *
3589 * From a given allocation block, jump to the allocation block at the start of the
3590 * next vcbVBMIOSize boundary.  This is useful when trying to quickly skip over
3591 * large swaths of bitmap once we have determined that the bitmap is relatively full.
3592 *
3593 * Inputs: hfsmount, starting allocation block number
3594 * Output Arg: *newblock will contain the allocation block number to start
3595 * querying.
3596 *
3597 * Returns:
3598 *		0 on success
3599 * 		EINVAL if the block argument is too large to be used, or the summary table not live.
3600 * 		EFBIG if there are no more summary bits to be queried
3601 */
3602static int
3603hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
3604
3605	u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3606	u_int32_t start_offset;
3607	u_int32_t next_offset;
3608	int err = EINVAL;
3609
3610	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3611		if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
3612			return err;
3613		}
3614
3615		next_offset = start_offset++;
3616
3617		if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
3618			/* Can't jump to the next summary bit. */
3619			return EINVAL;
3620		}
3621
3622		/* Otherwise, compute and return */
3623		*newblock = next_offset * bits_per_iosize;
3624		if (*newblock >= hfsmp->totalBlocks) {
3625			return EINVAL;
3626		}
3627		err = 0;
3628	}
3629
3630	return err;
3631}
3632
3633#endif
3634
3635/*
3636 * hfs_release_summary
3637 *
3638 * Given an extent that is about to be de-allocated on-disk, determine the number
3639 * of summary bitmap bits that need to be marked as 'potentially available'.
3640 * Then go ahead and mark them as free.
3641 *
3642 *	Inputs:
3643 * 		hfsmp 		- hfs mount
3644 * 		block 		- starting allocation block.
3645 * 		length		- length of the extent.
3646 *
3647 * 	Returns:
3648 *		EINVAL upon any errors.
3649 */
3650static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
3651	int err = EINVAL;
3652	uint32_t end_blk = (start_blk + length) - 1;
3653
3654	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3655		/* Figure out what the starting / ending block's summary bits are */
3656		uint32_t start_bit;
3657		uint32_t end_bit;
3658		uint32_t current_bit;
3659
3660		err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
3661		if (err) {
3662			goto release_err;
3663		}
3664		err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
3665		if (err) {
3666			goto release_err;
3667		}
3668
3669		if (ALLOC_DEBUG) {
3670			if (start_bit > end_bit) {
3671				panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
3672			}
3673		}
3674		current_bit = start_bit;
3675		while (current_bit <= end_bit) {
3676			err = hfs_set_summary (hfsmp, current_bit, 0);
3677			current_bit++;
3678		}
3679	}
3680
3681release_err:
3682	return err;
3683}
3684
3685/*
3686 * hfs_find_summary_free
3687 *
 * Given an allocation block as input, returns an allocation block number as output as a
3689 * suggestion for where to start scanning the bitmap in order to find free blocks.  It will
3690 * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
3691 * bit, then keep iterating over the summary bits in order to find the first free one.
3692 *
3693 * Inputs:
3694 *		hfsmp 		- hfs mount
3695 * 		block		- starting allocation block
3696 * 		newblock 	- output block as suggestion
3697 *
3698 * Returns:
3699 * 		0 on success
3700 * 		ENOSPC if we could not find a free block
3701 */
3702
3703int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block,  uint32_t *newblock) {
3704
3705	int err = ENOSPC;
3706	uint32_t bit_index = 0;
3707	uint32_t maybe_has_blocks = 0;
3708
3709	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3710		uint32_t byte_index;
3711		uint8_t curbyte;
3712		uint8_t bit_in_byte;
3713		uint32_t summary_cap;
3714
3715		/*
3716		 * We generate a cap for the summary search because the summary table
3717		 * always represents a full summary of the bitmap FILE, which may
3718		 * be way more bits than are necessary for the actual filesystem
3719		 * whose allocations are mapped by the bitmap.
3720		 *
3721		 * Compute how much of hfs_summary_size is useable for the given number
3722		 * of allocation blocks eligible on this FS.
3723		 */
3724		err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap);
3725		if (err) {
3726			goto summary_exit;
3727		}
3728
3729		/* Check the starting block first */
3730		err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
3731		if (err) {
3732			goto summary_exit;
3733		}
3734
3735		if (maybe_has_blocks) {
3736			/*
3737			 * It looks like the initial start block could have something.
3738			 * Short-circuit and just use that.
3739			 */
3740			*newblock = block;
3741			goto summary_exit;
3742		}
3743
3744		/*
3745		 * OK, now we know that the first block was useless.
3746		 * Get the starting summary bit, and find it in the array
3747		 */
3748		maybe_has_blocks = 0;
3749		err = hfs_get_summary_index (hfsmp, block, &bit_index);
3750		if (err) {
3751			goto summary_exit;
3752		}
3753
3754		/* Iterate until we find something. */
3755		while (bit_index <= summary_cap) {
3756			byte_index = bit_index / kBitsPerByte;
3757			curbyte = hfsmp->hfs_summary_table[byte_index];
3758			bit_in_byte = bit_index % kBitsPerByte;
3759
3760			if (curbyte & (1 << bit_in_byte)) {
3761				/* nothing here.  increment and move on */
3762				bit_index++;
3763			}
3764			else {
3765				/*
3766				 * found something! convert bit_index back into
3767				 * an allocation block for use. 'newblock' will now
3768				 * contain the proper allocation block # based on the bit
3769				 * index.
3770				 */
3771				err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
3772				if (err) {
3773					goto summary_exit;
3774				}
3775				maybe_has_blocks = 1;
3776				break;
3777			}
3778		}
3779
3780		/* If our loop didn't find anything, set err to ENOSPC */
3781		if (maybe_has_blocks == 0) {
3782			err = ENOSPC;
3783		}
3784	}
3785
3786	/* If the summary table is not active for this mount, we'll just return ENOSPC */
3787summary_exit:
3788	if (maybe_has_blocks) {
3789		err = 0;
3790	}
3791
3792	return err;
3793}
3794
3795/*
3796 * hfs_get_summary_allocblock
3797 *
3798 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
3799 *
3800 * Inputs:
3801 *		hfsmp 			- hfs mount
3802 * 		summarybit 		- summmary bit index
3803 *		*alloc			- allocation block number in the bitmap file.
3804 *
3805 * Output:
3806 *		0 on success
3807 * 		EINVAL on failure
3808 */
3809int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
3810		summarybit, uint32_t *alloc) {
3811	uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3812	uint32_t allocblk;
3813
3814	allocblk = summarybit * bits_per_iosize;
3815
3816	if (allocblk >= hfsmp->totalBlocks) {
3817		return EINVAL;
3818	}
3819	else {
3820		*alloc = allocblk;
3821	}
3822
3823	return 0;
3824}
3825
3826
3827/*
3828 * hfs_set_summary:
3829 *
3830 * This function should be used to manipulate the summary table
3831 *
3832 * The argument 'inuse' will set the value of the bit in question to one or zero
3833 * depending on its value.
3834 *
3835 * Inputs:
3836 * 		hfsmp 		- hfs mount
3837 *		summarybit	- the bit index into the summary table to set/unset.
3838 * 		inuse		- the value to assign to the bit.
3839 *
3840 * Returns:
3841 * 		0 on success
3842 *		EINVAL on error
3843 *
3844 */
3845
3846static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
3847
3848	int err = EINVAL;
3849	if (hfsmp->vcbVBMIOSize) {
3850		if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3851
3852			if (ALLOC_DEBUG) {
3853				if (hfsmp->hfs_summary_table == NULL) {
3854					panic ("hfs_set_summary: no table for %p ", hfsmp);
3855				}
3856			}
3857
3858			/* Ok, now that we have the bit index into the array, what byte is it in ? */
3859			uint32_t byte_index = summarybit / kBitsPerByte;
3860			uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
3861			uint8_t bit_in_byte = summarybit % kBitsPerByte;
3862
3863			if (inuse) {
3864				current_byte = (current_byte | (1 << bit_in_byte));
3865			}
3866			else {
3867				current_byte = (current_byte & ~(1 << bit_in_byte));
3868			}
3869
3870			hfsmp->hfs_summary_table[byte_index] = current_byte;
3871		}
3872		err = 0;
3873	}
3874
3875	return err;
3876}
3877
3878
3879/*
3880 * hfs_get_summary_index:
3881 *
3882 * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
3883 * of IO against the bitmap file.
3884 *
3885 * Returns:
3886 *		0 on success
3887 * 		EINVAL on failure
3888 */
3889static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
3890	uint32_t summary_bit;
3891	uint32_t bits_per_iosize;
3892	int err = EINVAL;
3893
3894	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3895		/* Is the input block bigger than the total number of blocks? */
3896		if (block >= hfsmp->totalBlocks) {
3897			return EINVAL;
3898		}
3899
3900		/* Is there even a vbmIOSize set? */
3901		if (hfsmp->vcbVBMIOSize == 0) {
3902			return EINVAL;
3903		}
3904
3905		bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3906
3907		summary_bit = block / bits_per_iosize;
3908
3909		*index = summary_bit;
3910		err = 0;
3911	}
3912
3913	return err;
3914}
3915
3916/*
3917 * hfs_init_summary
3918 *
3919 * From a given mount structure, compute how big the summary table should be for the given
3920 * filesystem, then allocate and bzero the memory.
3921 *
3922 * Returns:
3923 * 0 on success
3924 * EINVAL on failure
3925 */
3926int
3927hfs_init_summary (struct hfsmount *hfsmp) {
3928
3929	uint32_t summary_size;
3930	uint32_t summary_size_bytes;
3931	uint8_t *summary_table;
3932
3933	if (hfsmp->hfs_allocation_cp == NULL) {
3934		if (ALLOC_DEBUG) {
3935			printf("hfs: summary table cannot progress without a bitmap cnode! \n");
3936		}
3937		return EINVAL;
3938	}
3939	/*
3940	 * The practical maximum size of the summary table is 16KB:
3941	 *
3942	 *		(512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
3943	 *
3944	 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
3945	 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
3946	 * supporting this much worse.  The math would instead look like this:
3947	 * (512MB / 2k) / 8 == 32k.
3948	 *
3949	 * So, we will disallow the summary table if the allocation block size is < 4k.
3950	 */
3951
3952	if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
3953		printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
3954		return EINVAL;
3955	}
3956
3957	summary_size = hfsmp->hfs_allocation_cp->c_blocks;
3958
3959	if (ALLOC_DEBUG) {
3960		printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
3961				hfsmp->hfs_allocation_cp->c_blocks);
3962	}
3963
3964	/*
3965	 * If the bitmap IO size is not the same as the allocation block size then
3966	 * then re-compute the number of summary bits necessary.  Note that above, the
3967	 * the default size is the number of allocation blocks in the bitmap *FILE*
3968	 * (not the number of bits in the bitmap itself).  If the allocation block size
3969	 * is large enough though, we may need to increase this.
3970	 */
3971	if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
3972		uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
3973		lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
3974
3975		/* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
3976		summary_size = (uint32_t) lrg_size;
3977	}
3978
3979	/*
3980	 * If the block size is the same as the IO Size, then the total number of blocks
3981	 * is already equal to the number of IO units, which is our number of summary bits.
3982	 */
3983
3984	summary_size_bytes = summary_size / kBitsPerByte;
3985	/* Always add one byte, just in case we have a dangling number of bits */
3986	summary_size_bytes++;
3987
3988	if (ALLOC_DEBUG) {
3989		printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
3990		printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
3991	}
3992
3993	/* Store the field in the mount point, and then MALLOC/bzero the memory */
3994	hfsmp->hfs_summary_size = summary_size;
3995	hfsmp->hfs_summary_bytes = summary_size_bytes;
3996
3997	MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK);
3998	if (summary_table == NULL) {
3999		return ENOMEM;
4000	}
4001	bzero (summary_table, summary_size_bytes);
4002
4003	/* enable the summary table */
4004	hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
4005	hfsmp->hfs_summary_table = summary_table;
4006
4007	if (ALLOC_DEBUG) {
4008		if (hfsmp->hfs_summary_table == NULL) {
4009			panic ("HFS Summary Init: no table for %p\n", hfsmp);
4010		}
4011	}
4012	return 0;
4013}
4014
4015/*
4016 * hfs_rebuild_summary
4017 *
4018 * This function should be used to allocate a new hunk of memory for use as a summary
4019 * table, then copy the existing data into it.  We use it whenever the filesystem's size
4020 * changes.  When a resize is in progress, you can still use the extant summary
4021 * table if it is active.
4022 *
4023 * Inputs:
4024 * 		hfsmp 		-- FS in question
4025 * 		newlength	-- new length of the FS in allocation blocks.
4026 *
4027 * Outputs:
4028 *		0 on success, EINVAL on failure.  If this function fails,  the summary table
4029 * 		will be disabled for future use.
4030 *
4031 */
static int hfs_rebuild_summary (struct hfsmount *hfsmp) {

	uint32_t new_summary_size;

	/* Default: one summary bit per allocation block of the bitmap FILE. */
	new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;


	if (ALLOC_DEBUG) {
		printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
	}

	/*
	 * If the bitmap IO size is not the same as the allocation block size, then re-compute
	 * the number of summary bits necessary.  Note that above, the default size is the number
	 * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
	 * If the allocation block size is large enough though, we may need to increase this, as
	 * bitmap IO is capped at 64k per IO
	 */
	if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
		/* 64-bit intermediate: c_blocks * blockSize can exceed 32 bits. */
		uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
		lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;

		/* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
		new_summary_size = (uint32_t) lrg_size;
	}

	/*
	 * Ok, we have the new summary bitmap theoretical max size.  See if it's the same as
	 * what we've got already... (if it is, the existing table is kept as-is).
	 */
	if (new_summary_size != hfsmp->hfs_summary_size) {
		uint32_t summarybytes = new_summary_size / kBitsPerByte;
		uint32_t copysize;
		uint8_t *newtable;
		/* Add one byte for slop: covers a dangling partial byte of bits. */
		summarybytes++;

		if (ALLOC_DEBUG) {
			printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
			printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
		}

		/* Attempt to MALLOC the memory */
		MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK);
		if (newtable == NULL) {
			/*
			 * Allocation failed: per the contract above, disable the summary
			 * table for this mount entirely and free the old copy.
			 */
			FREE (hfsmp->hfs_summary_table, M_TEMP);
			hfsmp->hfs_summary_table = NULL;
			hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
			return EINVAL;
		}
		bzero (newtable, summarybytes);

		/*
		 * The new table may be smaller than the old one. If this is true, then
		 * we can't copy the full size of the existing summary table into the new
		 * one.
		 *
		 * The converse is not an issue since we bzeroed the table above.
		 */
		copysize = hfsmp->hfs_summary_bytes;
		if (summarybytes < hfsmp->hfs_summary_bytes) {
			copysize = summarybytes;
		}
		memcpy (newtable, hfsmp->hfs_summary_table, copysize);

		/* We're all good.  Destroy the old copy and update ptrs */
		FREE (hfsmp->hfs_summary_table, M_TEMP);

		hfsmp->hfs_summary_table = newtable;
		hfsmp->hfs_summary_size = new_summary_size;
		hfsmp->hfs_summary_bytes = summarybytes;
	}

	return 0;
}
4110
4111
4112#if ALLOC_DEBUG
4113/*
4114 * hfs_validate_summary
4115 *
4116 * Validation routine for the summary table.  Debug-only function.
4117 *
4118 * Bitmap lock must be held.
4119 *
4120 */
void hfs_validate_summary (struct hfsmount *hfsmp) {
	uint32_t i;
	int err;

	/*
	 * Iterate over all of the bits in the summary table, and verify if
	 * there really are free blocks in the pages that we believe may
	 * contain free blocks.
	 */

	if (hfsmp->hfs_summary_table == NULL) {
		panic ("HFS Summary: No HFS summary table!");
	}

	/* 131072 bits == 16384 bytes.  This is the theoretical max size of the summary table. we add 1 byte for slop */
	if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
		panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
	}

	if (hfsmp->vcbVBMIOSize == 0) {
		panic("HFS Summary: no VCB VBM IO Size !");
	}

	printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
	printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);


	/* iterate through all possible summary bits */
	for (i = 0; i < hfsmp->hfs_summary_size ; i++) {

		/* Each summary bit covers one vcbVBMIOSize chunk of the bitmap file. */
		uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
		uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;

		/* Compute the corresponding allocation block for the summary bit. */
		uint32_t alloc_block = i * bits_per_iosize;

		/*
		 * We use a uint32_t pointer here because it will speed up
		 * access to the real bitmap data on disk.
		 */
		uint32_t *block_data;
		struct buf *bp;
		int counter;
		int counter_max;
		int saw_free_bits = 0;

		/* Read the chunk of the on-disk bitmap covered by this summary bit. */
		if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data,  &bp))) {
			panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
		}

		/* Query the status of the bit and then make sure we match */
		uint32_t maybe_has_free_blocks;
		err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
		if (err) {
			panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
		}
		counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;

		/* Scan the chunk word-by-word looking for any free (0) bits. */
		for (counter = 0; counter < counter_max; counter++) {
			uint32_t word = block_data[counter];

			/* We assume that we'll not find any free bits here. */
			if (word != kAllBitsSetInWord) {
				if (maybe_has_free_blocks) {
					/* Summary said "maybe free" and we found free bits: consistent. */
					saw_free_bits = 1;
					break;
				}
				else {
					/* Summary said "full" but the bitmap has free bits: corruption. */
					panic ("HFS Summary: hfs_check_summary saw free bits!");
				}
			}
		}

		/* Converse check: summary said "maybe free" but the chunk was full. */
		if (maybe_has_free_blocks && (saw_free_bits == 0)) {
			panic ("HFS Summary: did not see free bits !");
		}

		/* Release the block. */
		if ((err =  ReleaseScanBitmapRange (bp))) {
			panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
		}
	}

	printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);

	return;
}
4210#endif
4211
4212/*
4213 * hfs_alloc_scan_range:
4214 *
4215 * This function should be used to scan large ranges of the allocation bitmap
4216 * at one time.  It makes two key assumptions:
4217 *
4218 * 		1) Bitmap lock is held during the duration of the call (exclusive)
4219 * 		2) There are no pages in the buffer cache for any of the bitmap
4220 * 		blocks that we may encounter.  It *MUST* be completely empty.
4221 *
4222 * The expected use case is when we are scanning the bitmap in full while we are
4223 * still mounting the filesystem in order to issue TRIMs or build up the summary
4224 * table for the mount point. It should be done after any potential journal replays
4225 * are completed and their I/Os fully issued.
4226 *
4227 * The key reason for assumption (2) above is that this function will try to issue
4228 * I/O against the bitmap file in chunks as large a possible -- essentially as
4229 * much as the buffer layer will handle (1MB).  Because the size of these I/Os
4230 * is larger than what would be expected during normal runtime we must invalidate
4231 * the buffers as soon as we are done with them so that they do not persist in
4232 * the buffer cache for other threads to find, as they'll typically be doing
4233 * allocation-block size I/Os instead.
4234 *
4235 * Input Args:
4236 *		hfsmp 		- hfs mount data structure
4237 * 		startbit 	- allocation block # to start our scan. It must be aligned
4238 *					on a vcbVBMIOsize boundary.
4239 *		list		- journal trim list data structure for issuing TRIMs
4240 *
4241 * Output Args:
4242 *		bitToScan 	- Return the next bit to scan if this function is called again.
4243 *					Caller will supply this into the next invocation
4244 *					of this call as 'startbit'.
4245 */
4246
4247static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4248		u_int32_t *bitToScan, struct jnl_trim_list *list) {
4249
4250	int error;
4251	int readwrite = 1;
4252	u_int32_t curAllocBlock;
4253	struct buf *blockRef = NULL;
4254	u_int32_t *buffer = NULL;
4255	u_int32_t free_offset = 0; //tracks the start of the current free range
4256	u_int32_t size = 0; // tracks the length of the current free range.
4257	u_int32_t iosize = 0; //how much io we should generate against the bitmap
4258	u_int32_t byte_off; // byte offset into the bitmap file.
4259	u_int32_t completed_size; // how much io was actually completed
4260	u_int32_t last_bitmap_block;
4261	u_int32_t current_word;
4262	u_int32_t word_index = 0;
4263
4264	/* summary table building */
4265	uint32_t summary_bit = 0;
4266	uint32_t saw_free_blocks = 0;
4267	uint32_t last_marked = 0;
4268
4269	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4270		readwrite = 0;
4271	}
4272
4273	/*
4274	 * Compute how much I/O we should generate here.
4275	 * hfs_scan_range_size will validate that the start bit
4276	 * converted into a byte offset into the bitmap file,
4277	 * is aligned on a VBMIOSize boundary.
4278	 */
4279	error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4280	if (error) {
4281		if (ALLOC_DEBUG) {
4282			panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4283		}
4284		return error;
4285	}
4286
4287	if (iosize < hfsmp->vcbVBMIOSize) {
4288		if (ALLOC_DEBUG) {
4289			panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4290		}
4291		return EINVAL;
4292	}
4293
4294	/* hfs_scan_range_size should have verified startbit.  Convert it to bytes */
4295	byte_off = startbit / kBitsPerByte;
4296
4297	/*
4298	 * When the journal replays blocks, it does so by writing directly to the disk
4299	 * device (bypassing any filesystem vnodes and such).  When it finishes its I/Os
4300	 * it also immediately re-reads and invalidates the range covered by the bp so
4301	 * it does not leave anything lingering in the cache (for iosize reasons).
4302	 *
4303	 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4304	 *
4305	 * NOTE: It is not recommended, but it is possible to call the function below
4306	 * on sections of the bitmap that may be in core already as long as the pages are not
4307	 * dirty.  In that case, we'd notice that something starting at that
4308	 * logical block of the bitmap exists in the metadata cache, and we'd check
4309	 * if the iosize requested is the same as what was already allocated for it.
4310	 * Odds are pretty good we're going to request something larger.  In that case,
4311	 * we just free the existing memory associated with the buf and reallocate a
4312	 * larger range. This function should immediately invalidate it as soon as we're
4313	 * done scanning, so this shouldn't cause any coherency issues.
4314	 */
4315
4316	error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4317	if (error) {
4318		if (ALLOC_DEBUG) {
4319			panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4320		}
4321		return error;
4322	}
4323
4324	/*
4325	 * At this point, we have a giant wired buffer that represents some portion of
4326	 * the bitmap file that we want to analyze.   We may not have gotten all 'iosize'
4327	 * bytes though, so clip our ending bit to what we actually read in.
4328	 */
4329	completed_size = buf_count(blockRef);
4330	last_bitmap_block = completed_size * kBitsPerByte;
4331	last_bitmap_block = last_bitmap_block + startbit;
4332
4333	/* Cap the last block to the total number of blocks if required */
4334	if (last_bitmap_block > hfsmp->totalBlocks) {
4335		last_bitmap_block = hfsmp->totalBlocks;
4336	}
4337
4338	/* curAllocBlock represents the logical block we're analyzing. */
4339	curAllocBlock = startbit;
4340	word_index = 0;
4341	size = 0;
4342
4343	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4344		if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4345			error = EINVAL;
4346			if (ALLOC_DEBUG) {
4347				panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4348			}
4349			return error;
4350		}
4351		/*
4352		 * summary_bit should now be set to the summary bit corresponding to
4353		 * the allocation block of the first bit that we're supposed to scan
4354		 */
4355	}
4356	saw_free_blocks = 0;
4357
4358	while (curAllocBlock < last_bitmap_block) {
4359		u_int32_t bit;
4360
4361		/* Update the summary table as needed */
4362		if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4363			if (ALLOC_DEBUG) {
4364				if (hfsmp->hfs_summary_table == NULL) {
4365					panic ("hfs_alloc_scan_range: no summary table!");
4366				}
4367			}
4368
4369			uint32_t temp_summary;
4370			error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4371			if (error) {
4372				if (ALLOC_DEBUG) {
4373					panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4374				}
4375				return EINVAL;
4376			}
4377
4378			if (ALLOC_DEBUG) {
4379				if (temp_summary < summary_bit) {
4380					panic ("hfs_alloc_scan_range: backwards summary bit?\n");
4381				}
4382			}
4383
4384			/*
4385			 * If temp_summary is greater than summary_bit, then this
4386			 * means that the next allocation block crosses a vcbVBMIOSize boundary
4387			 * and we should treat this range of on-disk data as part of a new summary
4388			 * bit.
4389			 */
4390			if (temp_summary > summary_bit) {
4391				if (saw_free_blocks == 0) {
4392					/* Mark the bit as totally consumed in the summary table */
4393					hfs_set_summary (hfsmp, summary_bit, 1);
4394				}
4395				else {
4396					/* Mark the bit as potentially free in summary table */
4397					hfs_set_summary (hfsmp, summary_bit, 0);
4398				}
4399				last_marked = summary_bit;
4400				/*
4401				 * Any time we set the summary table, update our counter which tracks
4402				 * what the last bit that was fully marked in the summary table.
4403				 *
4404				 * Then reset our marker which says we haven't seen a free bit yet.
4405				 */
4406				saw_free_blocks = 0;
4407				summary_bit = temp_summary;
4408			}
4409		} /* End summary table conditions */
4410
4411		current_word = SWAP_BE32(buffer[word_index]);
4412		/* Iterate through the word 1 bit at a time... */
4413		for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
4414			if (curAllocBlock >= last_bitmap_block) {
4415				break;
4416			}
4417			u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
4418
4419			if (allocated) {
4420				if (size != 0) {
4421					if (readwrite) {
4422						/* Insert the previously tracked range of free blocks to the trim list */
4423						hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4424					}
4425					add_free_extent_cache (hfsmp, free_offset, size);
4426					size = 0;
4427					free_offset = 0;
4428				}
4429			}
4430			else {
4431				/* Not allocated */
4432				size++;
4433				if (free_offset == 0) {
4434					/* Start a new run of free spcae at curAllocBlock */
4435					free_offset = curAllocBlock;
4436				}
4437				if (saw_free_blocks == 0) {
4438					saw_free_blocks = 1;
4439				}
4440			}
4441		} /* end for loop iterating through the word */
4442
4443		if (curAllocBlock < last_bitmap_block) {
4444			word_index++;
4445		}
4446
4447	} /* End while loop (iterates through last_bitmap_block) */
4448
4449
4450	/*
4451	 * We've (potentially) completed our pass through this region of bitmap,
4452	 * but one thing we may not have done is updated that last summary bit for
4453	 * the last page we scanned, because we would have never transitioned across
4454	 * a vcbVBMIOSize boundary again.  Check for that and update the last bit
4455	 * as needed.
4456	 *
4457	 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
4458	 * for the region of bitmap on-disk that we were scanning. (it is one greater).
4459	 */
4460	if ((curAllocBlock >= last_bitmap_block) &&
4461			(hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
4462		uint32_t temp_summary;
4463		/* temp_block should be INSIDE the region we just scanned, so subtract 1 */
4464		uint32_t temp_block = last_bitmap_block - 1;
4465		error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
4466		if (error) {
4467			if (ALLOC_DEBUG) {
4468				panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
4469			}
4470			return EINVAL;
4471		}
4472
4473		/* Did we already update this in the table? */
4474		if (temp_summary > last_marked) {
4475			if (saw_free_blocks == 0) {
4476				hfs_set_summary (hfsmp, temp_summary, 1);
4477			}
4478			else {
4479				hfs_set_summary (hfsmp, temp_summary, 0);
4480			}
4481		}
4482	}
4483
4484	/*
4485	 * We may have been tracking a range of free blocks that hasn't been inserted yet.
4486	 * Keep the logic for the TRIM and free extent separate from that of the summary
4487	 * table management even though they are closely linked.
4488	 */
4489	if (size != 0) {
4490		if (readwrite) {
4491			hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4492		}
4493		add_free_extent_cache (hfsmp, free_offset, size);
4494	}
4495
4496	/*
4497	 * curAllocBlock represents the next block we need to scan when we return
4498	 * to this function.
4499	 */
4500	*bitToScan = curAllocBlock;
4501	ReleaseScanBitmapRange(blockRef);
4502
4503	return 0;
4504
4505}
4506
4507
4508
4509/*
4510 * Compute the maximum I/O size to generate against the bitmap file
4511 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
4512 *
4513 * Inputs:
4514 *		hfsmp		-- hfsmount to look at
4515 *		bitmap_off 	-- bit offset into the bitmap file
4516 *
4517 * Outputs:
4518 * 		iosize	-- iosize to generate.
4519 *
4520 * Returns:
4521 *		0 on success; EINVAL otherwise
4522 */
4523static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
4524
4525	/*
4526	 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
4527	 * with 32 bit math in this function.
4528	 */
4529
4530	uint32_t bitmap_len;
4531	uint32_t remaining_bitmap;
4532	uint32_t target_iosize;
4533	uint32_t bitmap_off;
4534
4535	/* Is this bit index not word aligned?  If so, immediately fail. */
4536	if (bitmap_st % kBitsPerWord) {
4537		if (ALLOC_DEBUG) {
4538			panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
4539		}
4540		return EINVAL;
4541	}
4542
4543	/* bitmap_off is in bytes, not allocation blocks/bits */
4544	bitmap_off = bitmap_st / kBitsPerByte;
4545
4546	if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
4547		if (ALLOC_DEBUG) {
4548			panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
4549		}
4550		return EINVAL;
4551	}
4552
4553	/*
4554	 * Also invalid if it's not at least aligned to HFS bitmap logical
4555	 * block boundaries.  We don't have to emit an iosize that's an
4556	 * exact multiple of the VBMIOSize, but it must start on such
4557	 * a boundary.
4558	 *
4559	 * The vcbVBMIOSize may be SMALLER than the allocation block size
4560	 * on a FS with giant allocation blocks, but it will never be
4561	 * greater than it, so it should be safe to start I/O
4562	 * aligned on a VBMIOsize boundary.
4563	 */
4564	if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
4565		if (ALLOC_DEBUG) {
4566			panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
4567		}
4568		return EINVAL;
4569	}
4570
4571	/*
4572	 * Generate the total bitmap file length in bytes, then round up
4573	 * that value to the end of the last allocation block, if needed (It
4574	 * will probably be needed).  We won't scan past the last actual
4575	 * allocation block.
4576	 *
4577	 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
4578	 * have to complete the I/O on VBMIOSize boundaries, but we can only read
4579	 * up until the end of the bitmap file.
4580	 */
4581	bitmap_len = hfsmp->totalBlocks / kBitsPerByte;
4582	if (bitmap_len % (hfsmp->blockSize)) {
4583		bitmap_len = (bitmap_len / hfsmp->blockSize);
4584		/* round up to the end of the next alloc block */
4585		bitmap_len++;
4586
4587		/* Convert the # of alloc blocks back to bytes. */
4588		bitmap_len = bitmap_len * hfsmp->blockSize;
4589	}
4590
4591	remaining_bitmap = bitmap_len - bitmap_off;
4592
4593	/*
4594	 * io size is the MIN of the maximum I/O we can generate or the
4595	 * remaining amount of bitmap.
4596	 */
4597	target_iosize = MIN((MAXBSIZE), remaining_bitmap);
4598	*iosize = target_iosize;
4599
4600	return 0;
4601}
4602
4603
4604
4605
4606/*
4607 * This function is basically the same as hfs_isallocated, except it's designed for
4608 * use with the red-black tree validation code.  It assumes we're only checking whether
4609 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
4610 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
4611 *
4612 * This should not be called in general purpose scanning code.
4613 */
4614int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
4615
4616	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
4617	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
4618	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
4619	u_int32_t  numBits;        // Number of bits in word to allocate
4620	u_int32_t  bitsPerBlock;
4621	uintptr_t  blockRef;
4622	u_int32_t  wordsPerBlock;
4623	u_int32_t  numBlocks = 1;
4624	u_int32_t  *buffer = NULL;
4625
4626	int  inuse = 0;
4627	int error;
4628
4629
4630	if (bp_buf) {
4631		/* just use passed-in buffer if avail. */
4632		buffer = bp_buf;
4633	}
4634	else {
4635		/*
4636		 * Pre-read the bitmap block containing the first word of allocation
4637		 */
4638		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
4639		if (error)
4640			return (error);
4641	}
4642
4643	/*
4644	 * Initialize currentWord, and wordsLeft.
4645	 */
4646	u_int32_t wordIndexInBlock;
4647
4648	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
4649	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
4650
4651	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
4652	currentWord = buffer + wordIndexInBlock;
4653
4654	/*
4655	 * First test any non word aligned bits.
4656	 */
4657	firstBit = startingBlock % kBitsPerWord;
4658	bitMask = kAllBitsSetInWord >> firstBit;
4659	numBits = kBitsPerWord - firstBit;
4660	if (numBits > numBlocks) {
4661		numBits = numBlocks;
4662		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
4663	}
4664	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
4665		inuse = 1;
4666		goto Exit;
4667	}
4668	numBlocks -= numBits;
4669	++currentWord;
4670
4671Exit:
4672	if(bp_buf == NULL) {
4673		if (buffer) {
4674			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
4675		}
4676	}
4677	return (inuse);
4678
4679
4680
4681}
4682
4683/*
4684 * This function resets all of the data structures relevant to the
4685 * free extent cache stored in the hfsmount struct.
4686 *
4687 * If we are using the red-black tree code then we need to account for the fact that
4688 * we may encounter situations where we need to jettison the tree.  If that is the
4689 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
4690 * the free ext cache is zeroed before we start using it.
4691 *
4692 * We also reset and disable the cache when allocLimit is updated... which
4693 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
4694 * It is independent of the type of allocator being used currently.
4695 */
4696void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
4697{
4698	int bytes;
4699	void *freeExt;
4700
4701	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4702		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
4703
4704	lck_spin_lock(&hfsmp->vcbFreeExtLock);
4705
4706	/* reset Free Extent Count */
4707	hfsmp->vcbFreeExtCnt = 0;
4708
4709	/* reset the actual array */
4710	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
4711	freeExt = (void*)(hfsmp->vcbFreeExt);
4712
4713	bzero (freeExt, bytes);
4714
4715	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4716
4717	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4718		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
4719
4720	return;
4721}
4722
4723/*
4724 * This function is used to inform the allocator if we have to effectively shrink
4725 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
4726 *
4727 * The bitmap lock must be held when calling this function.  This function also modifies the
4728 * allocLimit field in the hfs mount point structure in the general case.
4729 *
4730 * In the shrinking case, we'll have to remove all free extents from the red-black
4731 * tree past the specified offset new_end_block.  In the growth case, we'll have to force
4732 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
4733 *
4734 * new_end_block represents the total number of blocks available for allocation in the resized
4735 * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
4736 * will be out of the (0, n-1) range that are indexable in the bitmap.
4737 *
4738 * Returns	0 on success
4739 *			errno on failure
4740 */
4741__private_extern__
4742u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
4743
4744	/*
4745	 * Update allocLimit to the argument specified
4746	 */
4747	hfsmp->allocLimit = new_end_block;
4748
4749	/* Invalidate the free extent cache completely so that
4750	 * it does not have any extents beyond end of current
4751	 * volume.
4752	 */
4753	ResetVCBFreeExtCache(hfsmp);
4754
4755	/* Force a rebuild of the summary table. */
4756	(void) hfs_rebuild_summary (hfsmp);
4757
4758	return 0;
4759
4760}
4761
4762
4763/*
4764 * Remove an extent from the list of free extents.
4765 *
4766 * This is a low-level routine.	 It does not handle overlaps or splitting;
4767 * that is the responsibility of the caller.  The input extent must exactly
4768 * match an extent already in the list; it will be removed, and any following
4769 * extents in the list will be shifted up.
4770 *
4771 * Inputs:
4772 *	startBlock - Start of extent to remove
4773 *	blockCount - Number of blocks in extent to remove
4774 *
4775 * Result:
4776 *	The index of the extent that was removed.
4777 */
4778static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
4779{
4780	if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
4781		if (ALLOC_DEBUG)
4782			panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4783		else
4784			printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4785		return;
4786	}
4787	int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
4788	if (shift_count > 0) {
4789		memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4790	}
4791	hfsmp->vcbFreeExtCnt--;
4792}
4793
4794
4795/*
4796 * Add an extent to the list of free extents.
4797 *
4798 * This is a low-level routine.	 It does not handle overlaps or coalescing;
4799 * that is the responsibility of the caller.  This routine *does* make
4800 * sure that the extent it is adding is inserted in the correct location.
4801 * If the list is full, this routine will handle either removing the last
4802 * extent in the list to make room for the new extent, or ignoring the
4803 * new extent if it is "worse" than the last extent in the list.
4804 *
4805 * Inputs:
4806 *	startBlock - Start of extent to add
4807 *	blockCount - Number of blocks in extent to add
4808 *
4809 * Result:
4810 *	The index where the extent that was inserted, or kMaxFreeExtents
4811 *	if the extent was not inserted (the list was full, and the extent
4812 *	being added was "worse" than everything in the list).
4813 */
4814static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4815{
4816	uint32_t i;
4817
4818	/* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
4819	if (ALLOC_DEBUG) {
4820		uint32_t endBlock = startBlock + blockCount;
4821		for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4822			if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
4823					startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
4824				continue;
4825			}
4826			panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
4827					hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
4828		}
4829	}
4830
4831	/* Figure out what index the new extent should be inserted at. */
4832	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4833		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4834			/* The list is sorted by increasing offset. */
4835			if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
4836				break;
4837			}
4838		} else {
4839			/* The list is sorted by decreasing size. */
4840			if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
4841				break;
4842			}
4843		}
4844	}
4845
4846	/* When we get here, i is the index where the extent should be inserted. */
4847	if (i == kMaxFreeExtents) {
4848		/*
4849		 * The new extent is worse than anything already in the list,
4850		 * and the list is full, so just ignore the extent to be added.
4851		 */
4852		return i;
4853	}
4854
4855	/*
4856	 * Grow the list (if possible) to make room for an insert.
4857	 */
4858	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
4859		hfsmp->vcbFreeExtCnt++;
4860
4861	/*
4862	 * If we'll be keeping any extents after the insert position, then shift them.
4863	 */
4864	int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
4865	if (shift_count > 0) {
4866		memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4867	}
4868
4869	/* Finally, store the new extent at its correct position. */
4870	hfsmp->vcbFreeExt[i].startBlock = startBlock;
4871	hfsmp->vcbFreeExt[i].blockCount = blockCount;
4872	return i;
4873}
4874
4875
4876/*
4877 * Remove an entry from free extent cache after it has been allocated.
4878 *
4879 * This is a high-level routine.  It handles removing a portion of a
4880 * cached extent, potentially splitting it into two (if the cache was
4881 * already full, throwing away the extent that would sort last).  It
4882 * also handles removing an extent that overlaps multiple extents in
4883 * the cache.
4884 *
4885 * Inputs:
4886 *	hfsmp		- mount point structure
4887 *	startBlock	- starting block of the extent to be removed.
4888 *	blockCount	- number of blocks of the extent to be removed.
4889 */
4890static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4891{
4892	u_int32_t i, insertedIndex;
4893	u_int32_t currentStart, currentEnd, endBlock;
4894	int extentsRemoved = 0;
4895
4896	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4897		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
4898
4899	endBlock = startBlock + blockCount;
4900
4901	lck_spin_lock(&hfsmp->vcbFreeExtLock);
4902
4903	/*
4904	 * Iterate over all of the extents in the free extent cache, removing or
4905	 * updating any entries that overlap with the input extent.
4906	 */
4907	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4908		currentStart = hfsmp->vcbFreeExt[i].startBlock;
4909		currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
4910
4911		/*
4912		 * If the current extent is entirely before or entirely after the
4913		 * the extent to be removed, then we keep it as-is.
4914		 */
4915		if (currentEnd <= startBlock || currentStart >= endBlock) {
4916			continue;
4917		}
4918
4919		/*
4920		 * If the extent being removed entirely contains the current extent,
4921		 * then remove the current extent.
4922		 */
4923		if (startBlock <= currentStart && endBlock >= currentEnd) {
4924			remove_free_extent_list(hfsmp, i);
4925
4926			/*
4927			 * We just removed the extent at index i.  The extent at
4928			 * index i+1 just got shifted to index i.  So decrement i
4929			 * to undo the loop's "++i", and the next iteration will
4930			 * examine index i again, which contains the next extent
4931			 * in the list.
4932			 */
4933			--i;
4934			++extentsRemoved;
4935			continue;
4936		}
4937
4938		/*
4939		 * If the extent being removed is strictly "in the middle" of the
4940		 * current extent, then we need to split the current extent into
4941		 * two discontiguous extents (the "head" and "tail").  The good
4942		 * news is that we don't need to examine any other extents in
4943		 * the list.
4944		 */
4945		if (startBlock > currentStart && endBlock < currentEnd) {
4946			remove_free_extent_list(hfsmp, i);
4947			add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4948			add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4949			break;
4950		}
4951
4952		/*
4953		 * The only remaining possibility is that the extent to be removed
4954		 * overlaps the start or end (but not both!) of the current extent.
4955		 * So we need to replace the current extent with a shorter one.
4956		 *
4957		 * The only tricky part is that the updated extent might be at a
4958		 * different index than the original extent.  If the updated extent
4959		 * was inserted after the current extent, then we need to re-examine
4960		 * the entry at index i, since it now contains the extent that was
4961		 * previously at index i+1.	 If the updated extent was inserted
4962		 * before or at the same index as the removed extent, then the
4963		 * following extents haven't changed position.
4964		 */
4965		remove_free_extent_list(hfsmp, i);
4966		if (startBlock > currentStart) {
4967			/* Remove the tail of the current extent. */
4968			insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4969		} else {
4970			/* Remove the head of the current extent. */
4971			insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4972		}
4973		if (insertedIndex > i) {
4974			--i;	/* Undo the "++i" in the loop, so we examine the entry at index i again. */
4975		}
4976	}
4977
4978	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4979
4980	sanity_check_free_ext(hfsmp, 0);
4981
4982	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4983		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
4984
4985	return;
4986}
4987
4988
4989/*
4990 * Add an entry to free extent cache after it has been deallocated.
4991 *
4992 * This is a high-level routine.  It will merge overlapping or contiguous
4993 * extents into a single, larger extent.
4994 *
4995 * If the extent provided has blocks beyond current allocLimit, it is
4996 * clipped to allocLimit (so that we won't accidentally find and allocate
4997 * space beyond allocLimit).
4998 *
4999 * Inputs:
5000 *	hfsmp		- mount point structure
5001 *	startBlock	- starting block of the extent to be removed.
5002 *	blockCount	- number of blocks of the extent to be removed.
5003 *
5004 * Returns:
5005 *	true		- if the extent was added successfully to the list
5006 *	false		- if the extent was not added to the list, maybe because
5007 *			  the extent was beyond allocLimit, or is not best
5008 *			  candidate to be put in the cache.
5009 */
5010static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5011{
5012	Boolean retval = false;
5013	uint32_t endBlock;
5014	uint32_t currentEnd;
5015	uint32_t i;
5016
5017	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5018		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
5019
5020	/* No need to add extent that is beyond current allocLimit */
5021	if (startBlock >= hfsmp->allocLimit) {
5022		goto out_not_locked;
5023	}
5024
5025	/* If end of the free extent is beyond current allocLimit, clip the extent */
5026	if ((startBlock + blockCount) > hfsmp->allocLimit) {
5027		blockCount = hfsmp->allocLimit - startBlock;
5028	}
5029
5030	lck_spin_lock(&hfsmp->vcbFreeExtLock);
5031
5032	/*
5033	 * Make a pass through the free extent cache, looking for known extents that
5034	 * overlap or are contiguous with the extent to be added.  We'll remove those
5035	 * extents from the cache, and incorporate them into the new extent to be added.
5036	 */
5037	endBlock = startBlock + blockCount;
5038	for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
5039		currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
5040		if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
5041			/* Extent i does not overlap and is not contiguous, so keep it. */
5042			continue;
5043		} else {
5044			/* We need to remove extent i and combine it with the input extent. */
5045			if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
5046				startBlock = hfsmp->vcbFreeExt[i].startBlock;
5047			if (currentEnd > endBlock)
5048				endBlock = currentEnd;
5049
5050			remove_free_extent_list(hfsmp, i);
5051			/*
5052			 * We just removed the extent at index i.  The extent at
5053			 * index i+1 just got shifted to index i.  So decrement i
5054			 * to undo the loop's "++i", and the next iteration will
5055			 * examine index i again, which contains the next extent
5056			 * in the list.
5057			 */
5058			--i;
5059		}
5060	}
5061	add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
5062
5063	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5064
5065out_not_locked:
5066	sanity_check_free_ext(hfsmp, 0);
5067
5068	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5069		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
5070
5071	return retval;
5072}
5073
/* Debug function to check if the free extent cache is good or not.
 *
 * Verifies, for every entry in the free extent cache: (optionally) that its
 * blocks are not marked allocated in the bitmap, that it does not extend past
 * allocLimit, that no two entries share a start block, and that the list's
 * sort order matches the volume type (ascending start block for sparse
 * devices, descending block count otherwise).  Any violation panics.
 *
 * Inputs:
 *	hfsmp		 - mount point structure
 *	check_allocated  - nonzero to also cross-check each extent against the
 *			   bitmap via hfs_isallocated (does I/O; see note below)
 *
 * No-op unless ALLOC_DEBUG is enabled.
 */
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
{
	u_int32_t i, j;

	/* Do not do anything if debug is not on */
	if (ALLOC_DEBUG == 0) {
		return;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
		panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);

	/*
	 * Iterate the Free extent cache and ensure no entries are bogus or refer to
	 * allocated blocks.
	 */
	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
		u_int32_t start, nblocks;

		start   = hfsmp->vcbFreeExt[i].startBlock;
		nblocks = hfsmp->vcbFreeExt[i].blockCount;

		/* Check if any of the blocks in free extent cache are allocated.
		 * This should not be enabled always because it might take
		 * very long for large extents that get added to the list.
		 *
		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
		 * because it is going to do I/O.  Note that the free extent
		 * cache could change.  That's a risk we take when using this
		 * debugging code.  (Another alternative would be to try to
		 * detect when the free extent cache changed, and perhaps
		 * restart if the list changed while we dropped the lock.)
		 */
		if (check_allocated) {
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (hfs_isallocated(hfsmp, start, nblocks)) {
				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
						hfsmp, i, start, nblocks);
			}
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
		}

		/* Check if any part of the extent is beyond allocLimit */
		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
					hfsmp, i, start, nblocks, hfsmp->allocLimit);
		}

		/* Check if there are any duplicate start blocks */
		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
			if (start == hfsmp->vcbFreeExt[j].startBlock) {
				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
						hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
						hfsmp->vcbFreeExt[j].blockCount);
			}
		}

		/* Check if the entries are out of order */
		if ((i+1) != hfsmp->vcbFreeExtCnt) {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				/* sparse devices are sorted by starting block number (ascending) */
				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
							hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
							hfsmp->vcbFreeExt[i+1].blockCount);
				}
			} else {
				/* normally sorted by block count (descending) */
				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
							hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
							hfsmp->vcbFreeExt[i+1].blockCount);
				}
			}
		}
	}
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}
5155
5156