1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29	File:		VolumeAllocation.c
30
31	Contains:	Routines for accessing and modifying the volume bitmap.
32
33	Version:	HFS Plus 1.0
34
	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.
36
37*/
38
39/*
40Public routines:
41	BlockAllocate
42					Allocate space on a volume.  Can allocate space contiguously.
43					If not contiguous, then allocation may be less than what was
44					asked for.  Returns the starting block number, and number of
45					blocks.  (Will only do a single extent???)
46	BlockDeallocate
47					Deallocate a contiguous run of allocation blocks.
48
49	BlockMarkAllocated
50					Exported wrapper to mark blocks as in-use.  This will correctly determine
51					whether or not the red-black tree is enabled and call the appropriate function
52					if applicable.
53	BlockMarkFree
54					Exported wrapper to mark blocks as freed.  This will correctly determine whether or
55					not the red-black tree is enabled and call the appropriate function if applicable.
56
57
58	ResetVCBFreeExtCache
59					Since the red-black tree obviates the need to maintain the free extent cache, we do
60					not update it if the tree is also live.  As a result, if we ever need to destroy the trees
61					we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
62					bitmap scanning allocator.
63					We also reset and disable the free extent cache when volume resizing is
64					in flight.
65
66	UpdateAllocLimit
67					Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
68					allocations from occupying space in the region we are modifying during a filesystem resize.
69					At other times, it should be consistent with the total number of allocation blocks in the
70					filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
71					know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
72					number of items in the tree that we can allocate from.
73
74	ScanUnmapBlocks
75					Traverse the entire allocation bitmap.  Potentially issue DKIOCUNMAPs to the device as it
76					tracks unallocated ranges when iterating the volume bitmap.  Additionally, build up the in-core
77					summary table of the allocation bitmap.
78
79Internal routines:
80	BlockMarkFreeInternal
81					Mark a contiguous range of blocks as free.  The corresponding
82					bits in the volume bitmap will be cleared.  This will actually do the work
83					of modifying the bitmap for us.
84
85	BlockMarkAllocatedInternal
86					Mark a contiguous range of blocks as allocated.  The cor-
87					responding bits in the volume bitmap are set.  Also tests to see
88					if any of the blocks were previously unallocated.
89	BlockFindContiguous
90					Find a contiguous range of blocks of a given size.  The caller
91					specifies where to begin the search (by block number).  The
92					block number of the first block in the range is returned.  This is only
93					called by the bitmap scanning logic as the red-black tree should be able
94					to do this internally by searching its tree.
95	BlockAllocateAny
96					Find and allocate a contiguous range of blocks up to a given size.  The
97					first range of contiguous free blocks found are allocated, even if there
98					are fewer blocks than requested (and even if a contiguous range of blocks
99					of the given size exists elsewhere).
100	BlockAllocateAnyBitmap
101					Finds a range of blocks per the above requirements without using the
102					Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
103					any valid range of free space needed.
104	BlockAllocateContig
105					Find and allocate a contiguous range of blocks of a given size.  If
106					a contiguous range of free blocks of the given size isn't found, then
107					the allocation fails (i.e. it is "all or nothing").
108	BlockAllocateKnown
109					Try to allocate space from known free space in the volume's
110					free extent cache.
111	ReadBitmapBlock
112					Given an allocation block number, read the bitmap block that
113					contains that allocation block into a caller-supplied buffer.
114
115	ReleaseBitmapBlock
116					Release a bitmap block back into the buffer cache.
117
118	ReadBitmapRange
119					Given an allocation block number, read a range of bitmap that
120					must begin at that allocation block into a caller supplied buffer.
121
122	ReleaseBitmapRange
123					Release and invalidate a buf_t corresponding to the bitmap
124					back into the UBC in order to prevent coherency issues.
125
126	remove_free_extent_cache
127					Remove an extent from the free extent cache.  Handles overlaps
128					with multiple extents in the cache, and handles splitting an
129					extent in the cache if the extent to be removed is in the middle
130					of a cached extent.
131
132	add_free_extent_cache
133					Add an extent to the free extent cache.  It will merge the
134					input extent with extents already in the cache.
135	CheckUnmappedBytes
136					Check whether or not the current transaction
137					has allocated blocks that were recently freed. This may have data safety implications.
138
139
140
141Debug/Test Routines
142	hfs_isallocated
143					Test to see if any blocks in a range are allocated.  Journal or
144					allocation file lock must be held.
145
146	hfs_isallocated_scan
147					Test to see if any blocks in a range are allocated.  Releases and
148					invalidates the block used when finished.
149
150Optimization Routines
151	hfs_alloc_scan_block
152					Given a starting allocation block number, figures out which physical block contains that
153					allocation block's bit, and scans it from the starting bit until either the ending bit or
154					the end of the block.  Free space extents are inserted into the appropriate red-black tree.
155
156*/
157
158#include "../../hfs_macos_defs.h"
159
160#include <sys/types.h>
161#include <sys/buf.h>
162#include <sys/systm.h>
163#include <sys/sysctl.h>
164#include <sys/disk.h>
165#include <sys/ubc.h>
166#include <sys/uio.h>
167#include <kern/kalloc.h>
168#include <sys/malloc.h>
169
170/* For VM Page size */
171#include <libkern/libkern.h>
172
173#include "../../hfs.h"
174#include "../../hfs_dbg.h"
175#include "../../hfs_format.h"
176#include "../../hfs_endian.h"
177#include "../../hfs_macos_defs.h"
178#include "../headers/FileMgrInternal.h"
179#include "../../hfs_kdebug.h"
180
181/* Headers for unmap-on-mount support */
182#include <vfs/vfs_journal.h>
183#include <sys/disk.h>
184
185#ifndef CONFIG_HFS_TRIM
186#define CONFIG_HFS_TRIM 0
187#endif
188
189/*
190 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
191 * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
192 * disabled by default because there can be a lot of these events,
193 * and we don't want to overwhelm the kernel debug buffer.  If you
194 * want to watch these events in particular, just set the sysctl.)
195 */
/*
 * Bitmask of HFSDBG_*_ENABLED values (see enum below) controlling which
 * allocation-related kdebug events are emitted.  Settable at runtime via
 * sysctl vfs.generic.hfs.kdebug.allocation; all tracing is off by default.
 */
static int hfs_kdebug_allocation = 0;
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
enum {
	/*
	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
	 * BlockDeallocate, including the internal BlockAllocateXxx
	 * routines so we can see how an allocation was satisfied.
	 *
	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
	 * free extent cache.
	 *
	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
	 *
	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
	 * or clearing bits, scanning the bitmap).
	 */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};

/* Geometry constants: the volume bitmap is processed one 32-bit word at a time. */
enum {
	kBytesPerWord			=	4,
	kBitsPerByte			=	8,
	kBitsPerWord			=	32,

	kBitsWithinWordMask		=	kBitsPerWord-1	/* mask to extract a bit position within one word */
};

#define kLowBitInWordMask	0x00000001ul	/* least-significant bit of a bitmap word */
#define kHighBitInWordMask	0x80000000ul	/* most-significant bit of a bitmap word */
#define kAllBitsSetInWord	0xFFFFFFFFul	/* a bitmap word with every block marked in-use */

/* Minimum allocation block size related to the in-core summary table -- see the summary functions below. */
#define HFS_MIN_SUMMARY_BLOCKSIZE 4096

/* Set to 1 to enable expensive allocator self-checks (trim_validate_bitmap, hfs_validate_summary, ...). */
#define ALLOC_DEBUG 0
236
/* Read the bitmap block containing allocation block `bit` into a caller-supplied buffer. */
static OSErr ReadBitmapBlock(
		ExtendedVCB		*vcb,
		u_int32_t		bit,
		u_int32_t		**buffer,
		uintptr_t		*blockRef);

/* Release a bitmap block (obtained via ReadBitmapBlock) back into the buffer cache. */
static OSErr ReleaseBitmapBlock(
		ExtendedVCB		*vcb,
		uintptr_t		blockRef,
		Boolean			dirty);

/*
 * Find and allocate up to maxBlocks contiguous blocks; the first free run found
 * is taken even if it is smaller than requested (see header comment above).
 */
static OSErr BlockAllocateAny(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		Boolean			trustSummary,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

/* Same as BlockAllocateAny, but relies purely on bitmap scanning (no RB tree). */
static OSErr BlockAllocateAnyBitmap(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

/* All-or-nothing contiguous allocation of at least minBlocks, up to maxBlocks. */
static OSErr BlockAllocateContig(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

/* Locate (without allocating) a contiguous free run of at least minBlocks. */
static OSErr BlockFindContiguous(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		Boolean			trustSummary,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

/* Try to allocate from the volume's free extent cache. */
static OSErr BlockAllocateKnown(
		ExtendedVCB		*vcb,
		u_int32_t		maxBlocks,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

/* Set the bitmap bits for a contiguous run of blocks (marks them in-use). */
static OSErr BlockMarkAllocatedInternal (
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		register u_int32_t	numBlocks);

/* Clear the bitmap bits for a contiguous run of blocks (marks them free). */
static OSErr BlockMarkFreeInternal(
		ExtendedVCB	*vcb,
		u_int32_t	startingBlock,
		u_int32_t	numBlocks,
		Boolean 	do_validate);


/* Read a range of the bitmap, beginning at the given offset, into a caller-supplied buffer. */
static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
		uint32_t **buffer, struct buf **blockRef);

/* Release and invalidate a buf_t obtained from ReadBitmapRange. */
static OSErr ReleaseScanBitmapRange( struct buf *bp );

/* Append a freed range to the trim list; flushes via hfs_issue_unmap when the list fills. */
static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
		u_int32_t numBlocks, struct jnl_trim_list *list);

/* Issue a DKIOCUNMAP for every extent currently in the trim list. */
static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);

/* Scan one range of the bitmap; used by ScanUnmapBlocks to walk the whole volume. */
static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
		u_int32_t startbit,
		u_int32_t *bitToScan,
		struct jnl_trim_list *list);

/* Compute the I/O size to use when scanning the bitmap from `start`.  TODO: confirm exact contract at definition. */
static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
/* Check whether an extent overlaps blocks freed in a still-uncommitted journal transaction. */
static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
322
323/* Bitmap Re-use Detection */
/*
 * Return non-zero if the half-open extents [start1, start1+len1) and
 * [start2, start2+len2) share at least one block, zero otherwise.
 */
static inline int extents_overlap (uint32_t start1, uint32_t len1,
		uint32_t start2, uint32_t len2) {
	uint32_t end1 = start1 + len1;
	uint32_t end2 = start2 + len2;

	/* They overlap iff each extent starts before the other one ends. */
	return (start1 < end2) && (start2 < end1);
}
328
329
/* Test whether any blocks starting at startingBlock are allocated; releases/invalidates its buffer when done. */
int hfs_isallocated_scan (struct hfsmount *hfsmp,
		u_int32_t startingBlock,
		u_int32_t *bp_buf);

/* Summary Table Functions */
/* Set or clear the summary-table bit for the given summary index. */
static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
/* Map an allocation block number to its summary-table index. */
static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
/* Starting at `block`, find the next region the summary table says may have free space. */
static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
/* Map a summary-table index back to the first allocation block it covers. */
static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
/* Update the summary table after blocks in [start, start+length) were freed. */
static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
/* Cross-check the summary table against free blocks starting at `start`. */
static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
/* Rebuild the entire in-core summary table from the bitmap. */
static int hfs_rebuild_summary (struct hfsmount *hfsmp);

#if 0
static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
#endif

/* Used in external mount code to initialize the summary table */
int hfs_init_summary (struct hfsmount *hfsmp);

#if ALLOC_DEBUG
void hfs_validate_summary (struct hfsmount *hfsmp);
#endif


/* Functions for manipulating free extent cache */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
359
360#if ALLOC_DEBUG
361/*
362 * Validation Routine to verify that the TRIM list maintained by the journal
363 * is in good shape relative to what we think the bitmap should have.  We should
364 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
365 * we panic.
366 */
367int trim_validate_bitmap (struct hfsmount *hfsmp);
368int trim_validate_bitmap (struct hfsmount *hfsmp) {
369	u_int64_t blockno_offset;
370	u_int64_t numblocks;
371	int i;
372	int count;
373	u_int32_t startblk;
374	u_int32_t blks;
375	int err = 0;
376	uint32_t alloccount = 0;
377
378	if (hfsmp->jnl) {
379		struct journal *jnl = (struct journal*)hfsmp->jnl;
380		if (jnl->active_tr) {
381			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
382			count = trim->extent_count;
383			for (i = 0; i < count; i++) {
384				blockno_offset = trim->extents[i].offset;
385				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
386				blockno_offset = blockno_offset / hfsmp->blockSize;
387				numblocks = trim->extents[i].length / hfsmp->blockSize;
388
389				startblk = (u_int32_t)blockno_offset;
390				blks = (u_int32_t) numblocks;
391				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
392
393				if (err == 0 && alloccount != 0) {
394					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
395				}
396			}
397		}
398	}
399	return 0;
400}
401
402#endif
403
404
405/*
406 ;________________________________________________________________________________
407 ;
408 ; Routine:		hfs_unmap_free_extent
409 ;
410 ; Function:		Make note of a range of allocation blocks that should be
411 ;				unmapped (trimmed).  That is, the given range of blocks no
412 ;				longer have useful content, and the device can unmap the
413 ;				previous contents.  For example, a solid state disk may reuse
414 ;				the underlying storage for other blocks.
415 ;
416 ;				This routine is only supported for journaled volumes.  The extent
417 ;				being freed is passed to the journal code, and the extent will
418 ;				be unmapped after the current transaction is written to disk.
419 ;
420 ; Input Arguments:
421 ;	hfsmp			- The volume containing the allocation blocks.
422 ;	startingBlock	- The first allocation block of the extent being freed.
423 ;	numBlocks		- The number of allocation blocks of the extent being freed.
424 ;________________________________________________________________________________
425 */
426static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
427{
428	u_int64_t offset;
429	u_int64_t length;
430	u_int64_t device_sz;
431	int err = 0;
432
433	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
434		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
435
436	if (ALLOC_DEBUG) {
437		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
438			panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
439		}
440	}
441
442	if (hfsmp->jnl != NULL) {
443		device_sz = hfsmp->hfs_logical_bytes;
444		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
445		length = (u_int64_t) numBlocks * hfsmp->blockSize;
446
447		/* Validate that the trim is in a valid range of bytes */
448		if ((offset >= device_sz) || ((offset + length) > device_sz)) {
449			printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
450			err = EINVAL;
451		}
452
453		if (err == 0) {
454			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
455			if (err) {
456				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
457			}
458		}
459	}
460
461	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
462		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
463}
464
465/*
466 ;________________________________________________________________________________
467 ;
468 ; Routine:		hfs_track_unmap_blocks
469 ;
470 ; Function:	Make note of a range of allocation blocks that should be
471 ;				unmapped (trimmed).  That is, the given range of blocks no
472 ;				longer have useful content, and the device can unmap the
473 ;				previous contents.  For example, a solid state disk may reuse
474 ;				the underlying storage for other blocks.
475 ;
476 ;				This routine is only supported for journaled volumes.
477 ;
478 ;              *****NOTE*****:
479 ;              This function should *NOT* be used when the volume is fully
480 ;              mounted.  This function is intended to support a bitmap iteration
481 ;              at mount time to fully inform the SSD driver of the state of all blocks
482 ;              at mount time, and assumes that there is no allocation/deallocation
 ;              interference during its iteration.
484 ;
485 ; Input Arguments:
486 ;	hfsmp			- The volume containing the allocation blocks.
487 ;	offset          - The first allocation block of the extent being freed.
488 ;	numBlocks		- The number of allocation blocks of the extent being freed.
489 ;  list            - The list of currently tracked trim ranges.
490 ;________________________________________________________________________________
491 */
492static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
493		u_int32_t numBlocks, struct jnl_trim_list *list) {
494
495	u_int64_t offset;
496	u_int64_t length;
497	int error = 0;
498
499	if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
500		int extent_no = list->extent_count;
501		offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
502		length = (u_int64_t) numBlocks * hfsmp->blockSize;
503
504
505		list->extents[extent_no].offset = offset;
506		list->extents[extent_no].length = length;
507		list->extent_count++;
508		if (list->extent_count == list->allocated_count) {
509			error = hfs_issue_unmap (hfsmp, list);
510		}
511	}
512
513	return error;
514}
515
516/*
517 ;________________________________________________________________________________
518 ;
519 ; Routine:		hfs_issue_unmap
520 ;
521 ; Function:	Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
522 ;
523 ; Input Arguments:
524 ;	hfsmp			- The volume containing the allocation blocks.
525 ;  list            - The list of currently tracked trim ranges.
526 ;________________________________________________________________________________
527 */
528
529static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) {
530	dk_unmap_t unmap;
531	int error = 0;
532
533	if (list->extent_count > 0) {
534		bzero(&unmap, sizeof(unmap));
535		unmap.extents = list->extents;
536		unmap.extentsCount = list->extent_count;
537
538		/* Issue a TRIM and flush them out */
539		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
540
541		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
542		list->extent_count = 0;
543	}
544	return error;
545}
546
547/*
548 ;________________________________________________________________________________
549 ;
550 ; Routine:		hfs_unmap_alloc_extent
551 ;
552 ; Function:		Make note of a range of allocation blocks, some of
553 ;				which may have previously been passed to hfs_unmap_free_extent,
554 ;				is now in use on the volume.  The given blocks will be removed
555 ;				from any pending DKIOCUNMAP.
556 ;
557 ; Input Arguments:
558 ;	hfsmp			- The volume containing the allocation blocks.
559 ;	startingBlock	- The first allocation block of the extent being allocated.
560 ;	numBlocks		- The number of allocation blocks being allocated.
561 ;________________________________________________________________________________
562 */
563
564static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
565{
566	u_int64_t offset;
567	u_int64_t length;
568	int err;
569
570	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
571		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
572
573	if (hfsmp->jnl != NULL) {
574		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
575		length = (u_int64_t) numBlocks * hfsmp->blockSize;
576
577		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
578		if (err) {
579			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
580		}
581	}
582
583	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
584		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
585}
586
587
588/*
589;________________________________________________________________________________
590;
591; Routine:		hfs_trim_callback
592;
593; Function:		This function is called when a transaction that freed extents
594;				(via hfs_unmap_free_extent/journal_trim_add_extent) has been
595;				written to the on-disk journal.  This routine will add those
596;				extents to the free extent cache so that they can be reused.
597;
598;				CAUTION: This routine is called while the journal's trim lock
599;				is held shared, so that no other thread can reuse any portion
600;				of those extents.  We must be very careful about which locks
601;				we take from within this callback, to avoid deadlock.  The
602;				call to add_free_extent_cache will end up taking the cache's
603;				lock (just long enough to add these extents to the cache).
604;
605;				CAUTION: If the journal becomes invalid (eg., due to an I/O
606;				error when trying to write to the journal), this callback
607;				will stop getting called, even if extents got freed before
608;				the journal became invalid!
609;
610; Input Arguments:
611;	arg				- The hfsmount of the volume containing the extents.
612;	extent_count	- The number of extents freed in the transaction.
613;	extents			- An array of extents (byte ranges) that were freed.
614;________________________________________________________________________________
615*/
616
617__private_extern__ void
618hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
619{
620	uint32_t i;
621	uint32_t startBlock, numBlocks;
622	struct hfsmount *hfsmp = arg;
623
624	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
625		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
626
627	for (i=0; i<extent_count; ++i) {
628		/* Convert the byte range in *extents back to a range of allocation blocks. */
629		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
630		numBlocks = extents[i].length / hfsmp->blockSize;
631		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
632	}
633
634	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
635		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
636}
637
638
639/*
640   ;________________________________________________________________________________
641   ;
642   ; Routine:		CheckUnmappedBytes
643   ;
644   ; Function:	From the specified inputs, determine if the extent in question overlaps
645   ;				space that was recently freed, where the recently freed space may still be
646   ;				lingering in an uncommitted journal transaction.  This may have data safety
647   ;				implications.  The intended use is to decide whether or not to force a journal flush
648   ;				before allowing file data I/O to be issued.  If we did not do this
649   ;				then it would be possible to issue the file I/O ahead of the
650   ;				journal, resulting in data being overwritten if the transaction either
651   ;				is not committed or cannot be replayed.
652   ;
653   ;		NOTE: This function assumes that the journal and catalog/extent locks are held.
654   ;
655   ; Input Arguments:
656   ;	hfsmp			- The volume containing the allocation blocks.
657   ;	foffset			- start of the extent in question (in allocation blocks)
658   ;	numbytes		- number of blocks in the extent.
659   ;  recently_freed:	- output pointer containing whether or not the blocks were freed recently
660   ;  overlap_end 		- end of the overlap between the argument extent and the trim list (in allocation blocks)
661   ;
662   ; Output:
663   ;
664   ; 		Returns 0 if we could determine extent validity for this (or a previous transaction)
665   ; 		Returns errno if there was an error
666   ;
667   ;		If returned 0, then recently freed will contain a boolean that indicates
668   ;		that it was recently freed.
669   ;________________________________________________________________________________
670 */
671
672u_int32_t
673CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
674	uint64_t device_offset;
675	uint64_t numbytes;
676	uint32_t err = 0;
677	uint64_t lba_overlap_end;
678
679	if (hfsmp->jnl != NULL) {
680		/*
681		 * Convert the allocation block # and the number of blocks into device-relative
682		 * offsets so that they can be compared using the TRIM list.
683		 */
684		uint64_t device_sz = hfsmp->hfs_logical_bytes;
685		device_offset = blockno * ((uint64_t)hfsmp->blockSize);
686		device_offset += hfsmp->hfsPlusIOPosOffset;
687		numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
688
689		/*
690		 * Since we check that the device_offset isn't too large, it's safe to subtract it
691		 * from the size in the second check.
692		 */
693		if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
694			return EINVAL;
695		}
696
697		/* Ask the journal if this extent overlaps with any pending TRIMs */
698		if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
699			*recently_freed = 1;
700
701			/* Convert lba_overlap_end back into allocation blocks */
702			uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
703			end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
704			*overlap_end = (uint32_t) end_offset;
705		}
706		else {
707			*recently_freed = 0;
708		}
709		err = 0;
710	}
711	else {
712		/* There may not be a journal.  In that case, always return success.  */
713		*recently_freed = 0;
714	}
715	return err;
716
717}
718
719
720/*
721 ;________________________________________________________________________________
722 ;
723 ; Routine:		ScanUnmapBlocks
724 ;
725 ; Function:	Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
726 ;				device as needed so that the underlying disk device is as
727 ;				up-to-date as possible with which blocks are unmapped.
728 ;				Additionally build up the summary table as needed.
729 ;
730 ; Input Arguments:
731 ;	hfsmp			- The volume containing the allocation blocks.
732 ;________________________________________________________________________________
733 */
734
735__private_extern__
736u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
737{
738	u_int32_t blocks_scanned = 0;
739	int error = 0;
740	struct jnl_trim_list trimlist;
741
742	/*
743	 *struct jnl_trim_list {
744	 uint32_t    allocated_count;
745	 uint32_t    extent_count;
746	 dk_extent_t *extents;
747	 };
748	 */
749
750	/*
751	 * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
752	 * which is now enabled for most architectures.  Instead, any trim related
753	 * work should be tied to whether the underlying storage media supports
754	 * UNMAP, as any solid state device would on desktop or embedded.
755	 *
756	 * We do this because we may want to scan the full bitmap on desktop
757	 * for spinning media for the purposes of building up the
758	 * summary table.
759	 *
760	 * We also avoid sending TRIMs down to the underlying media if the mount is read-only.
761	 */
762
763	if ((hfsmp->hfs_flags & HFS_UNMAP) &&
764			((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
765		/* If the underlying device supports unmap and the mount is read-write, initialize */
766		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
767		void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
768		if (extents == NULL) {
769			return ENOMEM;
770		}
771		bzero (&trimlist, sizeof(trimlist));
772		trimlist.extents = (dk_extent_t*)extents;
773		trimlist.allocated_count = alloc_count;
774		trimlist.extent_count = 0;
775	}
776
777	while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
778
779		error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
780
781		if (error) {
782			printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
783			break;
784		}
785	}
786
787	if ((hfsmp->hfs_flags & HFS_UNMAP) &&
788			((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
789		if (error == 0) {
790			hfs_issue_unmap(hfsmp, &trimlist);
791		}
792		if (trimlist.extents) {
793			kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
794		}
795	}
796
797	/*
798	 * This is in an #if block because hfs_validate_summary prototype and function body
799	 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
800	 */
801#if ALLOC_DEBUG
802	sanity_check_free_ext(hfsmp, 1);
803	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
804		/* Validate the summary table too! */
805		hfs_validate_summary(hfsmp);
806		printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
807	}
808#endif
809
810	return error;
811}
812
813/*
814 ;________________________________________________________________________________
815 ;
816 ; Routine:	   BlockAllocate
817 ;
818 ; Function:   Allocate space on a volume.	If contiguous allocation is requested,
819 ;			   at least the requested number of bytes will be allocated or an
820 ;			   error will be returned.	If contiguous allocation is not forced,
821 ;			   the space will be allocated with the first largest extent available
822 ;			   at the requested starting allocation block.  If there is not enough
823 ;			   room there, a block allocation of less than the requested size will be
824 ;			   allocated.
825 ;
826 ;			   If the requested starting block is 0 (for new file allocations),
827 ;			   the volume's allocation block pointer will be used as a starting
828 ;			   point.
829 ;
830 ; Input Arguments:
831 ;	 vcb			 - Pointer to ExtendedVCB for the volume to allocate space on
832 ;	 fcb			 - Pointer to FCB for the file for which storage is being allocated
833 ;	 startingBlock	 - Preferred starting allocation block, 0 = no preference
834 ;	 minBlocks	 	 - Number of blocks requested.	If the allocation is non-contiguous,
835 ;					   less than this may actually be allocated
 ;	 maxBlocks	 	 - The maximum number of blocks to allocate.  If there is additional free
 ;					   space after minBlocks, then up to maxBlocks blocks should really
 ;					   be allocated.  (Used by ExtendFileC to round up allocations to a multiple
 ;					   of the file's clump size.)
840 ;	 flags           - Flags to specify options like contiguous, use metadata zone,
841 ;					   skip free block check, etc.
842 ;
843 ; Output:
844 ;	 (result)		 - Error code, zero for successful allocation
845 ;	 *startBlock	 - Actual starting allocation block
 ;	 *actualNumBlocks - Actual number of allocation blocks allocated
847 ;
848 ; Side effects:
849 ;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
850 ;________________________________________________________________________________
851 */
OSErr BlockAllocate (
		ExtendedVCB		*vcb,				/* which volume to allocate space on */
		u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
		u_int32_t		minBlocks,		/* desired number of blocks to allocate */
		u_int32_t		maxBlocks,		/* maximum number of blocks to allocate */
		u_int32_t		flags,			/* option flags */
		u_int32_t		*actualStartBlock,	/* actual first block of allocation */
		u_int32_t		*actualNumBlocks)
/*
 *  actualNumBlocks is the number of blocks actually allocated;
 * if forceContiguous was zero, then this may represent fewer than minBlocks
 */
{
	u_int32_t  freeBlocks;
	OSErr			err;
	Boolean			updateAllocPtr = false;		//	true if nextAllocation needs to be updated
	struct hfsmount	*hfsmp;
	Boolean useMetaZone;
	Boolean forceContiguous;
	Boolean forceFlush;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	/*
	 * Decode the option flags into booleans up front.  Note that the flags
	 * word itself is still passed through to the sub-allocators below;
	 * useMetaZone is decoded here but not otherwise read in this function.
	 */
	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}

	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	if (flags & HFS_ALLOC_FLUSHTXN) {
		forceFlush = true;
	}
	else {
		forceFlush = false;
	}


	//
	//	Initialize outputs in case we get an error
	//
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);


	/* Skip free block check if blocks are being allocated for relocating
	 * data during truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful.  This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		//	If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	//
	//	If caller didn't specify a starting block number, then use the volume's
	//	next block to allocate from.
	//
	if (startingBlock == 0) {
		hfs_lock_mount (hfsmp);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		}
		else {
			startingBlock = vcb->nextAllocation;
		}
		hfs_unlock_mount(hfsmp);
		updateAllocPtr = true;
	}


	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	//	If the request must be contiguous, then find a sequence of free blocks
	//	that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
				flags, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
				(*actualStartBlock > startingBlock) &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will be cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		/*
		 * Disable HFS_ALLOC_FLUSHTXN for the cheaper passes below; it is
		 * re-enabled further down as a last resort if everything else
		 * reports the disk full.
		 */
		if (forceFlush) {
			flags &= ~HFS_ALLOC_FLUSHTXN;
		}

		/*
		 * BlockAllocateKnown only examines the free extent cache; anything in there will
		 * have been committed to stable storage already.
		 */
		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);

		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.  We 'trust' the summary bitmap in this call, if it tells us
			 * that it could not find any free space.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, flags, true,
					actualStartBlock, actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * Vary the behavior here if the summary table is on or off.
			 * If it is on, then we don't trust it if we get into this case and
			 * basically do a full scan for maximum coverage.
			 * If it is off, then we trust the above and go up until the startingBlock.
			 */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
						flags, false,
						actualStartBlock, actualNumBlocks);
			}
			else {
				err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
						flags, false,
						actualStartBlock, actualNumBlocks);
			}

			/*
		     * Last Resort: Find/use blocks that may require a journal flush.
	 		 */
			if (err == dskFulErr && forceFlush) {
				flags |= HFS_ALLOC_FLUSHTXN;
				err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
						flags, false,
						actualStartBlock, actualNumBlocks);
			}
		}
	}

Exit:
	/*
	 * CoreStorage-backed volumes: ask the lower layer to map the newly
	 * allocated extent.  If it maps fewer blocks than we allocated, give
	 * the unmapped tail back to the bitmap.
	 */
	if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) {
		errno_t ec;
		_dk_cs_map_t cm;
		uint64_t mapped_blocks;

		cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset;
		cm.cm_extent.length = (uint64_t)*actualNumBlocks * hfsmp->blockSize;
		cm.cm_bytes_mapped = 0;
		ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current());
		if (ec != 0 && ec != ENOSPC) {
			printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec);
			err = ec;
			goto Exit_CS;
		}
		mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize;
		/* CoreStorage returned more blocks than requested */
		if (mapped_blocks > *actualNumBlocks) {
			printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n",
					mapped_blocks, *actualNumBlocks);
		}
		if (*actualNumBlocks > mapped_blocks) {
			/* A contiguous allocation that mapped short is useless; free it all. */
			if (forceContiguous && mapped_blocks < minBlocks) {
				mapped_blocks = 0;
			}
		}
		/*
		 * NOTE(review): if mapped_blocks > *actualNumBlocks (the case logged
		 * above), this unsigned subtraction underflows and BlockDeallocate
		 * would be called with an enormous count.  Presumably CoreStorage
		 * never maps more than was requested -- confirm.
		 */
		uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks;
		uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks;
		if (numBlocksToFree > 0) {
			err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags);
			if (err != noErr) {
				printf ("BlockDeallocate failed (err=%d)\n", err);
				goto Exit_CS;
			}
		}
		*actualNumBlocks = mapped_blocks;
		if (*actualNumBlocks == 0 && err == noErr) {
			err = dskFulErr;
		}
	}
Exit_CS:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (*actualNumBlocks != 0) {
		//
		//	If we used the volume's roving allocation pointer, then we need to update it.
		//	Adding in the length of the current allocation might reduce the next allocate
		//	call by avoiding a re-scan of the already allocated space.  However, the clump
		//	just allocated can quite conceivably end up being truncated or released when
		//	the file is closed or its EOF changed.  Leaving the allocation pointer at the
		//	start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		hfs_lock_mount (hfsmp);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		/* Consume this allocation out of the freed-block debt, clamping at zero. */
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		if (updateAllocPtr &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/*
		 * Update the number of free blocks on the volume
		 *
		 * Skip updating the free blocks count if the block are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		hfs_unlock_mount(hfsmp);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}

	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			/*
			 * NOTE(review): start+count is one past the last allocated block,
			 * so '>=' here panics on an allocation that ends exactly at the
			 * volume/limit boundary; looks like it should be '>' -- confirm.
			 */
			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
1163
1164
1165/*
1166;________________________________________________________________________________
1167;
1168; Routine:	   BlockDeallocate
1169;
1170; Function:    Update the bitmap to deallocate a run of disk allocation blocks
1171;
1172; Input Arguments:
1173;	 vcb		- Pointer to ExtendedVCB for the volume to free space on
1174;	 firstBlock	- First allocation block to be freed
1175;	 numBlocks	- Number of allocation blocks to free up (must be > 0!)
1176;
1177; Output:
1178;	 (result)	- Result code
1179;
1180; Side effects:
1181;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
1182;	 The Allocator's red-black trees may also be modified as a result.
1183;________________________________________________________________________________
1184*/
1185
OSErr BlockDeallocate (
		ExtendedVCB		*vcb,			//	Which volume to deallocate space on
		u_int32_t		firstBlock,		//	First block in range to deallocate
		u_int32_t		numBlocks, 		//	Number of contiguous blocks to deallocate
		u_int32_t 		flags)
{
	OSErr			err;
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	//
	//	If no blocks to deallocate, then exit early
	//
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}


	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}

		/*
		 * NOTE(review): firstBlock+numBlocks is one past the last freed
		 * block, so '>=' panics when the range ends exactly at the volume
		 * end; looks like it should be '>' -- confirm.
		 */
		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}
	}

	/*
	 * If we're using the summary bitmap, then try to mark the bits
	 * as potentially usable/free before actually deallocating them.
	 * It is better to be slightly speculative here for correctness.
	 */

	(void) hfs_release_summary (hfsmp, firstBlock, numBlocks);

	/* Clear the bits in the on-disk allocation bitmap (journaled if enabled). */
	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);

	if (err) {
		goto Exit;
	}

	//
	//	Update the volume's free block count, and mark the VCB as dirty.
	//
	hfs_lock_mount(hfsmp);
	/*
	 * Do not update the free block count.  This flag is specified
	 * when a volume is being truncated.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	/* If we freed the blocks immediately below nextAllocation, roll it back. */
	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);

		/*
		 * If the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache).  Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}

	MarkVCBDirty(vcb);
	hfs_unlock_mount(hfsmp);

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
1280
1281
/*
 * Number of zero (free) bits in each possible 4-bit nibble value.
 * Index is the nibble: freebitcount[0x0] == 4 (all four bits clear/free),
 * freebitcount[0xF] == 0 (all four bits set/allocated).
 */
u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
1286
1287u_int32_t
1288MetaZoneFreeBlocks(ExtendedVCB *vcb)
1289{
1290	u_int32_t freeblocks;
1291	u_int32_t *currCache;
1292	uintptr_t blockRef;
1293	u_int32_t bit;
1294	u_int32_t lastbit;
1295	int bytesleft;
1296	int bytesperblock;
1297	u_int8_t byte;
1298	u_int8_t *buffer;
1299
1300	blockRef = 0;
1301	bytesleft = freeblocks = 0;
1302	buffer = NULL;
1303	bit = VCBTOHFS(vcb)->hfs_metazone_start;
1304	if (bit == 1)
1305		bit = 0;
1306
1307	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1308	bytesperblock = vcb->vcbVBMIOSize;
1309
1310	/*
1311	 *  Count all the bits from bit to lastbit.
1312	 */
1313	while (bit < lastbit) {
1314		/*
1315		 *  Get next bitmap block.
1316		 */
1317		if (bytesleft == 0) {
1318			if (blockRef) {
1319				(void) ReleaseBitmapBlock(vcb, blockRef, false);
1320				blockRef = 0;
1321			}
1322			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
1323				return (0);
1324			}
1325			buffer = (u_int8_t *)currCache;
1326			bytesleft = bytesperblock;
1327		}
1328		byte = *buffer++;
1329		freeblocks += freebitcount[byte & 0x0F];
1330		freeblocks += freebitcount[(byte >> 4) & 0x0F];
1331		bit += kBitsPerByte;
1332		--bytesleft;
1333	}
1334	if (blockRef)
1335		(void) ReleaseBitmapBlock(vcb, blockRef, false);
1336
1337	return (freeblocks);
1338}
1339
1340
1341/*
1342 * Obtain the next allocation block (bit) that's
1343 * outside the metadata allocation zone.
1344 */
1345static u_int32_t NextBitmapBlock(
1346		ExtendedVCB		*vcb,
1347		u_int32_t		bit)
1348{
1349	struct  hfsmount *hfsmp = VCBTOHFS(vcb);
1350
1351	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1352		return (bit);
1353	/*
1354	 * Skip over metadata allocation zone.
1355	 */
1356	if ((bit >= hfsmp->hfs_metazone_start) &&
1357			(bit <= hfsmp->hfs_metazone_end)) {
1358		bit = hfsmp->hfs_metazone_end + 1;
1359	}
1360	return (bit);
1361}
1362
1363
1364/*
1365;_______________________________________________________________________
1366;
1367; Routine:	ReadBitmapBlock
1368;
1369; Function:	Read in a bitmap block corresponding to a given allocation
1370;			block (bit).  Return a pointer to the bitmap block.
1371;
1372; Inputs:
1373;	vcb			--	Pointer to ExtendedVCB
1374;	bit			--	Allocation block whose bitmap block is desired
1375;
1376; Outputs:
;	buffer		--	Pointer to bitmap block corresponding to "block"
1378;	blockRef
1379;_______________________________________________________________________
1380*/
static OSErr ReadBitmapBlock(
		ExtendedVCB		*vcb,
		u_int32_t		bit,
		u_int32_t		**buffer,
		uintptr_t		*blockRef)
{
	OSErr			err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	/* Convert the bit number into a bitmap-file logical block number. */
	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	/* HFS+ / HFSX */
	if (vcb->vcbSigWord != kHFSSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */
	}
#if CONFIG_HFS_STD
	else {
		/* HFS Standard */
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}
#endif

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	/*
	 * On failure release the buffer and zero the outputs; on success the
	 * caller owns the buffer and must return it via ReleaseBitmapBlock.
	 * If buf_meta_bread returned no buffer at all, the outputs are left
	 * untouched -- callers rely on the error code in that case.
	 */
	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
1434
1435
1436/*
1437;_______________________________________________________________________
1438;
1439; Routine:	ReadBitmapRange
1440;
1441; Function:	Read in a range of the bitmap starting at the given offset.
1442;			Use the supplied size to determine the amount of I/O to generate
1443;			against the bitmap file. Return a pointer to the bitmap block.
1444;
1445; Inputs:
1446;	hfsmp		--	Pointer to hfs mount
1447;	offset		--	byte offset into the bitmap file
1448;	size		--  How much I/O to generate against the bitmap file.
1449;
1450; Outputs:
;	buffer		--	Pointer to bitmap block data corresponding to "block"
1452;	blockRef	--  struct 'buf' pointer which MUST be released in a subsequent call.
1453;_______________________________________________________________________
1454*/
1455static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
1456		uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
1457{
1458
1459	OSErr			err;
1460	struct buf *bp = NULL;
1461	struct vnode *vp = NULL;
1462	daddr64_t block;
1463
1464	/* This function isn't supported for HFS standard */
1465	if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
1466		return EINVAL;
1467	}
1468
1469	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1470		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
1471	}
1472
1473	/*
1474	 * volume bitmap blocks are protected by the allocation file lock
1475	 */
1476	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1477
1478	vp = hfsmp->hfs_allocation_vp;	/* use allocation file vnode */
1479
1480	/*
1481	 * The byte offset argument must be converted into bitmap-relative logical
1482	 * block numbers before using it in buf_meta_bread.
1483	 *
1484	 * buf_meta_bread (and the things it calls) will eventually try to
1485	 * reconstruct the byte offset into the file by multiplying the logical
1486	 * block number passed in below by the vcbVBMIOSize field in the mount
1487	 * point.  So we prepare for that by converting the byte offset back into
1488	 * logical blocks in terms of VBMIOSize units.
1489	 *
1490	 * The amount of I/O requested and the byte offset should be computed
1491	 * based on the helper function in the frame that called us, so we can
1492	 * get away with just doing a simple divide here.
1493	 */
1494	block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
1495
1496	err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
1497
1498	if (bp) {
1499		if (err) {
1500			buf_brelse(bp);
1501			*blockRef = 0;
1502			*buffer = NULL;
1503		} else {
1504			*blockRef = bp;
1505			*buffer = (u_int32_t *)buf_dataptr(bp);
1506		}
1507	}
1508
1509	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1510		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
1511	}
1512
1513	return err;
1514}
1515
1516
1517/*
1518;_______________________________________________________________________
1519;
1520; Routine:	ReleaseBitmapBlock
1521;
; Function:	Release a bitmap block.
1523;
1524; Inputs:
1525;	vcb
1526;	blockRef
1527;	dirty
1528;_______________________________________________________________________
1529*/
1530static OSErr ReleaseBitmapBlock(
1531		ExtendedVCB		*vcb,
1532		uintptr_t		blockRef,
1533		Boolean			dirty)
1534{
1535	struct buf *bp = (struct buf *)blockRef;
1536
1537	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1538		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
1539
1540	if (blockRef == 0) {
1541		if (dirty)
1542			panic("hfs: ReleaseBitmapBlock: missing bp");
1543		return (0);
1544	}
1545
1546	if (bp) {
1547		if (dirty) {
1548			// XXXdbg
1549			struct hfsmount *hfsmp = VCBTOHFS(vcb);
1550
1551			if (hfsmp->jnl) {
1552				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
1553			} else {
1554				buf_bdwrite(bp);
1555			}
1556		} else {
1557			buf_brelse(bp);
1558		}
1559	}
1560
1561	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1562		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
1563
1564	return (0);
1565}
1566
1567/*
1568 * ReleaseScanBitmapRange
1569 *
1570 * This is used to release struct bufs that were created for use by
1571 * bitmap scanning code.  Because they may be of sizes different than the
1572 * typical runtime manipulation code, we want to force them to be purged out
1573 * of the buffer cache ASAP, so we'll release them differently than in the
1574 * ReleaseBitmapBlock case.
1575 *
1576 * Additionally, because we know that we're only reading the blocks and that they
1577 * should have been clean prior to reading them, we will never
1578 * issue a write to them (thus dirtying them).
1579 */
1580
1581static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
1582
1583	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1584		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, 0, 0, 0, 0, 0);
1585	}
1586
1587	if (bp) {
1588		/* Mark the buffer invalid if it isn't locked, then release it */
1589		if ((buf_flags(bp) & B_LOCKED) == 0) {
1590			buf_markinvalid(bp);
1591		}
1592		buf_brelse(bp);
1593	}
1594
1595	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1596		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1597	}
1598
1599	return (0);
1600}
1601
1602/*
1603_______________________________________________________________________
1604
1605Routine:	BlockAllocateContig
1606
1607Function:	Allocate a contiguous group of allocation blocks.  The
1608			allocation is all-or-nothing.  The caller guarantees that
1609			there are enough free blocks (though they may not be
1610			contiguous, in which case this call will fail).
1611
1612Inputs:
1613	vcb				Pointer to volume where space is to be allocated
1614	startingBlock	Preferred first block for allocation
1615	minBlocks		Minimum number of contiguous blocks to allocate
1616	maxBlocks		Maximum number of contiguous blocks to allocate
1617	flags
1618
1619Outputs:
1620	actualStartBlock	First block of range allocated, or 0 if error
1621	actualNumBlocks		Number of blocks allocated, or 0 if error
1622_______________________________________________________________________
1623*/
static OSErr BlockAllocateContig(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr retval = noErr;
	uint32_t currentStart = startingBlock;

	uint32_t foundStart = 0; // values to emit to caller
	uint32_t foundCount = 0;

	uint32_t collision_start = 0;  // if we have to re-allocate a recently deleted extent, use this
	uint32_t collision_count = 0;

	int err;
	int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
	Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);

	int recently_deleted = 0;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	/*
	 * Loop until we find an extent that is not pending in the journal's
	 * unmap queue (or until we give up).  Each pass finds a candidate,
	 * then rejects it if it overlaps a recently-deallocated range whose
	 * TRIM hasn't been committed yet.
	 */
	while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {

		/* Try and find something that works. */
		/* (do/while(0) is used only so the two searches share one exit.) */
		do {
			/*
			 * NOTE: If the only contiguous free extent of at least minBlocks
			 * crosses startingBlock (i.e. starts before, ends after), then we
			 * won't find it. Earlier versions *did* find this case by letting
			 * the second search look past startingBlock by minBlocks.  But
			 * with the free extent cache, this can lead to duplicate entries
			 * in the cache, causing the same blocks to be allocated twice.
			 */
			retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
					maxBlocks, useMetaZone, true, &foundStart, &foundCount);

			if (retval == dskFulErr && currentStart != 0) {
				/*
				 * We constrain the endingBlock so we don't bother looking for ranges
				 * that would overlap those found in the previous call, if the summary bitmap
				 * is not on for this volume.  If it is, then we assume that it was not trust
				 * -worthy and do a full scan.
				 */
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
							maxBlocks, useMetaZone, false, &foundStart, &foundCount);
				}
				else {
					retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
							maxBlocks, useMetaZone, false, &foundStart, &foundCount);
				}
			}
		} while (0);

		if (retval != noErr) {
			goto bailout;
		}

		/* Do we overlap with the recently found collision extent? */
		if (collision_start) {
			if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
				/*
				 * We've looped around, and the only thing we could use was the collision extent.
				 * Since we are allowed to use it, go ahead and do so now.
				 */
				if(allowReuse) {
					/*
					 * then we couldn't find anything except values which might have been
					 * recently deallocated. just return our cached value if we are allowed to.
					 */
					foundStart = collision_start;
					foundCount = collision_count;
					goto bailout;
				}
				else {
					/* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
					retval = dskFulErr;
					goto bailout;
				}
			}
		}

		/* OK, we know we must not have collided . See if this one is recently deleted */
		if (hfsmp->jnl) {
			recently_deleted = 0;
			uint32_t nextStart;
			err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
					(uint64_t) foundCount, &recently_deleted, &nextStart);
			if (err == 0) {
				if(recently_deleted != 0) {
					/*
					 * these blocks were recently deleted/deallocated.  Cache the extent, but
					 * but keep searching to see if we can find one that won't collide here.
					 */
					if (collision_start == 0) {
						collision_start = foundStart;
						collision_count = foundCount;
					}
					recently_deleted = 0;

					/*
					 * advance currentStart to the point just past the overlap we just found. Note that
					 * we will automatically loop around to start of the bitmap as needed.
					 */
					currentStart = nextStart;
					/* Unset foundStart/Count to allow it to loop around again. */
					foundStart = 0;
					foundCount = 0;
				}
			}
		} // end jnl/deleted case

		/*
		 * If we found something good, we'd break out of the loop at the top; foundCount
		 * and foundStart should be set.
		 */

	} // end while loop.

bailout:
	/* mark the blocks as in-use */
	if (retval == noErr) {
		*actualStartBlock = foundStart;
		*actualNumBlocks = foundCount;
		/*
		 * NOTE(review): the error from BlockMarkAllocatedInternal is not
		 * propagated to the caller (retval stays noErr) -- confirm this is
		 * intentional.  Also, the DBG_FUNC_END trace fires only on the
		 * success path, so failed calls leave an unmatched START event.
		 */
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);

		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) {
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0);
		}
	}

	return retval;

}
1765
1766
1767/*
1768_______________________________________________________________________
1769
1770Routine:	BlockAllocateAny
1771
1772Function:	Allocate one or more allocation blocks.  If there are fewer
1773			free blocks than requested, all free blocks will be
1774			allocated.  The caller guarantees that there is at least
1775			one free block.
1776
1777Inputs:
1778	vcb				Pointer to volume where space is to be allocated
1779	startingBlock	Preferred first block for allocation
1780	endingBlock		Last block to check + 1
1781	maxBlocks		Maximum number of contiguous blocks to allocate
	flags			Allocation option bits (e.g. HFS_ALLOC_METAZONE to
					permit allocating from the metadata zone)
	trustSummary	If true, an ENOSPC result from the summary table is
					treated as authoritative and dskFulErr is returned
					without scanning the bitmap
1783
1784Outputs:
1785	actualStartBlock	First block of range allocated, or 0 if error
1786	actualNumBlocks		Number of blocks allocated, or 0 if error
1787_______________________________________________________________________
1788*/
1789
1790static OSErr BlockAllocateAny(
1791		ExtendedVCB		*vcb,
1792		u_int32_t		startingBlock,
1793		register u_int32_t	endingBlock,
1794		u_int32_t		maxBlocks,
1795		u_int32_t		flags,
1796		Boolean			trustSummary,
1797		u_int32_t		*actualStartBlock,
1798		u_int32_t		*actualNumBlocks)
1799{
1800
1801	/*
1802	 * If it is enabled, scan through the summary table to find the first free block.
1803	 *
1804	 * If it reports that there are not any free blocks, we could have a false
1805	 * positive, so in that case, use the input arguments as a pass through.
1806	 */
1807	uint32_t start_blk  = startingBlock;
1808	uint32_t end_blk = endingBlock;
1809	struct hfsmount *hfsmp;
1810	OSErr err;
1811
1812	hfsmp = (struct hfsmount*)vcb;
1813	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1814		uint32_t suggested_start;
1815
1816		/*
1817		 * If the summary table is enabled, scan through it to find the first free
1818		 * block.  If there was an error, or we couldn't find anything free in the
1819		 * summary table, then just leave the start_blk fields unmodified. We wouldn't
1820		 * have gotten to this point if the mount point made it look like there was possibly
1821		 * free space in the FS.
1822		 */
1823		err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
1824		if (err == 0) {
1825			start_blk = suggested_start;
1826		}
1827		else {
1828			/* Differentiate between ENOSPC and a more esoteric error in the above call. */
1829			if ((err == ENOSPC) && (trustSummary)) {
1830				/*
1831				 * The 'trustSummary' argument is for doing a full scan if we really
1832				 * really, need the space and we think it's somewhere but can't find it in the
1833				 * summary table. If it's true, then we trust the summary table and return
1834				 * dskFulErr if we couldn't find it above.
1835				 */
1836				return dskFulErr;
1837			}
1838			/*
1839			 * If either trustSummary was false or we got a different errno, then we
1840			 * want to fall through to the real bitmap single i/o code...
1841			 */
1842		}
1843	}
1844
1845	err =  BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
1846			flags, actualStartBlock, actualNumBlocks);
1847
1848	return err;
1849}
1850
1851
1852/*
1853 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
1854 * where the free allocation blocks are.  Inputs and outputs are the same as for
 * BlockAllocateAny (minus the trustSummary argument).
1856 */
1857
/*
 * Scan the on-disk allocation bitmap for the first run of free blocks at or
 * after startingBlock, and mark up to maxBlocks of them in-use.
 *
 * Bits honored in 'flags':
 *   HFS_ALLOC_METAZONE - permit allocating from within the metadata zone
 *   HFS_ALLOC_FLUSHTXN - do not reject blocks that are on a pending TRIM
 *                        list (caller has arranged for a journal flush)
 */
static OSErr BlockAllocateAnyBitmap(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		register u_int32_t	endingBlock,
		u_int32_t		maxBlocks,
		u_int32_t		flags,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	block;			//	current block number
	register u_int32_t	currentWord;	//	value of the current word within the bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t  *buffer = NULL;			//	cursor into the cached bitmap block
	u_int32_t  *currCache = NULL;		//	base of the cached bitmap block; NULL when no block is held
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	Boolean dirty = false;				//	NOTE(review): set below but never read in this function
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	uint32_t summary_block_scan = 0;	//	bitmap block we have fully scanned (for lazy summary update)
	Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
	Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);

restartSearchAny:
	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else {
				err = dskFulErr;
				goto Exit;
			}
		}
	}

	//	Since this routine doesn't wrap around
	if (maxBlocks > (endingBlock - startingBlock)) {
		maxBlocks = endingBlock - startingBlock;
	}

	//
	//	Pre-read the first bitmap block
	//
	err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
	if (err != noErr) goto Exit;
	buffer = currCache;

	//
	//	Set up the current position within the block
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		buffer += wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
		currentWord = SWAP_BE32 (*buffer);
		bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
	}

	/*
	 * While loop 1:
	 *		Find the first unallocated block starting at 'block'
	 */
	block=startingBlock;
	while (block < endingBlock) {
		if ((currentWord & bitMask) == 0)
			break;

		//	Next bit
		++block;
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					/*
					 * If summary_block_scan is non-zero, then we must have
					 * pulled a bitmap file block into core, and scanned through
					 * the entire thing.  Because we're in this loop, we are
					 * implicitly trusting that the bitmap didn't have any knowledge
					 * about this particular block.  As a result, update the bitmap
					 * (lazily, now that we've scanned it) with our findings that
					 * this particular block is completely used up.
					 */
					if (summary_block_scan != 0) {
						uint32_t summary_bit;
						(void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
						hfs_set_summary (hfsmp, summary_bit, 1);
						summary_block_scan = 0;
					}
				}

				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					block = NextBitmapBlock(vcb, block);
				}
				if (block >= endingBlock) {
					err = dskFulErr;
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;
				summary_block_scan = block;
				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

	//	Did we get to the end of the bitmap before finding a free block?
	//	If so, then couldn't allocate anything.
	if (block >= endingBlock) {
		err = dskFulErr;
		goto Exit;
	}


	/*
	 * Don't move forward just yet.  Verify that either one of the following
	 * two conditions is true:
	 * 1) journaling is not enabled
	 * 2) block is not currently on any pending TRIM list.
	 */
	if (hfsmp->jnl != NULL && (forceFlush == false)) {
		int recently_deleted = 0;
		uint32_t nextblk;
		err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
		if ((err == 0) && (recently_deleted)) {

			/* release the bitmap block & unset currCache.  we may jump past it. */
			err = ReleaseBitmapBlock(vcb, blockRef, false);
			currCache = NULL;
			if (err != noErr) {
				goto Exit;
			}
			/* set our start to nextblk, and re-do the search. */
			startingBlock = nextblk;
			goto restartSearchAny;
		}
	}


	//	Return the first block in the allocated range
	*actualStartBlock = block;
	dirty = true;

	//	If we could get the desired number of blocks before hitting endingBlock,
	//	then adjust endingBlock so we won't keep looking.  Ideally, the comparison
	//	would be (block + maxBlocks) < endingBlock, but that could overflow.  The
	//	comparison below yields identical results, but without overflow.
	if (block < (endingBlock-maxBlocks)) {
		endingBlock = block + maxBlocks;	//	if we get this far, we've found enough
	}

	/*
	 * While loop 2:
	 *		Scan the bitmap, starting at 'currentWord' in the current
	 *		bitmap block.  Continue iterating through the bitmap until
	 * 		either we hit an allocated block, or until we have accumulated
	 *		maxBlocks worth of bitmap.
	 */

	/* Continue until we see an allocated block */
	while ((currentWord & bitMask) == 0) {
		//	Move to the next block.  If no more, then exit.
		++block;
		if (block == endingBlock) {
			break;
		}

		//	Next bit
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;

				/* We're only reading the bitmap here, so mark it as clean */
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) {
					goto Exit;
				}

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;
					nextBlock = NextBitmapBlock(vcb, block);
					if (nextBlock != block) {
						goto Exit;  /* allocation gap, so stop */
					}
				}

				if (block >= endingBlock) {
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) {
					goto Exit;
				}
				buffer = currCache;
				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

Exit:
	if (currCache) {
		/* Release the bitmap reference prior to marking bits in-use */
		(void) ReleaseBitmapBlock(vcb, blockRef, false);
		currCache = NULL;
	}

	if (err == noErr) {
		/* 'block' is one past the last free block found; compute the run length. */
		*actualNumBlocks = block - *actualStartBlock;

		// sanity check
		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
			panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}

		/* Mark the bits found as in-use */
		err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks);
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
2127
2128
2129/*
2130_______________________________________________________________________
2131
2132Routine:	BlockAllocateKnown
2133
2134Function:	Try to allocate space from known free space in the free
2135			extent cache.
2136
2137Inputs:
2138	vcb				Pointer to volume where space is to be allocated
2139	maxBlocks		Maximum number of contiguous blocks to allocate
2140
2141Outputs:
2142	actualStartBlock	First block of range allocated, or 0 if error
2143	actualNumBlocks		Number of blocks allocated, or 0 if error
2144
2145Returns:
2146	dskFulErr		Free extent cache is empty
2147_______________________________________________________________________
2148*/
2149
static OSErr BlockAllocateKnown(
		ExtendedVCB		*vcb,
		u_int32_t		maxBlocks,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	u_int32_t		foundBlocks;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);

	/* Bail out with dskFulErr if the free extent cache has nothing to offer. */
	hfs_lock_mount (hfsmp);
	lck_spin_lock(&vcb->vcbFreeExtLock);
	if ( vcb->vcbFreeExtCnt == 0 ||
			vcb->vcbFreeExt[0].blockCount == 0) {
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		hfs_unlock_mount(hfsmp);
		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
		return dskFulErr;
	}
	lck_spin_unlock(&vcb->vcbFreeExtLock);
	hfs_unlock_mount(hfsmp);

	/*
	 * NOTE(review): the spin lock is dropped above and re-acquired here, so
	 * in principle the cache could change in between.  Presumably the caller's
	 * transaction / bitmap locking prevents a competing mutation — confirm
	 * before relying on this window being benign.
	 */
	lck_spin_lock(&vcb->vcbFreeExtLock);

	//	Just grab up to maxBlocks of the first (largest) free extent.
	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
	foundBlocks = vcb->vcbFreeExt[0].blockCount;
	if (foundBlocks > maxBlocks)
		foundBlocks = maxBlocks;
	*actualNumBlocks = foundBlocks;

	lck_spin_unlock(&vcb->vcbFreeExtLock);

	/* Pull the portion we took out of the free extent cache. */
	remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);

	// sanity check
	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
	{
		printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
		err = EIO;
	}
	else
	{
		//
		//	Now mark the found extent in the bitmap
		//
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
	}

	sanity_check_free_ext(vcb, 0);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
2213
2214/*
2215 * BlockMarkAllocated
2216 *
 * Exported wrapper around BlockMarkAllocatedInternal, which actually sets the
 * bits in the on-disk allocation bitmap.  (This wrapper formerly also updated
 * the red-black-tree allocator; that path appears to have been removed, so it
 * now forwards directly to the bitmap implementation.)
2221 */
2222
2223OSErr BlockMarkAllocated(
2224		ExtendedVCB		*vcb,
2225		u_int32_t		startingBlock,
2226		register u_int32_t	numBlocks)
2227{
2228	struct hfsmount *hfsmp;
2229
2230	hfsmp = VCBTOHFS(vcb);
2231
2232	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
2233
2234}
2235
2236
2237
2238/*
2239_______________________________________________________________________
2240
2241Routine:	BlockMarkAllocatedInternal
2242
2243Function:	Mark a contiguous group of blocks as allocated (set in the
2244			bitmap).  It assumes those bits are currently marked
2245			deallocated (clear in the bitmap).  Note that this function
2246			must be called regardless of whether or not the bitmap or
2247			tree-based allocator is used, as all allocations must correctly
2248			be marked on-disk.  If the tree-based approach is running, then
2249			this will be done before the node is removed from the tree.
2250
2251Inputs:
2252	vcb				Pointer to volume where space is to be allocated
2253	startingBlock	First block number to mark as allocated
2254	numBlocks		Number of blocks to mark as allocated
2255_______________________________________________________________________
2256*/
static
OSErr BlockMarkAllocatedInternal (
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		register u_int32_t	numBlocks)
{
	OSErr			err;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t		firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t		numBits;		//	Number of bits in word to allocate
	u_int32_t		*buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	// XXXdbg
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	int force_flush = 0;
	/*
	 * Since we are about to mark these bits as in-use
	 * in the bitmap, decide if we need to alert the caller
	 * that a journal flush might be appropriate. It's safe to
	 * poke at the journal pointer here since we MUST have
	 * called start_transaction by the time this function is invoked.
	 * If the journal is enabled, then it will have taken the requisite
	 * journal locks.  If it is not enabled, then we have taken
	 * a shared lock on the global lock.
	 */
	if (hfsmp->jnl) {
		uint32_t ignore;
		err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
		if ((err == 0) && (force_flush)) {
			journal_request_immediate_flush (hfsmp->jnl);
		}
	}

	/* Remove these blocks from any pending unmap/TRIM ranges. */
	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;
	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	// XXXdbg
	// Tell the journal we are about to modify this bitmap block's buffer.
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	If the first block to allocate doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to allocate

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Allocate whole words (32 blocks) at a time.
	//

	bitMask = kAllBitsSetInWord;					//	put this in a register for 68K
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if (*currentWord != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord = SWAP_BE32 (bitMask);
		numBlocks -= kBitsPerWord;

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Allocate any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);			//	set the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	/* Release (and mark dirty) whichever bitmap block is still held. */
	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
2435
2436
2437/*
2438 * BlockMarkFree
2439 *
 * Exported wrapper around BlockMarkFreeInternal (invoked with validation
 * enabled), which actually clears the bits in the on-disk allocation bitmap.
 * (This wrapper formerly also updated the red-black-tree allocator; that path
 * appears to have been removed, so it now forwards directly.)
2444 *
2445 */
2446OSErr BlockMarkFree(
2447		ExtendedVCB		*vcb,
2448		u_int32_t		startingBlock,
2449		register u_int32_t	numBlocks)
2450{
2451	struct hfsmount *hfsmp;
2452	hfsmp = VCBTOHFS(vcb);
2453
2454	return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2455}
2456
2457
2458/*
2459 * BlockMarkFreeUnused
2460 *
2461 * Scan the bitmap block beyond end of current file system for bits
2462 * that are marked as used.  If any of the bits are marked as used,
2463 * this function marks them free.
2464 *
2465 * Note:  This was specifically written to mark all bits beyond
2466 * end of current file system during hfs_extendfs(), which makes
2467 * sure that all the new blocks added to the file system are
2468 * marked as free.   We expect that all the blocks beyond end of
2469 * current file system are always marked as free, but there might
2470 * be cases where are marked as used.  This function assumes that
2471 * the number of blocks marked as used incorrectly are relatively
2472 * small, otherwise this can overflow journal transaction size
2473 * on certain file system configurations (example, large unused
2474 * bitmap with relatively small journal).
2475 *
2476 * Input:
2477 * 	startingBlock: First block of the range to mark unused
2478 * 	numBlocks: Number of blocks in the range to mark unused
2479 *
2480 * Returns: zero on success, non-zero on error.
2481 */
2482OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t	numBlocks)
2483{
2484	int error = 0;
2485	struct hfsmount *hfsmp = VCBTOHFS(vcb);
2486	u_int32_t curNumBlocks;
2487	u_int32_t  bitsPerBlock;
2488	u_int32_t lastBit;
2489
2490	/* Use the optimal bitmap I/O size instead of bitmap block size */
2491	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
2492
2493	/*
2494	 * First clear any non bitmap allocation block aligned bits
2495	 *
2496	 * Calculate the first bit in the bitmap block next to
2497	 * the bitmap block containing the bit for startingBlock.
2498	 * Using this value, we calculate the total number of
2499	 * bits to be marked unused from startingBlock to the
2500	 * end of bitmap block containing startingBlock.
2501	 */
2502	lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
2503	curNumBlocks = lastBit - startingBlock;
2504	if (curNumBlocks > numBlocks) {
2505		curNumBlocks = numBlocks;
2506	}
2507	error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2508	if (error) {
2509		return error;
2510	}
2511	startingBlock += curNumBlocks;
2512	numBlocks -= curNumBlocks;
2513
2514	/*
2515	 * Check a full bitmap block for any 'used' bit.  If any bit is used,
2516	 * mark all the bits only in that bitmap block as free.  This ensures
2517	 * that we do not write unmodified bitmap blocks and do not
2518	 * overwhelm the journal.
2519	 *
2520	 * The code starts by checking full bitmap block at a time, and
2521	 * marks entire bitmap block as free only if any bit in that bitmap
2522	 * block is marked as used.  In the end, it handles the last bitmap
2523	 * block which might be partially full by only checking till the
2524	 * caller-specified last bit and if any bit is set, only mark that
2525	 * range as free.
2526	 */
2527	while (numBlocks) {
2528		if (numBlocks >= bitsPerBlock) {
2529			curNumBlocks = bitsPerBlock;
2530		} else {
2531			curNumBlocks = numBlocks;
2532		}
2533		if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
2534			error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2535			if (error) {
2536				return error;
2537			}
2538		}
2539		startingBlock += curNumBlocks;
2540		numBlocks -= curNumBlocks;
2541	}
2542
2543	return error;
2544}
2545
2546/*
2547_______________________________________________________________________
2548
2549Routine:	BlockMarkFreeInternal
2550
2551Function:	Mark a contiguous group of blocks as free (clear in the
2552			bitmap).  It assumes those bits are currently marked
2553			allocated (set in the bitmap).
2554
2555Inputs:
2556	vcb				Pointer to volume where space is to be freed
2557	startingBlock	First block number to mark as freed
2558	numBlocks		Number of blocks to mark as freed
2559	do_validate 	If true, validate that the blocks being
2560					deallocated to check if they are within totalBlocks
2561					for current volume and whether they were allocated
2562					before they are marked free.
2563_______________________________________________________________________
2564*/
2565static
2566OSErr BlockMarkFreeInternal(
2567		ExtendedVCB		*vcb,
2568		u_int32_t		startingBlock_in,
2569		register u_int32_t	numBlocks_in,
2570		Boolean 		do_validate)
2571{
2572	OSErr		err;
2573	u_int32_t	startingBlock = startingBlock_in;
2574	u_int32_t	numBlocks = numBlocks_in;
2575	uint32_t	unmapStart = startingBlock_in;
2576	uint32_t	unmapCount = numBlocks_in;
2577	uint32_t	wordIndexInBlock;
2578	u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
2579	u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
2580	u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
2581	u_int32_t	currentBit;		//	Bit index within word of current bit to allocate
2582	u_int32_t	numBits;		//	Number of bits in word to allocate
2583	u_int32_t	*buffer = NULL;
2584	uintptr_t	blockRef;
2585	u_int32_t	bitsPerBlock;
2586	u_int32_t	wordsPerBlock;
2587	// XXXdbg
2588	struct hfsmount *hfsmp = VCBTOHFS(vcb);
2589
2590	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2591		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
2592
2593	/*
2594	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
2595	 * need to be able to free blocks being relocated during hfs_truncatefs.
2596	 */
2597	if ((do_validate == true) &&
2598			(startingBlock + numBlocks > vcb->totalBlocks)) {
2599		if (ALLOC_DEBUG) {
2600			panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
2601		}
2602
2603		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
2604		hfs_mark_volume_inconsistent(vcb);
2605		err = EIO;
2606		goto Exit;
2607	}
2608
2609	//
2610	//	Pre-read the bitmap block containing the first word of allocation
2611	//
2612
2613	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2614	if (err != noErr) goto Exit;
2615	// XXXdbg
2616	if (hfsmp->jnl) {
2617		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2618	}
2619
2620	//
2621	//	Figure out how many bits and words per bitmap block.
2622	//
2623	bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
2624	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2625	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2626
2627	//
2628	// Look for a range of free blocks immediately before startingBlock
2629	// (up to the start of the current bitmap block).  Set unmapStart to
2630	// the first free block.
2631	//
2632	currentWord = buffer + wordIndexInBlock;
2633	currentBit = startingBlock % kBitsPerWord;
2634	bitMask = kHighBitInWordMask >> currentBit;
2635	while (true) {
2636		// Move currentWord/bitMask back by one bit
2637		bitMask <<= 1;
2638		if (bitMask == 0) {
2639			if (--currentWord < buffer)
2640				break;
2641			bitMask = kLowBitInWordMask;
2642		}
2643
2644		if (*currentWord & SWAP_BE32(bitMask))
2645			break;	// Found an allocated block.  Stop searching.
2646		--unmapStart;
2647		++unmapCount;
2648	}
2649
2650	//
2651	//	If the first block to free doesn't start on a word
2652	//	boundary in the bitmap, then treat that first word
2653	//	specially.
2654	//
2655
2656	currentWord = buffer + wordIndexInBlock;
2657	wordsLeft = wordsPerBlock - wordIndexInBlock;
2658	currentBit = startingBlock % kBitsPerWord;
2659	if (currentBit != 0) {
2660		bitMask = kAllBitsSetInWord >> currentBit;	//	turn off all bits before currentBit
2661		numBits = kBitsPerWord - currentBit;		//	number of remaining bits in this word
2662		if (numBits > numBlocks) {
2663			numBits = numBlocks;					//	entire allocation is inside this one word
2664			bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));	//	turn off bits after last
2665		}
2666		if ((do_validate == true) &&
2667				(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2668			goto Corruption;
2669		}
2670		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
2671		numBlocks -= numBits;						//	adjust number of blocks left to free
2672
2673		++currentWord;								//	move to next word
2674		--wordsLeft;								//	one less word left in this block
2675	}
2676
2677	//
2678	//	Free whole words (32 blocks) at a time.
2679	//
2680
2681	while (numBlocks >= kBitsPerWord) {
2682		if (wordsLeft == 0) {
2683			//	Read in the next bitmap block
2684			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block
2685
2686			buffer = NULL;
2687			err = ReleaseBitmapBlock(vcb, blockRef, true);
2688			if (err != noErr) goto Exit;
2689
2690			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2691			if (err != noErr) goto Exit;
2692
2693			// XXXdbg
2694			if (hfsmp->jnl) {
2695				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2696			}
2697
2698			//	Readjust currentWord and wordsLeft
2699			currentWord = buffer;
2700			wordsLeft = wordsPerBlock;
2701		}
2702		if ((do_validate == true) &&
2703				(*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
2704			goto Corruption;
2705		}
2706		*currentWord = 0;							//	clear the entire word
2707		numBlocks -= kBitsPerWord;
2708
2709		++currentWord;								//	move to next word
2710		--wordsLeft;									//	one less word left in this block
2711	}
2712
2713	//
2714	//	Free any remaining blocks.
2715	//
2716
2717	if (numBlocks != 0) {
2718		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
2719		if (wordsLeft == 0) {
2720			//	Read in the next bitmap block
2721			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block
2722
2723			buffer = NULL;
2724			err = ReleaseBitmapBlock(vcb, blockRef, true);
2725			if (err != noErr) goto Exit;
2726
2727			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2728			if (err != noErr) goto Exit;
2729
2730			// XXXdbg
2731			if (hfsmp->jnl) {
2732				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2733			}
2734
2735			//	Readjust currentWord and wordsLeft
2736			currentWord = buffer;
2737			wordsLeft = wordsPerBlock;
2738		}
2739		if ((do_validate == true) &&
2740				(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2741			goto Corruption;
2742		}
2743		*currentWord &= SWAP_BE32 (~bitMask);			//	clear the bits in the bitmap
2744
2745		//	No need to update currentWord or wordsLeft
2746	}
2747
2748	//
2749	// Look for a range of free blocks immediately after the range we just freed
2750	// (up to the end of the current bitmap block).
2751	//
2752	wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
2753	wordsLeft = wordsPerBlock - wordIndexInBlock;
2754	currentWord = buffer + wordIndexInBlock;
2755	currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
2756	bitMask = kHighBitInWordMask >> currentBit;
2757	while (true) {
2758		// Move currentWord/bitMask/wordsLeft forward one bit
2759		bitMask >>= 1;
2760		if (bitMask == 0) {
2761			if (--wordsLeft == 0)
2762				break;
2763			++currentWord;
2764			bitMask = kHighBitInWordMask;
2765		}
2766
2767		if (*currentWord & SWAP_BE32(bitMask))
2768			break;	// Found an allocated block.  Stop searching.
2769		++unmapCount;
2770	}
2771
2772Exit:
2773
2774	if (buffer)
2775		(void)ReleaseBitmapBlock(vcb, blockRef, true);
2776
2777	if (err == noErr) {
2778		hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
2779	}
2780
2781	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2782		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2783
2784	return err;
2785
2786Corruption:
2787#if DEBUG_BUILD
2788	panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
2789#else
2790	printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
2791	hfs_mark_volume_inconsistent(vcb);
2792	err = EIO;
2793	goto Exit;
2794#endif
2795}
2796
2797
2798/*
2799_______________________________________________________________________
2800
2801Routine:	BlockFindContiguous
2802
2803Function:	Find a contiguous range of blocks that are free (bits
2804			clear in the bitmap).  If a contiguous range of the
2805			minimum size can't be found, an error will be returned.
2806			This is only needed to support the bitmap-scanning logic,
2807			as the red-black tree should be able to do this by internally
2808			searching its tree.
2809
2810Inputs:
2811	vcb				Pointer to volume where space is to be allocated
2812	startingBlock	Preferred first block of range
2813	endingBlock		Last possible block in range + 1
2814	minBlocks		Minimum number of blocks needed.  Must be > 0.
2815	maxBlocks		Maximum (ideal) number of blocks desired
2816	useMetaZone	OK to dip into metadata allocation zone
2817
2818Outputs:
2819	actualStartBlock	First block of range found, or 0 if error
2820	actualNumBlocks		Number of blocks found, or 0 if error
2821
2822Returns:
2823	noErr			Found at least minBlocks contiguous
2824	dskFulErr		No contiguous space found, or all less than minBlocks
2825_______________________________________________________________________
2826*/
2827
/*
 * NOTE(review): 'trustSummary' is not described in the comment block above.
 * From the code below: when true (and HFS_SUMMARY_TABLE is enabled) the
 * in-core summary table is consulted to skip over fully-allocated bitmap
 * pages; presumably callers validating the summary table itself pass
 * false -- TODO confirm against call sites.
 */
static OSErr BlockFindContiguous(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		Boolean			trustSummary,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	currentBlock;		//	Block we're currently looking at.
	u_int32_t			firstBlock;			//	First free block in current extent.
	u_int32_t			stopBlock;			//	If we get to this block, stop searching for first free block.
	u_int32_t			foundBlocks;		//	Number of contiguous free blocks in current extent.
	u_int32_t			*buffer = NULL;
	register u_int32_t	*currentWord;
	register u_int32_t	bitMask;
	register u_int32_t	wordsLeft;
	register u_int32_t	tempWord;
	uintptr_t  blockRef;
	u_int32_t  wordsPerBlock;
	u_int32_t  updated_free_extent = 0;
	struct hfsmount *hfsmp = (struct hfsmount*) vcb;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else
				goto DiskFull;
		}
	}

	if ((endingBlock - startingBlock) < minBlocks)
	{
		//	The set of blocks we're checking is smaller than the minimum number
		//	of blocks, so we couldn't possibly find a good range.
		goto DiskFull;
	}

	//	Any free run starting at or beyond stopBlock cannot contain minBlocks
	//	blocks before endingBlock, so the first-free search stops there.
	stopBlock = endingBlock - minBlocks + 1;
	currentBlock = startingBlock;
	firstBlock = 0;

	/*
	 * Skip over metadata blocks.
	 */
	if (!useMetaZone)
		currentBlock = NextBitmapBlock(vcb, currentBlock);

	/*
	 * Use the summary table if we can.  Skip over any totally
	 * allocated blocks.  currentBlock should now point to the first
	 * block beyond the metadata zone if the metazone allocations are not
	 * allowed in this invocation.
	 */
	if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
		uint32_t suggestion;
		if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
			currentBlock = suggestion;
		}
	}


	//
	//	Pre-read the first bitmap block.
	//
	err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
	if ( err != noErr ) goto ErrorExit;

	//
	//	Figure out where currentBlock is within the buffer.
	//
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

	// NOTE(review): the (wordsPerBlock-1) mask below assumes vcbVBMIOSize is
	// a power of two -- TODO confirm that invariant is enforced at mount time.
	wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);	// Current index into buffer
	currentWord = buffer + wordsLeft;
	wordsLeft = wordsPerBlock - wordsLeft;

	/*
	 * This outer do-while loop is the main body of this function.  Its job is
	 * to search through the blocks (until we hit 'stopBlock'), and iterate
	 * through swaths of allocated bitmap until it finds free regions.
	 */

	do
	{
		foundBlocks = 0;
		uint32_t summary_block_scan = 0;
		/*
		 * Inner while loop 1:
		 *		Look for free blocks, skipping over allocated ones.
		 *
		 * Initialization starts with checking the initial partial word
		 * if applicable.
		 */
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (tempWord & bitMask)
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUnused;

			//	Didn't find any unused bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < stopBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					/*
					 * If summary_block_scan is non-zero, then we must have
					 * pulled a bitmap file block into core, and scanned through
					 * the entire thing.  Because we're in this loop, we are
					 * implicitly trusting that the bitmap didn't have any knowledge
					 * about this particular block.  As a result, update the bitmap
					 * (lazily, now that we've scanned it) with our findings that
					 * this particular block is completely used up.
					 */
					if (summary_block_scan != 0) {
						uint32_t summary_bit;
						(void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
						hfs_set_summary (hfsmp, summary_bit, 1);
						summary_block_scan = 0;
					}
				}
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					currentBlock = NextBitmapBlock(vcb, currentBlock);
					if (currentBlock >= stopBlock) {
						goto LoopExit;
					}
				}

				/* Skip over fully allocated bitmap blocks if we can */
				if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
					uint32_t suggestion;
					if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
						if (suggestion < stopBlock) {
							currentBlock = suggestion;
						}
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				/*
				 * Set summary_block_scan to be the block we just read into the block cache.
				 *
				 * At this point, we've just read an allocation block worth of bitmap file
				 * into the buffer above, but we don't know if it is completely allocated or not.
				 * If we find that it is completely allocated/full then we will jump
				 * through this loop again and set the appropriate summary bit as fully allocated.
				 */
				summary_block_scan = currentBlock;
				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are clear
			if ((tempWord = SWAP_BE32(*currentWord)) + 1)	//	non-zero if any bits were clear
			{
				//	Figure out which bit is clear
				bitMask = kHighBitInWordMask;
				while (tempWord & bitMask)
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		//	Found the free bit; break out to FoundUnused.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
		}

FoundUnused:
		//	Make sure the unused bit is early enough to use
		if (currentBlock >= stopBlock)
		{
			break;
		}

		//	Remember the start of the extent
		firstBlock = currentBlock;


		/*
		 * Inner while loop 2:
		 *		We get here if we find a free block. Count the number
		 * 		of contiguous free blocks observed.
		 *
		 * Initialization starts with checking the initial partial word
		 * if applicable.
		 */
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (bitMask && !(tempWord & bitMask))
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUsed;

			//	Didn't find any used bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < endingBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					//	A jump here means the metadata zone interrupts the run,
					//	so the current free extent cannot be extended further.
					nextBlock = NextBitmapBlock(vcb, currentBlock);
					if (nextBlock != currentBlock) {
						goto LoopExit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are set
			if ((tempWord = SWAP_BE32(*currentWord)) != 0)
			{
				//	Figure out which bit is set
				bitMask = kHighBitInWordMask;
				while (!(tempWord & bitMask))
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;		//	Found the used bit; break out to FoundUsed.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;

			//	If we found at least maxBlocks, we can quit early.
			if ((currentBlock - firstBlock) >= maxBlocks)
				break;
		}

FoundUsed:
		//	Make sure we didn't run out of bitmap looking for a used block.
		//	If so, pin to the end of the bitmap.
		if (currentBlock > endingBlock)
			currentBlock = endingBlock;

		//	Figure out how many contiguous free blocks there were.
		//	Pin the answer to maxBlocks.
		foundBlocks = currentBlock - firstBlock;
		if (foundBlocks > maxBlocks)
			foundBlocks = maxBlocks;
		if (foundBlocks >= minBlocks)
			break;		//	Found what we needed!

		/*
		 * We did not find the total blocks we were looking for, but
		 * add this free block run to our free extent cache list, if possible.
		 */
		if (hfsmp->jnl == NULL) {
			/* If there is no journal, go ahead and add to the free ext cache. */
			updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
		}
		else {
			/*
			 * If journaled, only add to the free extent cache if this block is not
			 * waiting for a TRIM to complete; that implies that the transaction that freed it
			 * has not yet been committed to stable storage.
			 */
			int recently_deleted = 0;
			uint32_t nextblock;
			err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
					(uint64_t)foundBlocks, &recently_deleted, &nextblock);
			if ((err) || (recently_deleted == 0))  {
				/* if we hit an error, or the blocks not recently freed, go ahead and insert it */
				updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
			}
			//	CheckUnmappedBytes errors are advisory only; clear err so the
			//	search result (not the cache bookkeeping) decides our return.
			err = 0;
		}

	} while (currentBlock < stopBlock);
LoopExit:

	//	Return the outputs.
	//	Note: DiskFull and ErrorExit below are also jump targets from earlier
	//	in the function; the error paths deliberately fall through here so the
	//	output parameters are zeroed on every failure.
	if (foundBlocks < minBlocks)
	{
DiskFull:
		err = dskFulErr;
ErrorExit:
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	else
	{
		err = noErr;
		*actualStartBlock = firstBlock;
		*actualNumBlocks = foundBlocks;
		/*
		 * Sanity check for overflow
		 */
		if ((firstBlock + foundBlocks) > vcb->allocLimit) {
			panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
					vcb->vcbVN, startingBlock, endingBlock, currentBlock,
					firstBlock, stopBlock, minBlocks, foundBlocks);
		}
	}

	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
		int i;
		u_int32_t min_start = vcb->totalBlocks;

		// set the nextAllocation pointer to the smallest free block number
		// we've seen so on the next mount we won't rescan unnecessarily
		lck_spin_lock(&vcb->vcbFreeExtLock);
		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
			if (vcb->vcbFreeExt[i].startBlock < min_start) {
				min_start = vcb->vcbFreeExt[i].startBlock;
			}
		}
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		if (min_start != vcb->totalBlocks) {
			if (min_start < vcb->nextAllocation) {
				vcb->nextAllocation = min_start;
			}
			if (min_start < vcb->sparseAllocation) {
				vcb->sparseAllocation = min_start;
			}
		}
	}

	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
3232
3233
3234/*
3235 * Count number of bits set in the given 32-bit unsigned number
3236 *
3237 * Returns:
3238 * 	Number of bits set
3239 */
/*
 * Count the number of bits set in a 32-bit unsigned value.
 *
 * Parallel ("SWAR") population count: fold adjacent bit pairs, then
 * nibbles, then bytes into per-field partial sums, and finally gather
 * the four byte sums with a multiply.  Returns 0 for an input of 0.
 */
static int num_bits_set(u_int32_t num)
{
	u_int32_t v = num;

	v = v - ((v >> 1) & 0x55555555u);                   /* 2-bit field sums  */
	v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);   /* 4-bit field sums  */
	v = (v + (v >> 4)) & 0x0F0F0F0Fu;                   /* 8-bit field sums  */

	return (int)((v * 0x01010101u) >> 24);              /* total of 4 bytes  */
}
3250
3251/*
3252 * For a given range of blocks, find the total number of blocks
3253 * allocated.  If 'stop_on_first' is true, it stops as soon as it
3254 * encounters the first allocated block.  This option is useful
3255 * to determine if any block is allocated or not.
3256 *
3257 * Inputs:
3258 * 	startingBlock	First allocation block number of the range to be scanned.
3259 * 	numBlocks	Total number of blocks that need to be scanned.
3260 * 	stop_on_first	Stop the search after the first allocated block is found.
3261 *
3262 * Output:
3263 * 	allocCount	Total number of allocation blocks allocated in the given range.
3264 *
3265 * 			On error, it is the number of allocated blocks found
3266 * 			before the function got an error.
3267 *
3268 * 			If 'stop_on_first' is set,
3269 * 				allocCount = 1 if any allocated block was found.
3270 * 				allocCount = 0 if no allocated block was found.
3271 *
3272 * Returns:
3273 * 	0 on success, non-zero on failure.
3274 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  wordsLeft;      // Number of words left in this bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to test
	u_int32_t  numBits;        // Number of bits in word to test
	u_int32_t  *buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	u_int32_t  blockCount = 0; // Running count of allocated blocks seen so far
	int  error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;	// NOTE: *allocCount is NOT set on this path.

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		// NOTE(review): the (bitsPerBlock-1) mask assumes vcbVBMIOSize is a
		// power of two -- TODO confirm that invariant is enforced at mount time.
		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	// mask off bits before firstBit
		numBits = kBitsPerWord - firstBit;			// bits remaining in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					// range ends inside this word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	// mask off bits after the range
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;	// advance to a block in the next bitmap page

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	// first numBlocks bits of the word
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
Exit:
	// Release is read-only (dirty == false); this routine never modifies the bitmap.
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	// Even on error, report however many allocated blocks were counted
	// before the failure (documented in the header comment above).
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}
3410
3411/*
3412 * Count total number of blocks that are allocated in the given
3413 * range from the bitmap.  This is used to preflight total blocks
3414 * that need to be relocated during volume resize.
3415 *
3416 * The journal or allocation file lock must be held.
3417 *
3418 * Returns:
3419 * 	0 on success, non-zero on failure.
3420 * 	On failure, allocCount is zero.
3421 */
3422	int
3423hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
3424		u_int32_t numBlocks, u_int32_t *allocCount)
3425{
3426	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
3427}
3428
3429/*
3430 * Test to see if any blocks in a range are allocated.
3431 *
3432 * Note:  On error, this function returns 1, which means that
3433 * one or more blocks in the range are allocated.  This function
3434 * is primarily used for volume resize and we do not want
3435 * to report to the caller that the blocks are free when we
3436 * were not able to deterministically find it out.  So on error,
3437 * we always report that the blocks are allocated.
3438 *
3439 * The journal or allocation file lock must be held.
3440 *
3441 * Returns
3442 *	0 if all blocks in the range are free.
3443 *	1 if blocks in the range are allocated, or there was an error.
3444 */
3445	int
3446hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
3447{
3448	int error;
3449	u_int32_t allocCount;
3450
3451	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
3452	if (error) {
3453		/* On error, we always say that the blocks are allocated
3454		 * so that volume resize does not return false success.
3455		 */
3456		return 1;
3457	} else {
3458		/* The function was deterministically able to find out
3459		 * if there was any block allocated or not.  In that case,
3460		 * the value in allocCount is good enough to be returned
3461		 * back to the caller.
3462		 */
3463		return allocCount;
3464	}
3465}
3466
3467/*
3468 * CONFIG_HFS_RBTREE
3469 * Check to see if the red-black tree is live.  Allocation file lock must be held
3470 * shared or exclusive to call this function. Note that we may call this even if
3471 * HFS is built without activating the red-black tree code.
3472 */
3473__private_extern__
3474int
3475hfs_isrbtree_active(struct hfsmount *hfsmp){
3476
3477#pragma unused (hfsmp)
3478
3479	/* Just return 0 for now */
3480	return 0;
3481}
3482
3483
3484
3485/* Summary Table Functions */
3486/*
3487 * hfs_check_summary:
3488 *
3489 * This function should be used to query the summary table to see if we can
3490 * bypass a bitmap block or not when we're trying to find a free allocation block.
3491 *
3492 *
3493 * Inputs:
3494 * 		allocblock - allocation block number. Will be used to infer the correct summary bit.
3495 * 		hfsmp -- filesystem in question.
3496 *
3497 * Output Arg:
3498 *		*freeblocks - set to 1 if we believe at least one free blocks in this vcbVBMIOSize
3499 * 		page of bitmap file.
3500 *
3501 *
3502 * Returns:
3503 * 		0 on success
3504 *		EINVAL on error
3505 *
3506 */
3507
3508static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
3509
3510	int err = EINVAL;
3511	if (hfsmp->vcbVBMIOSize) {
3512		if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3513			uint32_t index;
3514			if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
3515				*freeblocks = 0;
3516				return EINVAL;
3517			}
3518
3519			/* Ok, now that we have the bit index into the array, what byte is it in ? */
3520			uint32_t byteindex = index / kBitsPerByte;
3521			uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
3522			uint8_t bit_in_byte = index % kBitsPerByte;
3523
3524			if (current_byte & (1 << bit_in_byte)) {
3525				/*
3526				 * We do not believe there is anything free in the
3527				 * entire vcbVBMIOSize'd block.
3528				 */
3529				*freeblocks = 0;
3530			}
3531			else {
3532				/* Looks like there might be a free block here... */
3533				*freeblocks = 1;
3534			}
3535		}
3536		err = 0;
3537	}
3538
3539	return err;
3540}
3541
3542
3543#if 0
3544/*
3545 * hfs_get_next_summary
3546 *
3547 * From a given allocation block, jump to the allocation block at the start of the
3548 * next vcbVBMIOSize boundary.  This is useful when trying to quickly skip over
3549 * large swaths of bitmap once we have determined that the bitmap is relatively full.
3550 *
3551 * Inputs: hfsmount, starting allocation block number
3552 * Output Arg: *newblock will contain the allocation block number to start
3553 * querying.
3554 *
3555 * Returns:
3556 *		0 on success
3557 * 		EINVAL if the block argument is too large to be used, or the summary table not live.
3558 * 		EFBIG if there are no more summary bits to be queried
3559 */
3560static int
3561hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
3562
3563	u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3564	u_int32_t start_offset;
3565	u_int32_t next_offset;
3566	int err = EINVAL;
3567
3568	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3569		if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
3570			return err;
3571		}
3572
3573		next_offset = start_offset++;
3574
3575		if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
3576			/* Can't jump to the next summary bit. */
3577			return EINVAL;
3578		}
3579
3580		/* Otherwise, compute and return */
3581		*newblock = next_offset * bits_per_iosize;
3582		if (*newblock >= hfsmp->totalBlocks) {
3583			return EINVAL;
3584		}
3585		err = 0;
3586	}
3587
3588	return err;
3589}
3590
3591#endif
3592
3593/*
3594 * hfs_release_summary
3595 *
3596 * Given an extent that is about to be de-allocated on-disk, determine the number
3597 * of summary bitmap bits that need to be marked as 'potentially available'.
3598 * Then go ahead and mark them as free.
3599 *
3600 *	Inputs:
3601 * 		hfsmp 		- hfs mount
3602 * 		block 		- starting allocation block.
3603 * 		length		- length of the extent.
3604 *
3605 * 	Returns:
3606 *		EINVAL upon any errors.
3607 */
3608static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
3609	int err = EINVAL;
3610	uint32_t end_blk = (start_blk + length) - 1;
3611
3612	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3613		/* Figure out what the starting / ending block's summary bits are */
3614		uint32_t start_bit;
3615		uint32_t end_bit;
3616		uint32_t current_bit;
3617
3618		err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
3619		if (err) {
3620			goto release_err;
3621		}
3622		err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
3623		if (err) {
3624			goto release_err;
3625		}
3626
3627		if (ALLOC_DEBUG) {
3628			if (start_bit > end_bit) {
3629				panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
3630			}
3631		}
3632		current_bit = start_bit;
3633		while (current_bit <= end_bit) {
3634			err = hfs_set_summary (hfsmp, current_bit, 0);
3635			current_bit++;
3636		}
3637	}
3638
3639release_err:
3640	return err;
3641}
3642
3643/*
3644 * hfs_find_summary_free
3645 *
3646 * Given a allocation block as input, returns an allocation block number as output as a
3647 * suggestion for where to start scanning the bitmap in order to find free blocks.  It will
3648 * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
3649 * bit, then keep iterating over the summary bits in order to find the first free one.
3650 *
3651 * Inputs:
3652 *		hfsmp 		- hfs mount
3653 * 		block		- starting allocation block
3654 * 		newblock 	- output block as suggestion
3655 *
3656 * Returns:
3657 * 		0 on success
3658 * 		ENOSPC if we could not find a free block
3659 */
3660
int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block,  uint32_t *newblock) {

	int err = ENOSPC;
	uint32_t bit_index = 0;
	uint32_t maybe_has_blocks = 0;	// set once a candidate page is found; forces err = 0 below

	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
		uint32_t byte_index;
		uint8_t curbyte;
		uint8_t bit_in_byte;
		uint32_t summary_cap;

		/*
		 * We generate a cap for the summary search because the summary table
		 * always represents a full summary of the bitmap FILE, which may
		 * be way more bits than are necessary for the actual filesystem
		 * whose allocations are mapped by the bitmap.
		 *
		 * Compute how much of hfs_summary_size is useable for the given number
		 * of allocation blocks eligible on this FS.
		 */
		err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap);
		if (err) {
			goto summary_exit;
		}

		/* Check the starting block first */
		err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
		if (err) {
			goto summary_exit;
		}

		if (maybe_has_blocks) {
			/*
			 * It looks like the initial start block could have something.
			 * Short-circuit and just use that.
			 */
			*newblock = block;
			goto summary_exit;
		}

		/*
		 * OK, now we know that the first block was useless.
		 * Get the starting summary bit, and find it in the array
		 */
		maybe_has_blocks = 0;
		err = hfs_get_summary_index (hfsmp, block, &bit_index);
		if (err) {
			goto summary_exit;
		}

		/* Iterate until we find something. */
		// NOTE(review): the bound is inclusive (<= summary_cap), so the page
		// containing allocLimit itself is examined -- presumably intentional
		// since that page can still hold eligible blocks below the limit;
		// TODO confirm.
		while (bit_index <= summary_cap) {
			byte_index = bit_index / kBitsPerByte;
			curbyte = hfsmp->hfs_summary_table[byte_index];
			bit_in_byte = bit_index % kBitsPerByte;

			if (curbyte & (1 << bit_in_byte)) {
				/* nothing here.  increment and move on */
				bit_index++;
			}
			else {
				/*
				 * found something! convert bit_index back into
				 * an allocation block for use. 'newblock' will now
				 * contain the proper allocation block # based on the bit
				 * index.
				 */
				err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
				if (err) {
					goto summary_exit;
				}
				maybe_has_blocks = 1;
				break;
			}
		}

		/* If our loop didn't find anything, set err to ENOSPC */
		if (maybe_has_blocks == 0) {
			err = ENOSPC;
		}
	}

	/* If the summary table is not active for this mount, we'll just return ENOSPC */
summary_exit:
	// Success is defined solely by having found a candidate page: any earlier
	// error code is overridden with 0 when maybe_has_blocks was set.
	if (maybe_has_blocks) {
		err = 0;
	}

	return err;
}
3752
3753/*
3754 * hfs_get_summary_allocblock
3755 *
3756 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
3757 *
3758 * Inputs:
3759 *		hfsmp 			- hfs mount
 * 		summarybit 		- summary bit index
3761 *		*alloc			- allocation block number in the bitmap file.
3762 *
3763 * Output:
3764 *		0 on success
3765 * 		EINVAL on failure
3766 */
3767int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
3768		summarybit, uint32_t *alloc) {
3769	uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3770	uint32_t allocblk;
3771
3772	allocblk = summarybit * bits_per_iosize;
3773
3774	if (allocblk >= hfsmp->totalBlocks) {
3775		return EINVAL;
3776	}
3777	else {
3778		*alloc = allocblk;
3779	}
3780
3781	return 0;
3782}
3783
3784
3785/*
3786 * hfs_set_summary:
3787 *
3788 * This function should be used to manipulate the summary table
3789 *
3790 * The argument 'inuse' will set the value of the bit in question to one or zero
3791 * depending on its value.
3792 *
3793 * Inputs:
3794 * 		hfsmp 		- hfs mount
3795 *		summarybit	- the bit index into the summary table to set/unset.
3796 * 		inuse		- the value to assign to the bit.
3797 *
3798 * Returns:
3799 * 		0 on success
3800 *		EINVAL on error
3801 *
3802 */
3803
3804static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
3805
3806	int err = EINVAL;
3807	if (hfsmp->vcbVBMIOSize) {
3808		if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3809
3810			if (ALLOC_DEBUG) {
3811				if (hfsmp->hfs_summary_table == NULL) {
3812					panic ("hfs_set_summary: no table for %p ", hfsmp);
3813				}
3814			}
3815
3816			/* Ok, now that we have the bit index into the array, what byte is it in ? */
3817			uint32_t byte_index = summarybit / kBitsPerByte;
3818			uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
3819			uint8_t bit_in_byte = summarybit % kBitsPerByte;
3820
3821			if (inuse) {
3822				current_byte = (current_byte | (1 << bit_in_byte));
3823			}
3824			else {
3825				current_byte = (current_byte & ~(1 << bit_in_byte));
3826			}
3827
3828			hfsmp->hfs_summary_table[byte_index] = current_byte;
3829		}
3830		err = 0;
3831	}
3832
3833	return err;
3834}
3835
3836
3837/*
3838 * hfs_get_summary_index:
3839 *
3840 * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
3841 * of IO against the bitmap file.
3842 *
3843 * Returns:
3844 *		0 on success
3845 * 		EINVAL on failure
3846 */
3847static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
3848	uint32_t summary_bit;
3849	uint32_t bits_per_iosize;
3850	int err = EINVAL;
3851
3852	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3853		/* Is the input block bigger than the total number of blocks? */
3854		if (block >= hfsmp->totalBlocks) {
3855			return EINVAL;
3856		}
3857
3858		/* Is there even a vbmIOSize set? */
3859		if (hfsmp->vcbVBMIOSize == 0) {
3860			return EINVAL;
3861		}
3862
3863		bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3864
3865		summary_bit = block / bits_per_iosize;
3866
3867		*index = summary_bit;
3868		err = 0;
3869	}
3870
3871	return err;
3872}
3873
3874/*
3875 * hfs_init_summary
3876 *
3877 * From a given mount structure, compute how big the summary table should be for the given
3878 * filesystem, then allocate and bzero the memory.
3879 *
3880 * Returns:
3881 * 0 on success
3882 * EINVAL on failure
3883 */
3884int
3885hfs_init_summary (struct hfsmount *hfsmp) {
3886
3887	uint32_t summary_size;
3888	uint32_t summary_size_bytes;
3889	uint8_t *summary_table;
3890
3891	if (hfsmp->hfs_allocation_cp == NULL) {
3892		if (ALLOC_DEBUG) {
3893			printf("hfs: summary table cannot progress without a bitmap cnode! \n");
3894		}
3895		return EINVAL;
3896	}
3897	/*
3898	 * The practical maximum size of the summary table is 16KB:
3899	 *
3900	 *		(512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
3901	 *
3902	 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
3903	 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
3904	 * supporting this much worse.  The math would instead look like this:
3905	 * (512MB / 2k) / 8 == 32k.
3906	 *
3907	 * So, we will disallow the summary table if the allocation block size is < 4k.
3908	 */
3909
3910	if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
3911		printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
3912		return EINVAL;
3913	}
3914
3915	summary_size = hfsmp->hfs_allocation_cp->c_blocks;
3916
3917	if (ALLOC_DEBUG) {
3918		printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
3919				hfsmp->hfs_allocation_cp->c_blocks);
3920	}
3921
3922	/*
3923	 * If the bitmap IO size is not the same as the allocation block size then
3924	 * then re-compute the number of summary bits necessary.  Note that above, the
3925	 * the default size is the number of allocation blocks in the bitmap *FILE*
3926	 * (not the number of bits in the bitmap itself).  If the allocation block size
3927	 * is large enough though, we may need to increase this.
3928	 */
3929	if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
3930		uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
3931		lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
3932
3933		/* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
3934		summary_size = (uint32_t) lrg_size;
3935	}
3936
3937	/*
3938	 * If the block size is the same as the IO Size, then the total number of blocks
3939	 * is already equal to the number of IO units, which is our number of summary bits.
3940	 */
3941
3942	summary_size_bytes = summary_size / kBitsPerByte;
3943	/* Always add one byte, just in case we have a dangling number of bits */
3944	summary_size_bytes++;
3945
3946	if (ALLOC_DEBUG) {
3947		printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
3948		printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
3949	}
3950
3951	/* Store the field in the mount point, and then MALLOC/bzero the memory */
3952	hfsmp->hfs_summary_size = summary_size;
3953	hfsmp->hfs_summary_bytes = summary_size_bytes;
3954
3955	MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK);
3956	if (summary_table == NULL) {
3957		return ENOMEM;
3958	}
3959	bzero (summary_table, summary_size_bytes);
3960
3961	/* enable the summary table */
3962	hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
3963	hfsmp->hfs_summary_table = summary_table;
3964
3965	if (ALLOC_DEBUG) {
3966		if (hfsmp->hfs_summary_table == NULL) {
3967			panic ("HFS Summary Init: no table for %p\n", hfsmp);
3968		}
3969	}
3970	return 0;
3971}
3972
3973/*
3974 * hfs_rebuild_summary
3975 *
3976 * This function should be used to allocate a new hunk of memory for use as a summary
3977 * table, then copy the existing data into it.  We use it whenever the filesystem's size
3978 * changes.  When a resize is in progress, you can still use the extant summary
3979 * table if it is active.
3980 *
3981 * Inputs:
3982 * 		hfsmp 		-- FS in question
3983 * 		newlength	-- new length of the FS in allocation blocks.
3984 *
3985 * Outputs:
3986 *		0 on success, EINVAL on failure.  If this function fails,  the summary table
3987 * 		will be disabled for future use.
3988 *
3989 */
static int hfs_rebuild_summary (struct hfsmount *hfsmp) {

	uint32_t new_summary_size;

	/* Default: one summary bit per allocation block of the bitmap FILE itself. */
	new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;


	if (ALLOC_DEBUG) {
		printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
	}

	/*
	 * If the bitmap IO size is not the same as the allocation block size, then re-compute
	 * the number of summary bits necessary.  Note that above, the default size is the number
	 * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
	 * If the allocation block size is large enough though, we may need to increase this, as
	 * bitmap IO is capped at 64k per IO
	 */
	if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
		uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
		lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;

		/* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
		new_summary_size = (uint32_t) lrg_size;
	}

	/*
	 * Ok, we have the new summary bitmap theoretical max size.  See if it's the same as
	 * what we've got already...
	 */
	if (new_summary_size != hfsmp->hfs_summary_size) {
		uint32_t summarybytes = new_summary_size / kBitsPerByte;
		uint32_t copysize;
		uint8_t *newtable;
		/* Add one byte for slop */
		summarybytes++;

		if (ALLOC_DEBUG) {
			printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
			printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
		}

		/* Attempt to MALLOC the memory */
		MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK);
		if (newtable == NULL) {
			/*
			 * ERROR!  We need to disable the table now
			 * (free the old table and clear the flag so future callers
			 * fall back to plain bitmap scanning).
			 */
			FREE (hfsmp->hfs_summary_table, M_TEMP);
			hfsmp->hfs_summary_table = NULL;
			hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
			return EINVAL;
		}
		bzero (newtable, summarybytes);

		/*
		 * The new table may be smaller than the old one. If this is true, then
		 * we can't copy the full size of the existing summary table into the new
		 * one.
		 *
		 * The converse is not an issue since we bzeroed the table above.
		 */
		copysize = hfsmp->hfs_summary_bytes;
		if (summarybytes < hfsmp->hfs_summary_bytes) {
			copysize = summarybytes;
		}
		memcpy (newtable, hfsmp->hfs_summary_table, copysize);

		/* We're all good.  Destroy the old copy and update ptrs */
		FREE (hfsmp->hfs_summary_table, M_TEMP);

		hfsmp->hfs_summary_table = newtable;
		hfsmp->hfs_summary_size = new_summary_size;
		hfsmp->hfs_summary_bytes = summarybytes;
	}

	return 0;
}
4068
4069
4070#if ALLOC_DEBUG
4071/*
4072 * hfs_validate_summary
4073 *
4074 * Validation routine for the summary table.  Debug-only function.
4075 *
4076 * Bitmap lock must be held.
4077 *
4078 */
void hfs_validate_summary (struct hfsmount *hfsmp) {
	uint32_t i;
	int err;

	/*
	 * Iterate over all of the bits in the summary table, and verify that
	 * there really are free blocks in the bitmap chunks that the summary
	 * table claims may contain free blocks.  Panics on any mismatch.
	 */

	if (hfsmp->hfs_summary_table == NULL) {
		panic ("HFS Summary: No HFS summary table!");
	}

	/* 131072 bits == 16384 bytes.  This is the theoretical max size of the summary table. we add 1 byte for slop */
	if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
		panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
	}

	if (hfsmp->vcbVBMIOSize == 0) {
		panic("HFS Summary: no VCB VBM IO Size !");
	}

	printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
	printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);


	/* iterate through all possible summary bits */
	for (i = 0; i < hfsmp->hfs_summary_size ; i++) {

		uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
		/* Byte offset into the bitmap file of the chunk this bit covers. */
		uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;

		/* Compute the corresponding allocation block for the summary bit. */
		uint32_t alloc_block = i * bits_per_iosize;

		/*
		 * We use a uint32_t pointer here because it will speed up
		 * access to the real bitmap data on disk.
		 */
		uint32_t *block_data;
		struct buf *bp;
		int counter;
		int counter_max;
		int saw_free_bits = 0;

		/* Get the block */
		if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data,  &bp))) {
			panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
		}

		/* Query the status of the bit and then make sure we match */
		uint32_t maybe_has_free_blocks;
		err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
		if (err) {
			panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
		}
		counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;

		/* Walk the chunk word-by-word looking for any cleared (free) bits. */
		for (counter = 0; counter < counter_max; counter++) {
			uint32_t word = block_data[counter];

			/* We assume that we'll not find any free bits here. */
			if (word != kAllBitsSetInWord) {
				if (maybe_has_free_blocks) {
					/* All done */
					saw_free_bits = 1;
					break;
				}
				else {
					/* Summary said "fully allocated" but the bitmap disagrees. */
					panic ("HFS Summary: hfs_check_summary saw free bits!");
				}
			}
		}

		/* Summary said "may have free blocks" but none were found in the chunk. */
		if (maybe_has_free_blocks && (saw_free_bits == 0)) {
			panic ("HFS Summary: did not see free bits !");
		}

		/* Release the block. */
		if ((err =  ReleaseScanBitmapRange (bp))) {
			panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
		}
	}

	printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);

	return;
}
4168#endif
4169
4170/*
4171 * hfs_alloc_scan_range:
4172 *
4173 * This function should be used to scan large ranges of the allocation bitmap
4174 * at one time.  It makes two key assumptions:
4175 *
4176 * 		1) Bitmap lock is held during the duration of the call (exclusive)
4177 * 		2) There are no pages in the buffer cache for any of the bitmap
4178 * 		blocks that we may encounter.  It *MUST* be completely empty.
4179 *
4180 * The expected use case is when we are scanning the bitmap in full while we are
4181 * still mounting the filesystem in order to issue TRIMs or build up the summary
4182 * table for the mount point. It should be done after any potential journal replays
4183 * are completed and their I/Os fully issued.
4184 *
4185 * The key reason for assumption (2) above is that this function will try to issue
4186 * I/O against the bitmap file in chunks as large a possible -- essentially as
4187 * much as the buffer layer will handle (1MB).  Because the size of these I/Os
4188 * is larger than what would be expected during normal runtime we must invalidate
4189 * the buffers as soon as we are done with them so that they do not persist in
4190 * the buffer cache for other threads to find, as they'll typically be doing
4191 * allocation-block size I/Os instead.
4192 *
4193 * Input Args:
4194 *		hfsmp 		- hfs mount data structure
4195 * 		startbit 	- allocation block # to start our scan. It must be aligned
4196 *					on a vcbVBMIOsize boundary.
4197 *		list		- journal trim list data structure for issuing TRIMs
4198 *
4199 * Output Args:
4200 *		bitToScan 	- Return the next bit to scan if this function is called again.
4201 *					Caller will supply this into the next invocation
4202 *					of this call as 'startbit'.
4203 */
4204
4205static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4206		u_int32_t *bitToScan, struct jnl_trim_list *list) {
4207
4208	int error;
4209	int readwrite = 1;
4210	u_int32_t curAllocBlock;
4211	struct buf *blockRef = NULL;
4212	u_int32_t *buffer = NULL;
4213	u_int32_t free_offset = 0; //tracks the start of the current free range
4214	u_int32_t size = 0; // tracks the length of the current free range.
4215	u_int32_t iosize = 0; //how much io we should generate against the bitmap
4216	u_int32_t byte_off; // byte offset into the bitmap file.
4217	u_int32_t completed_size; // how much io was actually completed
4218	u_int32_t last_bitmap_block;
4219	u_int32_t current_word;
4220	u_int32_t word_index = 0;
4221
4222	/* summary table building */
4223	uint32_t summary_bit = 0;
4224	uint32_t saw_free_blocks = 0;
4225	uint32_t last_marked = 0;
4226
4227	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4228		readwrite = 0;
4229	}
4230
4231	/*
4232	 * Compute how much I/O we should generate here.
4233	 * hfs_scan_range_size will validate that the start bit
4234	 * converted into a byte offset into the bitmap file,
4235	 * is aligned on a VBMIOSize boundary.
4236	 */
4237	error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4238	if (error) {
4239		if (ALLOC_DEBUG) {
4240			panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4241		}
4242		return error;
4243	}
4244
4245	if (iosize < hfsmp->vcbVBMIOSize) {
4246		if (ALLOC_DEBUG) {
4247			panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4248		}
4249		return EINVAL;
4250	}
4251
4252	/* hfs_scan_range_size should have verified startbit.  Convert it to bytes */
4253	byte_off = startbit / kBitsPerByte;
4254
4255	/*
4256	 * When the journal replays blocks, it does so by writing directly to the disk
4257	 * device (bypassing any filesystem vnodes and such).  When it finishes its I/Os
4258	 * it also immediately re-reads and invalidates the range covered by the bp so
4259	 * it does not leave anything lingering in the cache (for iosize reasons).
4260	 *
4261	 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4262	 *
4263	 * NOTE: It is not recommended, but it is possible to call the function below
4264	 * on sections of the bitmap that may be in core already as long as the pages are not
4265	 * dirty.  In that case, we'd notice that something starting at that
4266	 * logical block of the bitmap exists in the metadata cache, and we'd check
4267	 * if the iosize requested is the same as what was already allocated for it.
4268	 * Odds are pretty good we're going to request something larger.  In that case,
4269	 * we just free the existing memory associated with the buf and reallocate a
4270	 * larger range. This function should immediately invalidate it as soon as we're
4271	 * done scanning, so this shouldn't cause any coherency issues.
4272	 */
4273
4274	error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4275	if (error) {
4276		if (ALLOC_DEBUG) {
4277			panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4278		}
4279		return error;
4280	}
4281
4282	/*
4283	 * At this point, we have a giant wired buffer that represents some portion of
4284	 * the bitmap file that we want to analyze.   We may not have gotten all 'iosize'
4285	 * bytes though, so clip our ending bit to what we actually read in.
4286	 */
4287	completed_size = buf_count(blockRef);
4288	last_bitmap_block = completed_size * kBitsPerByte;
4289	last_bitmap_block = last_bitmap_block + startbit;
4290
4291	/* Cap the last block to the total number of blocks if required */
4292	if (last_bitmap_block > hfsmp->totalBlocks) {
4293		last_bitmap_block = hfsmp->totalBlocks;
4294	}
4295
4296	/* curAllocBlock represents the logical block we're analyzing. */
4297	curAllocBlock = startbit;
4298	word_index = 0;
4299	size = 0;
4300
4301	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4302		if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4303			error = EINVAL;
4304			if (ALLOC_DEBUG) {
4305				panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4306			}
4307			return error;
4308		}
4309		/*
4310		 * summary_bit should now be set to the summary bit corresponding to
4311		 * the allocation block of the first bit that we're supposed to scan
4312		 */
4313	}
4314	saw_free_blocks = 0;
4315
4316	while (curAllocBlock < last_bitmap_block) {
4317		u_int32_t bit;
4318
4319		/* Update the summary table as needed */
4320		if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4321			if (ALLOC_DEBUG) {
4322				if (hfsmp->hfs_summary_table == NULL) {
4323					panic ("hfs_alloc_scan_range: no summary table!");
4324				}
4325			}
4326
4327			uint32_t temp_summary;
4328			error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4329			if (error) {
4330				if (ALLOC_DEBUG) {
4331					panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4332				}
4333				return EINVAL;
4334			}
4335
4336			if (ALLOC_DEBUG) {
4337				if (temp_summary < summary_bit) {
4338					panic ("hfs_alloc_scan_range: backwards summary bit?\n");
4339				}
4340			}
4341
4342			/*
4343			 * If temp_summary is greater than summary_bit, then this
4344			 * means that the next allocation block crosses a vcbVBMIOSize boundary
4345			 * and we should treat this range of on-disk data as part of a new summary
4346			 * bit.
4347			 */
4348			if (temp_summary > summary_bit) {
4349				if (saw_free_blocks == 0) {
4350					/* Mark the bit as totally consumed in the summary table */
4351					hfs_set_summary (hfsmp, summary_bit, 1);
4352				}
4353				else {
4354					/* Mark the bit as potentially free in summary table */
4355					hfs_set_summary (hfsmp, summary_bit, 0);
4356				}
4357				last_marked = summary_bit;
4358				/*
4359				 * Any time we set the summary table, update our counter which tracks
4360				 * what the last bit that was fully marked in the summary table.
4361				 *
4362				 * Then reset our marker which says we haven't seen a free bit yet.
4363				 */
4364				saw_free_blocks = 0;
4365				summary_bit = temp_summary;
4366			}
4367		} /* End summary table conditions */
4368
4369		current_word = SWAP_BE32(buffer[word_index]);
4370		/* Iterate through the word 1 bit at a time... */
4371		for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
4372			if (curAllocBlock >= last_bitmap_block) {
4373				break;
4374			}
4375			u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
4376
4377			if (allocated) {
4378				if (size != 0) {
4379					if (readwrite) {
4380						/* Insert the previously tracked range of free blocks to the trim list */
4381						hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4382					}
4383					add_free_extent_cache (hfsmp, free_offset, size);
4384					size = 0;
4385					free_offset = 0;
4386				}
4387			}
4388			else {
4389				/* Not allocated */
4390				size++;
4391				if (free_offset == 0) {
4392					/* Start a new run of free spcae at curAllocBlock */
4393					free_offset = curAllocBlock;
4394				}
4395				if (saw_free_blocks == 0) {
4396					saw_free_blocks = 1;
4397				}
4398			}
4399		} /* end for loop iterating through the word */
4400
4401		if (curAllocBlock < last_bitmap_block) {
4402			word_index++;
4403		}
4404
4405	} /* End while loop (iterates through last_bitmap_block) */
4406
4407
4408	/*
4409	 * We've (potentially) completed our pass through this region of bitmap,
4410	 * but one thing we may not have done is updated that last summary bit for
4411	 * the last page we scanned, because we would have never transitioned across
4412	 * a vcbVBMIOSize boundary again.  Check for that and update the last bit
4413	 * as needed.
4414	 *
4415	 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
4416	 * for the region of bitmap on-disk that we were scanning. (it is one greater).
4417	 */
4418	if ((curAllocBlock >= last_bitmap_block) &&
4419			(hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
4420		uint32_t temp_summary;
4421		/* temp_block should be INSIDE the region we just scanned, so subtract 1 */
4422		uint32_t temp_block = last_bitmap_block - 1;
4423		error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
4424		if (error) {
4425			if (ALLOC_DEBUG) {
4426				panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
4427			}
4428			return EINVAL;
4429		}
4430
4431		/* Did we already update this in the table? */
4432		if (temp_summary > last_marked) {
4433			if (saw_free_blocks == 0) {
4434				hfs_set_summary (hfsmp, temp_summary, 1);
4435			}
4436			else {
4437				hfs_set_summary (hfsmp, temp_summary, 0);
4438			}
4439		}
4440	}
4441
4442	/*
4443	 * We may have been tracking a range of free blocks that hasn't been inserted yet.
4444	 * Keep the logic for the TRIM and free extent separate from that of the summary
4445	 * table management even though they are closely linked.
4446	 */
4447	if (size != 0) {
4448		if (readwrite) {
4449			hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4450		}
4451		add_free_extent_cache (hfsmp, free_offset, size);
4452	}
4453
4454	/*
4455	 * curAllocBlock represents the next block we need to scan when we return
4456	 * to this function.
4457	 */
4458	*bitToScan = curAllocBlock;
4459	ReleaseScanBitmapRange(blockRef);
4460
4461	return 0;
4462
4463}
4464
4465
4466
4467/*
4468 * Compute the maximum I/O size to generate against the bitmap file
4469 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
4470 *
4471 * Inputs:
4472 *		hfsmp		-- hfsmount to look at
4473 *		bitmap_off 	-- bit offset into the bitmap file
4474 *
4475 * Outputs:
4476 * 		iosize	-- iosize to generate.
4477 *
4478 * Returns:
4479 *		0 on success; EINVAL otherwise
4480 */
4481static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
4482
4483	/*
4484	 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
4485	 * with 32 bit math in this function.
4486	 */
4487
4488	uint32_t bitmap_len;
4489	uint32_t remaining_bitmap;
4490	uint32_t target_iosize;
4491	uint32_t bitmap_off;
4492
4493	/* Is this bit index not word aligned?  If so, immediately fail. */
4494	if (bitmap_st % kBitsPerWord) {
4495		if (ALLOC_DEBUG) {
4496			panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
4497		}
4498		return EINVAL;
4499	}
4500
4501	/* bitmap_off is in bytes, not allocation blocks/bits */
4502	bitmap_off = bitmap_st / kBitsPerByte;
4503
4504	if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
4505		if (ALLOC_DEBUG) {
4506			panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
4507		}
4508		return EINVAL;
4509	}
4510
4511	/*
4512	 * Also invalid if it's not at least aligned to HFS bitmap logical
4513	 * block boundaries.  We don't have to emit an iosize that's an
4514	 * exact multiple of the VBMIOSize, but it must start on such
4515	 * a boundary.
4516	 *
4517	 * The vcbVBMIOSize may be SMALLER than the allocation block size
4518	 * on a FS with giant allocation blocks, but it will never be
4519	 * greater than it, so it should be safe to start I/O
4520	 * aligned on a VBMIOsize boundary.
4521	 */
4522	if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
4523		if (ALLOC_DEBUG) {
4524			panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
4525		}
4526		return EINVAL;
4527	}
4528
4529	/*
4530	 * Generate the total bitmap file length in bytes, then round up
4531	 * that value to the end of the last allocation block, if needed (It
4532	 * will probably be needed).  We won't scan past the last actual
4533	 * allocation block.
4534	 *
4535	 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
4536	 * have to complete the I/O on VBMIOSize boundaries, but we can only read
4537	 * up until the end of the bitmap file.
4538	 */
4539	bitmap_len = hfsmp->totalBlocks / kBitsPerByte;
4540	if (bitmap_len % (hfsmp->blockSize)) {
4541		bitmap_len = (bitmap_len / hfsmp->blockSize);
4542		/* round up to the end of the next alloc block */
4543		bitmap_len++;
4544
4545		/* Convert the # of alloc blocks back to bytes. */
4546		bitmap_len = bitmap_len * hfsmp->blockSize;
4547	}
4548
4549	remaining_bitmap = bitmap_len - bitmap_off;
4550
4551	/*
4552	 * io size is the MIN of the maximum I/O we can generate or the
4553	 * remaining amount of bitmap.
4554	 */
4555	target_iosize = MIN((MAXBSIZE), remaining_bitmap);
4556	*iosize = target_iosize;
4557
4558	return 0;
4559}
4560
4561
4562
4563
4564/*
4565 * This function is basically the same as hfs_isallocated, except it's designed for
4566 * use with the red-black tree validation code.  It assumes we're only checking whether
4567 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
4568 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
4569 *
4570 * This should not be called in general purpose scanning code.
4571 */
int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {

	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
	u_int32_t  numBits;        // Number of bits in word to allocate
	u_int32_t  bitsPerBlock;
	uintptr_t  blockRef;       // Only valid when we read the block ourselves (bp_buf == NULL)
	u_int32_t  wordsPerBlock;
	u_int32_t  numBlocks = 1;  // This variant only ever tests a single block
	u_int32_t  *buffer = NULL;

	int  inuse = 0;            // Result: nonzero if the block is marked allocated
	int error;


	if (bp_buf) {
		/* just use passed-in buffer if avail. */
		buffer = bp_buf;
	}
	else {
		/*
		 * Pre-read the bitmap block containing the first word of allocation
		 */
		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
		if (error)
			return (error);
	}

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	u_int32_t wordIndexInBlock;

	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

	/* Index of the word within the bitmap block that holds startingBlock's bit. */
	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
	currentWord = buffer + wordIndexInBlock;

	/*
	 * First test any non word aligned bits.
	 * Since numBlocks is 1, the mask reduces to the single bit for
	 * startingBlock, but the general form is kept for clarity.
	 */
	firstBit = startingBlock % kBitsPerWord;
	bitMask = kAllBitsSetInWord >> firstBit;
	numBits = kBitsPerWord - firstBit;
	if (numBits > numBlocks) {
		numBits = numBlocks;
		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
	}
	/* Bitmap words are stored big-endian on disk; swap the mask to compare. */
	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
		inuse = 1;
		goto Exit;
	}
	numBlocks -= numBits;
	++currentWord;

Exit:
	/* Only release the bitmap block if we read it ourselves. */
	if(bp_buf == NULL) {
		if (buffer) {
			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
		}
	}
	return (inuse);



}
4640
4641/*
4642 * This function resets all of the data structures relevant to the
4643 * free extent cache stored in the hfsmount struct.
4644 *
4645 * If we are using the red-black tree code then we need to account for the fact that
4646 * we may encounter situations where we need to jettison the tree.  If that is the
4647 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
4648 * the free ext cache is zeroed before we start using it.
4649 *
4650 * We also reset and disable the cache when allocLimit is updated... which
4651 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
4652 * It is independent of the type of allocator being used currently.
4653 */
4654void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
4655{
4656	int bytes;
4657	void *freeExt;
4658
4659	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4660		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
4661
4662	lck_spin_lock(&hfsmp->vcbFreeExtLock);
4663
4664	/* reset Free Extent Count */
4665	hfsmp->vcbFreeExtCnt = 0;
4666
4667	/* reset the actual array */
4668	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
4669	freeExt = (void*)(hfsmp->vcbFreeExt);
4670
4671	bzero (freeExt, bytes);
4672
4673	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4674
4675	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4676		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
4677
4678	return;
4679}
4680
4681/*
4682 * This function is used to inform the allocator if we have to effectively shrink
4683 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
4684 *
4685 * The bitmap lock must be held when calling this function.  This function also modifies the
4686 * allocLimit field in the hfs mount point structure in the general case.
4687 *
4688 * In the shrinking case, we'll have to remove all free extents from the red-black
4689 * tree past the specified offset new_end_block.  In the growth case, we'll have to force
4690 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
4691 *
4692 * new_end_block represents the total number of blocks available for allocation in the resized
4693 * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
4694 * will be out of the (0, n-1) range that are indexable in the bitmap.
4695 *
4696 * Returns	0 on success
4697 *			errno on failure
4698 */
4699__private_extern__
4700u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
4701
4702	/*
4703	 * Update allocLimit to the argument specified
4704	 */
4705	hfsmp->allocLimit = new_end_block;
4706
4707	/* Invalidate the free extent cache completely so that
4708	 * it does not have any extents beyond end of current
4709	 * volume.
4710	 */
4711	ResetVCBFreeExtCache(hfsmp);
4712
4713	/* Force a rebuild of the summary table. */
4714	(void) hfs_rebuild_summary (hfsmp);
4715
4716	return 0;
4717
4718}
4719
4720
4721/*
4722 * Remove an extent from the list of free extents.
4723 *
4724 * This is a low-level routine.	 It does not handle overlaps or splitting;
4725 * that is the responsibility of the caller.  The input extent must exactly
4726 * match an extent already in the list; it will be removed, and any following
4727 * extents in the list will be shifted up.
4728 *
4729 * Inputs:
4730 *	startBlock - Start of extent to remove
4731 *	blockCount - Number of blocks in extent to remove
4732 *
4733 * Result:
4734 *	The index of the extent that was removed.
4735 */
4736static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
4737{
4738	if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
4739		if (ALLOC_DEBUG)
4740			panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4741		else
4742			printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4743		return;
4744	}
4745	int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
4746	if (shift_count > 0) {
4747		memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4748	}
4749	hfsmp->vcbFreeExtCnt--;
4750}
4751
4752
4753/*
4754 * Add an extent to the list of free extents.
4755 *
4756 * This is a low-level routine.	 It does not handle overlaps or coalescing;
4757 * that is the responsibility of the caller.  This routine *does* make
4758 * sure that the extent it is adding is inserted in the correct location.
4759 * If the list is full, this routine will handle either removing the last
4760 * extent in the list to make room for the new extent, or ignoring the
4761 * new extent if it is "worse" than the last extent in the list.
4762 *
4763 * Inputs:
4764 *	startBlock - Start of extent to add
4765 *	blockCount - Number of blocks in extent to add
4766 *
4767 * Result:
4768 *	The index where the extent that was inserted, or kMaxFreeExtents
4769 *	if the extent was not inserted (the list was full, and the extent
4770 *	being added was "worse" than everything in the list).
4771 */
4772static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4773{
4774	uint32_t i;
4775
4776	/* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
4777	if (ALLOC_DEBUG) {
4778		uint32_t endBlock = startBlock + blockCount;
4779		for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4780			if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
4781					startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
4782				continue;
4783			}
4784			panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
4785					hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
4786		}
4787	}
4788
4789	/* Figure out what index the new extent should be inserted at. */
4790	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4791		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4792			/* The list is sorted by increasing offset. */
4793			if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
4794				break;
4795			}
4796		} else {
4797			/* The list is sorted by decreasing size. */
4798			if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
4799				break;
4800			}
4801		}
4802	}
4803
4804	/* When we get here, i is the index where the extent should be inserted. */
4805	if (i == kMaxFreeExtents) {
4806		/*
4807		 * The new extent is worse than anything already in the list,
4808		 * and the list is full, so just ignore the extent to be added.
4809		 */
4810		return i;
4811	}
4812
4813	/*
4814	 * Grow the list (if possible) to make room for an insert.
4815	 */
4816	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
4817		hfsmp->vcbFreeExtCnt++;
4818
4819	/*
4820	 * If we'll be keeping any extents after the insert position, then shift them.
4821	 */
4822	int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
4823	if (shift_count > 0) {
4824		memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4825	}
4826
4827	/* Finally, store the new extent at its correct position. */
4828	hfsmp->vcbFreeExt[i].startBlock = startBlock;
4829	hfsmp->vcbFreeExt[i].blockCount = blockCount;
4830	return i;
4831}
4832
4833
4834/*
4835 * Remove an entry from free extent cache after it has been allocated.
4836 *
4837 * This is a high-level routine.  It handles removing a portion of a
4838 * cached extent, potentially splitting it into two (if the cache was
4839 * already full, throwing away the extent that would sort last).  It
4840 * also handles removing an extent that overlaps multiple extents in
4841 * the cache.
4842 *
4843 * Inputs:
4844 *	hfsmp		- mount point structure
4845 *	startBlock	- starting block of the extent to be removed.
4846 *	blockCount	- number of blocks of the extent to be removed.
4847 */
4848static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4849{
4850	u_int32_t i, insertedIndex;
4851	u_int32_t currentStart, currentEnd, endBlock;
4852	int extentsRemoved = 0;
4853
4854	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4855		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
4856
4857	endBlock = startBlock + blockCount;
4858
4859	lck_spin_lock(&hfsmp->vcbFreeExtLock);
4860
4861	/*
4862	 * Iterate over all of the extents in the free extent cache, removing or
4863	 * updating any entries that overlap with the input extent.
4864	 */
4865	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4866		currentStart = hfsmp->vcbFreeExt[i].startBlock;
4867		currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
4868
4869		/*
4870		 * If the current extent is entirely before or entirely after the
4871		 * the extent to be removed, then we keep it as-is.
4872		 */
4873		if (currentEnd <= startBlock || currentStart >= endBlock) {
4874			continue;
4875		}
4876
4877		/*
4878		 * If the extent being removed entirely contains the current extent,
4879		 * then remove the current extent.
4880		 */
4881		if (startBlock <= currentStart && endBlock >= currentEnd) {
4882			remove_free_extent_list(hfsmp, i);
4883
4884			/*
4885			 * We just removed the extent at index i.  The extent at
4886			 * index i+1 just got shifted to index i.  So decrement i
4887			 * to undo the loop's "++i", and the next iteration will
4888			 * examine index i again, which contains the next extent
4889			 * in the list.
4890			 */
4891			--i;
4892			++extentsRemoved;
4893			continue;
4894		}
4895
4896		/*
4897		 * If the extent being removed is strictly "in the middle" of the
4898		 * current extent, then we need to split the current extent into
4899		 * two discontiguous extents (the "head" and "tail").  The good
4900		 * news is that we don't need to examine any other extents in
4901		 * the list.
4902		 */
4903		if (startBlock > currentStart && endBlock < currentEnd) {
4904			remove_free_extent_list(hfsmp, i);
4905			add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4906			add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4907			break;
4908		}
4909
4910		/*
4911		 * The only remaining possibility is that the extent to be removed
4912		 * overlaps the start or end (but not both!) of the current extent.
4913		 * So we need to replace the current extent with a shorter one.
4914		 *
4915		 * The only tricky part is that the updated extent might be at a
4916		 * different index than the original extent.  If the updated extent
4917		 * was inserted after the current extent, then we need to re-examine
4918		 * the entry at index i, since it now contains the extent that was
4919		 * previously at index i+1.	 If the updated extent was inserted
4920		 * before or at the same index as the removed extent, then the
4921		 * following extents haven't changed position.
4922		 */
4923		remove_free_extent_list(hfsmp, i);
4924		if (startBlock > currentStart) {
4925			/* Remove the tail of the current extent. */
4926			insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4927		} else {
4928			/* Remove the head of the current extent. */
4929			insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4930		}
4931		if (insertedIndex > i) {
4932			--i;	/* Undo the "++i" in the loop, so we examine the entry at index i again. */
4933		}
4934	}
4935
4936	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4937
4938	sanity_check_free_ext(hfsmp, 0);
4939
4940	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4941		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
4942
4943	return;
4944}
4945
4946
4947/*
4948 * Add an entry to free extent cache after it has been deallocated.
4949 *
4950 * This is a high-level routine.  It will merge overlapping or contiguous
4951 * extents into a single, larger extent.
4952 *
4953 * If the extent provided has blocks beyond current allocLimit, it is
4954 * clipped to allocLimit (so that we won't accidentally find and allocate
4955 * space beyond allocLimit).
4956 *
4957 * Inputs:
4958 *	hfsmp		- mount point structure
4959 *	startBlock	- starting block of the extent to be removed.
4960 *	blockCount	- number of blocks of the extent to be removed.
4961 *
4962 * Returns:
4963 *	true		- if the extent was added successfully to the list
4964 *	false		- if the extent was not added to the list, maybe because
4965 *			  the extent was beyond allocLimit, or is not best
4966 *			  candidate to be put in the cache.
4967 */
4968static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4969{
4970	Boolean retval = false;
4971	uint32_t endBlock;
4972	uint32_t currentEnd;
4973	uint32_t i;
4974
4975	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4976		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
4977
4978	/* No need to add extent that is beyond current allocLimit */
4979	if (startBlock >= hfsmp->allocLimit) {
4980		goto out_not_locked;
4981	}
4982
4983	/* If end of the free extent is beyond current allocLimit, clip the extent */
4984	if ((startBlock + blockCount) > hfsmp->allocLimit) {
4985		blockCount = hfsmp->allocLimit - startBlock;
4986	}
4987
4988	lck_spin_lock(&hfsmp->vcbFreeExtLock);
4989
4990	/*
4991	 * Make a pass through the free extent cache, looking for known extents that
4992	 * overlap or are contiguous with the extent to be added.  We'll remove those
4993	 * extents from the cache, and incorporate them into the new extent to be added.
4994	 */
4995	endBlock = startBlock + blockCount;
4996	for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
4997		currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
4998		if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
4999			/* Extent i does not overlap and is not contiguous, so keep it. */
5000			continue;
5001		} else {
5002			/* We need to remove extent i and combine it with the input extent. */
5003			if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
5004				startBlock = hfsmp->vcbFreeExt[i].startBlock;
5005			if (currentEnd > endBlock)
5006				endBlock = currentEnd;
5007
5008			remove_free_extent_list(hfsmp, i);
5009			/*
5010			 * We just removed the extent at index i.  The extent at
5011			 * index i+1 just got shifted to index i.  So decrement i
5012			 * to undo the loop's "++i", and the next iteration will
5013			 * examine index i again, which contains the next extent
5014			 * in the list.
5015			 */
5016			--i;
5017		}
5018	}
5019	add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
5020
5021	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5022
5023out_not_locked:
5024	sanity_check_free_ext(hfsmp, 0);
5025
5026	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5027		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
5028
5029	return retval;
5030}
5031
5032/* Debug function to check if the free extent cache is good or not */
5033static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
5034{
5035	u_int32_t i, j;
5036
5037	/* Do not do anything if debug is not on */
5038	if (ALLOC_DEBUG == 0) {
5039		return;
5040	}
5041
5042	lck_spin_lock(&hfsmp->vcbFreeExtLock);
5043
5044	if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
5045		panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
5046
5047	/*
5048	 * Iterate the Free extent cache and ensure no entries are bogus or refer to
5049	 * allocated blocks.
5050	 */
5051	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
5052		u_int32_t start, nblocks;
5053
5054		start   = hfsmp->vcbFreeExt[i].startBlock;
5055		nblocks = hfsmp->vcbFreeExt[i].blockCount;
5056
5057		/* Check if any of the blocks in free extent cache are allocated.
5058		 * This should not be enabled always because it might take
5059		 * very long for large extents that get added to the list.
5060		 *
5061		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
5062		 * because it is going to do I/O.  Note that the free extent
5063		 * cache could change.  That's a risk we take when using this
5064		 * debugging code.  (Another alternative would be to try to
5065		 * detect when the free extent cache changed, and perhaps
5066		 * restart if the list changed while we dropped the lock.)
5067		 */
5068		if (check_allocated) {
5069			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5070			if (hfs_isallocated(hfsmp, start, nblocks)) {
5071				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
5072						hfsmp, i, start, nblocks);
5073			}
5074			lck_spin_lock(&hfsmp->vcbFreeExtLock);
5075		}
5076
5077		/* Check if any part of the extent is beyond allocLimit */
5078		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
5079			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
5080					hfsmp, i, start, nblocks, hfsmp->allocLimit);
5081		}
5082
5083		/* Check if there are any duplicate start blocks */
5084		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
5085			if (start == hfsmp->vcbFreeExt[j].startBlock) {
5086				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
5087						hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
5088						hfsmp->vcbFreeExt[j].blockCount);
5089			}
5090		}
5091
5092		/* Check if the entries are out of order */
5093		if ((i+1) != hfsmp->vcbFreeExtCnt) {
5094			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5095				/* sparse devices are sorted by starting block number (ascending) */
5096				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
5097					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5098							hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5099							hfsmp->vcbFreeExt[i+1].blockCount);
5100				}
5101			} else {
5102				/* normally sorted by block count (descending) */
5103				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
5104					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5105							hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5106							hfsmp->vcbFreeExt[i+1].blockCount);
5107				}
5108			}
5109		}
5110	}
5111	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5112}
5113
5114