1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*	@(#)hfs_vfsutils.c	4.0
29*
30*	(c) 1997-2002 Apple Computer, Inc.  All Rights Reserved
31*
32*	hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
33*
34*/
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/malloc.h>
39#include <sys/stat.h>
40#include <sys/mount.h>
41#include <sys/mount_internal.h>
42#include <sys/buf.h>
43#include <sys/buf_internal.h>
44#include <sys/ubc.h>
45#include <sys/unistd.h>
46#include <sys/utfconv.h>
47#include <sys/kauth.h>
48#include <sys/fcntl.h>
49#include <sys/fsctl.h>
50#include <sys/vnode_internal.h>
51#include <kern/clock.h>
52#include <stdbool.h>
53
54#include <libkern/OSAtomic.h>
55
56/* for parsing boot-args */
57#include <pexpert/pexpert.h>
58
59#if CONFIG_PROTECT
60#include <sys/cprotect.h>
61#endif
62
63#include "hfs.h"
64#include "hfs_catalog.h"
65#include "hfs_dbg.h"
66#include "hfs_mount.h"
67#include "hfs_endian.h"
68#include "hfs_cnode.h"
69#include "hfs_fsctl.h"
70
71#include "hfscommon/headers/FileMgrInternal.h"
72#include "hfscommon/headers/BTreesInternal.h"
73#include "hfscommon/headers/HFSUnicodeWrappers.h"
74
/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

/* Forward declarations for helpers defined later in this file. */
static void ReleaseMetaFileVNode(struct vnode *vp);
static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

/* Non-zero: emit verbose printf diagnostics on mount failures. */
#define HFS_MOUNT_DEBUG 1


//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata are considered opaque and
//       hence are not in the right byte order on little endian machines. It is
//       the responsibility of the finder and other clients to swap the data.
//*******************************************************************************

//*******************************************************************************
//	Routine:	hfs_MountHFSVolume
//
//
//*******************************************************************************

/*
 * Canonical catalog names for the volume's special metadata files.  These are
 * used when constructing the cat_desc for each metadata vnode at mount time.
 */
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";
103
#if CONFIG_HFS_STD
/*
 * hfs_MountHFSVolume
 *
 * Build the in-core volume state for an HFS (standard) volume from its
 * Master Directory Block (MDB) and instantiate vnodes for the volume's
 * metadata files: the extents overflow B-tree, the catalog B-tree, and a
 * dummy allocation file (used only to serialize access to the bitmap).
 *
 * MDB fields are big-endian on disk and are swapped to host order here.
 * HFS standard dates are in local time and are converted to UTC BSD time.
 *
 * Returns 0 (noErr) on success.  On failure, any partially-created metadata
 * vnodes are torn down via hfsUnmount() and an errno-style code is returned.
 */
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
		__unused struct proc *p)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	int error;
	ByteCount utf8chars;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork fork;
	int newvnode_flags = 0;

	/* Block size must be a multiple of 512 */
	if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
	    (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
		return (EINVAL);

	/* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
	if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
	    ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
		return (EINVAL);
	}
	hfsmp->hfs_flags |= HFS_STANDARD;
	/*
	 * The MDB seems OK: transfer info from it into VCB
	 * Note - the VCB starts out clear (all zeros)
	 *
	 */
	vcb->vcbSigWord		= SWAP_BE16 (mdb->drSigWord);
	vcb->hfs_itime		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
	vcb->localCreateDate	= SWAP_BE32 (mdb->drCrDate);
	vcb->vcbLsMod		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
	vcb->vcbAtrb		= SWAP_BE16 (mdb->drAtrb);
	vcb->vcbNmFls		= SWAP_BE16 (mdb->drNmFls);
	vcb->vcbVBMSt		= SWAP_BE16 (mdb->drVBMSt);
	vcb->nextAllocation	= SWAP_BE16 (mdb->drAllocPtr);
	vcb->totalBlocks	= SWAP_BE16 (mdb->drNmAlBlks);
	vcb->allocLimit		= vcb->totalBlocks;
	vcb->blockSize		= SWAP_BE32 (mdb->drAlBlkSiz);
	vcb->vcbClpSiz		= SWAP_BE32 (mdb->drClpSiz);
	vcb->vcbAlBlSt		= SWAP_BE16 (mdb->drAlBlSt);
	vcb->vcbNxtCNID		= SWAP_BE32 (mdb->drNxtCNID);
	vcb->freeBlocks		= SWAP_BE16 (mdb->drFreeBks);
	vcb->vcbVolBkUp		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
	vcb->vcbWrCnt		= SWAP_BE32 (mdb->drWrCnt);
	vcb->vcbNmRtDirs	= SWAP_BE16 (mdb->drNmRtDirs);
	vcb->vcbFilCnt		= SWAP_BE32 (mdb->drFilCnt);
	vcb->vcbDirCnt		= SWAP_BE32 (mdb->drDirCnt);
	bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
		vcb->vcbWrCnt++;	/* Compensate for write of MDB on last flush */

	/* convert hfs encoded name into UTF-8 string */
	error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
	/*
	 * When an HFS name cannot be encoded with the current
	 * volume encoding we use MacRoman as a fallback.
	 */
	if (error || (utf8chars == 0)) {
		error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
		/* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
		if (error) {
			goto MtVolErr;
		}
	}

	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
	vcb->vcbVBMIOSize = kHFSBlockSize;

	/* Generate the partition-based AVH location */
	hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
	                                          hfsmp->hfs_logical_block_count);

	/* HFS standard is read-only, so just stuff the FS location in here, too */
	hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;

	/*
	 * Shared template for the metadata-file catalog descriptors/attributes;
	 * each per-file section below overrides the name, cnid, and fork data.
	 */
	bzero(&cndesc, sizeof(cndesc));
	cndesc.cd_parentcnid = kHFSRootParentID;
	cndesc.cd_flags |= CD_ISMETA;
	bzero(&cnattr, sizeof(cnattr));
	cnattr.ca_linkcount = 1;
	cnattr.ca_mode = S_IFREG;
	bzero(&fork, sizeof(fork));

	/*
	 * Set up Extents B-tree vnode
	 */
	cndesc.cd_nameptr = hfs_extname;
	cndesc.cd_namelen = strlen((char *)hfs_extname);
	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
	fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
	fork.cf_blocks = fork.cf_size / vcb->blockSize;
	fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
	fork.cf_vblocks = 0;
	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
	cnattr.ca_blocks = fork.cf_blocks;

	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
	                        &hfsmp->hfs_extents_vp, &newvnode_flags);
	if (error) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
		}
		goto MtVolErr;
	}
	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
	                                 (KeyCompareProcPtr)CompareExtentKeys));
	if (error) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
		}
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		goto MtVolErr;
	}
	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

	/*
	 * Set up Catalog B-tree vnode...
	 */
	cndesc.cd_nameptr = hfs_catname;
	cndesc.cd_namelen = strlen((char *)hfs_catname);
	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
	fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
	fork.cf_blocks = fork.cf_size / vcb->blockSize;
	fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
	fork.cf_vblocks = 0;
	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
	cnattr.ca_blocks = fork.cf_blocks;

	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
	                        &hfsmp->hfs_catalog_vp, &newvnode_flags);
	if (error) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
		}
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		goto MtVolErr;
	}
	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
	                                 (KeyCompareProcPtr)CompareCatalogKeys));
	if (error) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
		}
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		goto MtVolErr;
	}
	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

	/*
	 * Set up dummy Allocation file vnode (used only for locking bitmap)
	 */
	cndesc.cd_nameptr = hfs_vbmname;
	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
	bzero(&fork, sizeof(fork));
	cnattr.ca_blocks = 0;

	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
	if (error) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
		}
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		goto MtVolErr;
	}
	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

	/* mark the volume dirty (clear clean unmount bit) */
	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;

	if (error == noErr) {
		/* Verify the root folder exists in the catalog. */
		error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
		/*
		 * Only log on an actual lookup failure; previously this printed
		 * a misleading "error ... (0)" message on every successful mount.
		 */
		if (error && HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
		}
	}

	if (error == noErr) {
		/* If the disk isn't write protected.. */
		if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
			MarkVCBDirty (vcb); //	mark VCB dirty so it will be written
		}
	}

	/*
	 * all done with system files so we can unlock now...
	 */
	hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

	if (error == noErr) {
		/* If successful, then we can just return once we've unlocked the cnodes */
		return error;
	}

	//--	Release any resources allocated so far before exiting with an error:
MtVolErr:
	hfsUnmount(hfsmp, NULL);

	return (error);
}

#endif
322
323//*******************************************************************************
324//	Routine:	hfs_MountHFSPlusVolume
325//
326//
327//*******************************************************************************
328
329OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
330	off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
331{
332	register ExtendedVCB *vcb;
333	struct cat_desc cndesc;
334	struct cat_attr cnattr;
335	struct cat_fork cfork;
336	u_int32_t blockSize;
337	daddr64_t spare_sectors;
338	struct BTreeInfoRec btinfo;
339	u_int16_t  signature;
340	u_int16_t  hfs_version;
341	int newvnode_flags = 0;
342	int  i;
343	OSErr retval;
344	char converted_volname[256];
345	size_t volname_length = 0;
346	size_t conv_volname_length = 0;
347
348	signature = SWAP_BE16(vhp->signature);
349	hfs_version = SWAP_BE16(vhp->version);
350
351	if (signature == kHFSPlusSigWord) {
352		if (hfs_version != kHFSPlusVersion) {
353			printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version);
354			return (EINVAL);
355		}
356	} else if (signature == kHFSXSigWord) {
357		if (hfs_version != kHFSXVersion) {
358			printf("hfs_mount: invalid HFSX version: %x\n", hfs_version);
359			return (EINVAL);
360		}
361		/* The in-memory signature is always 'H+'. */
362		signature = kHFSPlusSigWord;
363		hfsmp->hfs_flags |= HFS_X;
364	} else {
365		/* Removed printf for invalid HFS+ signature because it gives
366		 * false error for UFS root volume
367		 */
368		if (HFS_MOUNT_DEBUG) {
369			printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature);
370		}
371		return (EINVAL);
372	}
373
374	/* Block size must be at least 512 and a power of 2 */
375	blockSize = SWAP_BE32(vhp->blockSize);
376	if (blockSize < 512 || !powerof2(blockSize)) {
377		if (HFS_MOUNT_DEBUG) {
378			printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
379		}
380		return (EINVAL);
381	}
382
383	/* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
384	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
385	    (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
386		if (HFS_MOUNT_DEBUG) {
387			printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
388		}
389		return (EINVAL);
390	}
391
392	/* Make sure we can live with the physical block size. */
393	if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
394	    (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
395	    (blockSize < hfsmp->hfs_logical_block_size)) {
396		if (HFS_MOUNT_DEBUG) {
397			printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
398					blockSize, hfsmp->hfs_logical_block_size);
399		}
400		return (ENXIO);
401	}
402
403	/* If allocation block size is less than the physical
404	 * block size, we assume that the physical block size
405	 * is same as logical block size.  The physical block
406	 * size value is used to round down the offsets for
407	 * reading and writing the primary and alternate volume
408	 * headers at physical block boundary and will cause
409	 * problems if it is less than the block size.
410	 */
411	if (blockSize < hfsmp->hfs_physical_block_size) {
412		hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
413		hfsmp->hfs_log_per_phys = 1;
414	}
415
416	/*
417	 * The VolumeHeader seems OK: transfer info from it into VCB
418	 * Note - the VCB starts out clear (all zeros)
419	 */
420	vcb = HFSTOVCB(hfsmp);
421
422	vcb->vcbSigWord	= signature;
423	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
424	vcb->vcbLsMod	= to_bsd_time(SWAP_BE32(vhp->modifyDate));
425	vcb->vcbAtrb	= SWAP_BE32(vhp->attributes);
426	vcb->vcbClpSiz	= SWAP_BE32(vhp->rsrcClumpSize);
427	vcb->vcbNxtCNID	= SWAP_BE32(vhp->nextCatalogID);
428	vcb->vcbVolBkUp	= to_bsd_time(SWAP_BE32(vhp->backupDate));
429	vcb->vcbWrCnt	= SWAP_BE32(vhp->writeCount);
430	vcb->vcbFilCnt	= SWAP_BE32(vhp->fileCount);
431	vcb->vcbDirCnt	= SWAP_BE32(vhp->folderCount);
432
433	/* copy 32 bytes of Finder info */
434	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
435
436	vcb->vcbAlBlSt = 0;		/* hfs+ allocation blocks start at first block of volume */
437	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
438		vcb->vcbWrCnt++;	/* compensate for write of Volume Header on last flush */
439
440	/* Now fill in the Extended VCB info */
441	vcb->nextAllocation	= SWAP_BE32(vhp->nextAllocation);
442	vcb->totalBlocks	= SWAP_BE32(vhp->totalBlocks);
443	vcb->allocLimit		= vcb->totalBlocks;
444	vcb->freeBlocks		= SWAP_BE32(vhp->freeBlocks);
445	vcb->blockSize		= blockSize;
446	vcb->encodingsBitmap	= SWAP_BE64(vhp->encodingsBitmap);
447	vcb->localCreateDate	= SWAP_BE32(vhp->createDate);
448
449	vcb->hfsPlusIOPosOffset	= embeddedOffset;
450
451	/* Default to no free block reserve */
452	vcb->reserveBlocks = 0;
453
454	/*
455	 * Update the logical block size in the mount struct
456	 * (currently set up from the wrapper MDB) using the
457	 * new blocksize value:
458	 */
459	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
460	vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
461
462	/*
463	 * Validate and initialize the location of the alternate volume header.
464	 *
465	 * Note that there may be spare sectors beyond the end of the filesystem that still
466	 * belong to our partition.
467	 */
468
469	spare_sectors = hfsmp->hfs_logical_block_count -
470	                (((daddr64_t)vcb->totalBlocks * blockSize) /
471	                   hfsmp->hfs_logical_block_size);
472
473	/*
474	 * Differentiate between "innocuous" spare sectors and the more unusual
475	 * degenerate case:
476	 *
477	 * *** Innocuous spare sectors exist if:
478	 *
479	 * A) the number of bytes assigned to the partition (by multiplying logical
480	 * block size * logical block count) is greater than the filesystem size
481	 * (by multiplying allocation block count and allocation block size)
482	 *
483	 * and
484	 *
485	 * B) the remainder is less than the size of a full allocation block's worth of bytes.
486	 *
487	 * This handles the normal case where there may be a few extra sectors, but the two
488	 * are fundamentally in sync.
489	 *
490	 * *** Degenerate spare sectors exist if:
491	 * A) The number of bytes assigned to the partition (by multiplying logical
492	 * block size * logical block count) is greater than the filesystem size
493	 * (by multiplying allocation block count and block size).
494	 *
495	 * and
496	 *
497	 * B) the remainder is greater than a full allocation's block worth of bytes.
498	 * In this case,  a smaller file system exists in a larger partition.
499	 * This can happen in various ways, including when volume is resized but the
500	 * partition is yet to be resized.  Under this condition, we have to assume that
501	 * a partition management software may resize the partition to match
502	 * the file system size in the future.  Therefore we should update
503	 * alternate volume header at two locations on the disk,
504	 *   a. 1024 bytes before end of the partition
505	 *   b. 1024 bytes before end of the file system
506	 */
507
508	if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
509		/*
510		 * Handle the degenerate case above. FS < partition size.
511		 * AVH located at 1024 bytes from the end of the partition
512		 */
513		hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
514					   HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
515
516		/* AVH located at 1024 bytes from the end of the filesystem */
517		hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
518					   HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
519						(((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
520	}
521	else {
522		/* Innocuous spare sectors; Partition & FS notion are in sync */
523		hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
524					   HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
525
526		hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
527	}
528	if (hfs_resize_debug) {
529		printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
530				hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
531	}
532
533	bzero(&cndesc, sizeof(cndesc));
534	cndesc.cd_parentcnid = kHFSRootParentID;
535	cndesc.cd_flags |= CD_ISMETA;
536	bzero(&cnattr, sizeof(cnattr));
537	cnattr.ca_linkcount = 1;
538	cnattr.ca_mode = S_IFREG;
539
540	/*
541	 * Set up Extents B-tree vnode
542	 */
543	cndesc.cd_nameptr = hfs_extname;
544	cndesc.cd_namelen = strlen((char *)hfs_extname);
545	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
546
547	cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
548	cfork.cf_new_size= 0;
549	cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
550	cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
551	cfork.cf_vblocks = 0;
552	cnattr.ca_blocks = cfork.cf_blocks;
553	for (i = 0; i < kHFSPlusExtentDensity; i++) {
554		cfork.cf_extents[i].startBlock =
555				SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
556		cfork.cf_extents[i].blockCount =
557				SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
558	}
559	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
560	                         &hfsmp->hfs_extents_vp, &newvnode_flags);
561	if (retval)
562	{
563		if (HFS_MOUNT_DEBUG) {
564			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
565		}
566		goto ErrorExit;
567	}
568	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
569	hfs_unlock(hfsmp->hfs_extents_cp);
570
571	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
572	                                  (KeyCompareProcPtr) CompareExtentKeysPlus));
573	if (retval)
574	{
575		if (HFS_MOUNT_DEBUG) {
576			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
577		}
578		goto ErrorExit;
579	}
580	/*
581	 * Set up Catalog B-tree vnode
582	 */
583	cndesc.cd_nameptr = hfs_catname;
584	cndesc.cd_namelen = strlen((char *)hfs_catname);
585	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
586
587	cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
588	cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
589	cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
590	cfork.cf_vblocks = 0;
591	cnattr.ca_blocks = cfork.cf_blocks;
592	for (i = 0; i < kHFSPlusExtentDensity; i++) {
593		cfork.cf_extents[i].startBlock =
594				SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
595		cfork.cf_extents[i].blockCount =
596				SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
597	}
598	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
599	                         &hfsmp->hfs_catalog_vp, &newvnode_flags);
600	if (retval) {
601		if (HFS_MOUNT_DEBUG) {
602			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
603		}
604		goto ErrorExit;
605	}
606	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
607	hfs_unlock(hfsmp->hfs_catalog_cp);
608
609	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
610	                                  (KeyCompareProcPtr) CompareExtendedCatalogKeys));
611	if (retval) {
612		if (HFS_MOUNT_DEBUG) {
613			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
614		}
615		goto ErrorExit;
616	}
617	if ((hfsmp->hfs_flags & HFS_X) &&
618	    BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
619		if (btinfo.keyCompareType == kHFSBinaryCompare) {
620			hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
621			/* Install a case-sensitive key compare */
622			(void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
623			                  (KeyCompareProcPtr)cat_binarykeycompare);
624		}
625	}
626
627	/*
628	 * Set up Allocation file vnode
629	 */
630	cndesc.cd_nameptr = hfs_vbmname;
631	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
632	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
633
634	cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
635	cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
636	cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
637	cfork.cf_vblocks = 0;
638	cnattr.ca_blocks = cfork.cf_blocks;
639	for (i = 0; i < kHFSPlusExtentDensity; i++) {
640		cfork.cf_extents[i].startBlock =
641				SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
642		cfork.cf_extents[i].blockCount =
643				SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
644	}
645	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
646	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
647	if (retval) {
648		if (HFS_MOUNT_DEBUG) {
649			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
650		}
651		goto ErrorExit;
652	}
653	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
654	hfs_unlock(hfsmp->hfs_allocation_cp);
655
656	/*
657	 * Set up Attribute B-tree vnode
658	 */
659	if (vhp->attributesFile.totalBlocks != 0) {
660		cndesc.cd_nameptr = hfs_attrname;
661		cndesc.cd_namelen = strlen((char *)hfs_attrname);
662		cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
663
664		cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
665		cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
666		cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
667		cfork.cf_vblocks = 0;
668		cnattr.ca_blocks = cfork.cf_blocks;
669		for (i = 0; i < kHFSPlusExtentDensity; i++) {
670			cfork.cf_extents[i].startBlock =
671					SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
672			cfork.cf_extents[i].blockCount =
673					SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
674		}
675		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
676					 &hfsmp->hfs_attribute_vp, &newvnode_flags);
677		if (retval) {
678			if (HFS_MOUNT_DEBUG) {
679				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
680			}
681			goto ErrorExit;
682		}
683		hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
684		hfs_unlock(hfsmp->hfs_attribute_cp);
685		retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
686						  (KeyCompareProcPtr) hfs_attrkeycompare));
687		if (retval) {
688			if (HFS_MOUNT_DEBUG) {
689				printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
690			}
691			goto ErrorExit;
692		}
693
694		/* Initialize vnode for virtual attribute data file that spans the
695		 * entire file system space for performing I/O to attribute btree
696		 * We hold iocount on the attrdata vnode for the entire duration
697		 * of mount (similar to btree vnodes)
698		 */
699		retval = init_attrdata_vnode(hfsmp);
700		if (retval) {
701			if (HFS_MOUNT_DEBUG) {
702				printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
703			}
704			goto ErrorExit;
705		}
706	}
707
708	/*
709	 * Set up Startup file vnode
710	 */
711	if (vhp->startupFile.totalBlocks != 0) {
712		cndesc.cd_nameptr = hfs_startupname;
713		cndesc.cd_namelen = strlen((char *)hfs_startupname);
714		cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
715
716		cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
717		cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
718		cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
719		cfork.cf_vblocks = 0;
720		cnattr.ca_blocks = cfork.cf_blocks;
721		for (i = 0; i < kHFSPlusExtentDensity; i++) {
722			cfork.cf_extents[i].startBlock =
723					SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
724			cfork.cf_extents[i].blockCount =
725					SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
726		}
727		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
728					 &hfsmp->hfs_startup_vp, &newvnode_flags);
729		if (retval) {
730			if (HFS_MOUNT_DEBUG) {
731				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
732			}
733			goto ErrorExit;
734		}
735		hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
736		hfs_unlock(hfsmp->hfs_startup_cp);
737	}
738
739	/*
740	 * Pick up volume name and create date
741	 *
742	 * Acquiring the volume name should not manipulate the bitmap, only the catalog
743	 * btree and possibly the extents overflow b-tree.
744	 */
745	retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
746	if (retval) {
747		if (HFS_MOUNT_DEBUG) {
748			printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
749		}
750		goto ErrorExit;
751	}
752	vcb->hfs_itime = cnattr.ca_itime;
753	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
754	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
755	volname_length = strlen ((const char*)vcb->vcbVN);
756	cat_releasedesc(&cndesc);
757
758#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
759
760
761	/* Send the volume name down to CoreStorage if necessary */
762	retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
763	if (retval == 0) {
764		(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
765	}
766
767	/* reset retval == 0. we don't care about errors in volname conversion */
768	retval = 0;
769
770
771	/*
772	 * We now always initiate a full bitmap scan even if the volume is read-only because this is
773	 * our only shot to do I/Os of dramaticallly different sizes than what the buffer cache ordinarily
774	 * expects. TRIMs will not be delivered to the underlying media if the volume is not
775	 * read-write though.
776	 */
777	thread_t allocator_scanner;
778	hfsmp->scan_var = 0;
779
780	/* Take the HFS mount mutex and wait on scan_var */
781	hfs_lock_mount (hfsmp);
782
783	kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
784	/* Wait until it registers that it's got the appropriate locks */
785	while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
786		(void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
787		if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
788			break;
789		}
790		else {
791			hfs_lock_mount (hfsmp);
792		}
793	}
794
795	thread_deallocate (allocator_scanner);
796
797	/* mark the volume dirty (clear clean unmount bit) */
798	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
799	if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
800		hfs_flushvolumeheader(hfsmp, TRUE, 0);
801	}
802
803	/* kHFSHasFolderCount is only supported/updated on HFSX volumes */
804	if ((hfsmp->hfs_flags & HFS_X) != 0) {
805		hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
806	}
807
808	//
809	// Check if we need to do late journal initialization.  This only
810	// happens if a previous version of MacOS X (or 9) touched the disk.
811	// In that case hfs_late_journal_init() will go re-locate the journal
812	// and journal_info_block files and validate that they're still kosher.
813	//
814	if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
815		&& (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
816		&& (hfsmp->jnl == NULL)) {
817
818		retval = hfs_late_journal_init(hfsmp, vhp, args);
819		if (retval != 0) {
820			if (retval == EROFS) {
821				// EROFS is a special error code that means the volume has an external
822				// journal which we couldn't find.  in that case we do not want to
823				// rewrite the volume header - we'll just refuse to mount the volume.
824				if (HFS_MOUNT_DEBUG) {
825					printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
826				}
827				retval = EINVAL;
828				goto ErrorExit;
829			}
830
831			hfsmp->jnl = NULL;
832
833			// if the journal failed to open, then set the lastMountedVersion
834			// to be "FSK!" which fsck_hfs will see and force the fsck instead
835			// of just bailing out because the volume is journaled.
836			if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
837				HFSPlusVolumeHeader *jvhp;
838				daddr64_t mdb_offset;
839				struct buf *bp = NULL;
840
841				hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
842
843				mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
844
845				bp = NULL;
846				retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
847						HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
848						hfsmp->hfs_physical_block_size, cred, &bp);
849				if (retval == 0) {
850					jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
851
852					if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
853						printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
854						jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
855					   	buf_bwrite(bp);
856					} else {
857						buf_brelse(bp);
858					}
859					bp = NULL;
860				} else if (bp) {
861					buf_brelse(bp);
862					// clear this so the error exit path won't try to use it
863					bp = NULL;
864			    }
865			}
866
867			if (HFS_MOUNT_DEBUG) {
868				printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
869			}
870			retval = EINVAL;
871			goto ErrorExit;
872		} else if (hfsmp->jnl) {
873			vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
874		}
875	} else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
876		struct cat_attr jinfo_attr, jnl_attr;
877
878		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
879		    vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
880		}
881
882		// if we're here we need to fill in the fileid's for the
883		// journal and journal_info_block.
884		hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
885		hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
886		if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
887			printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
888			printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
889		}
890
891		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
892		    vcb->vcbAtrb |= kHFSVolumeJournaledMask;
893		}
894
895		if (hfsmp->jnl == NULL) {
896		    vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
897		}
898	}
899
900	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )	// if the disk is not write protected
901	{
902		MarkVCBDirty( vcb );	// mark VCB dirty so it will be written
903	}
904
905	/*
906	 * Distinguish 3 potential cases involving content protection:
907	 * 1. mount point bit set; vcbAtrb does not support it. Fail.
908	 * 2. mount point bit set; vcbattrb supports it. we're good.
909	 * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good.
910	 */
911	if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
912		/* Does the mount point support it ? */
913		if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
914			/* Case 1 above */
915			retval = EINVAL;
916			goto ErrorExit;
917		}
918	}
919	else {
920		/* not requested in the mount point. Is it in FS? */
921		if (vcb->vcbAtrb & kHFSContentProtectionMask) {
922			/* Case 3 above */
923			vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
924		}
925	}
926
927	/* At this point, if the mount point flag is set, we can enable it. */
928	if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
929		/* Cases 2+3 above */
930#if CONFIG_PROTECT
931		/* Get the EAs as needed. */
932		int cperr = 0;
933		uint16_t majorversion;
934		uint16_t minorversion;
935		uint64_t flags;
936		uint8_t cryptogen = 0;
937		struct cp_root_xattr *xattr = NULL;
938		MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
939		if (xattr == NULL) {
940			retval = ENOMEM;
941			goto ErrorExit;
942		}
943		bzero (xattr, sizeof(struct cp_root_xattr));
944
945		/* go get the EA to get the version information */
946		cperr = cp_getrootxattr (hfsmp, xattr);
947		/*
948		 * If there was no EA there, then write one out.
949		 * Assuming EA is not present on the root means
950		 * this is an erase install or a very old FS
951		 */
952
953		if (cperr == 0) {
954			/* Have to run a valid CP version. */
955			if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
956				cperr = EINVAL;
957			}
958		}
959		else if (cperr == ENOATTR) {
960			printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
961			bzero(xattr, sizeof(struct cp_root_xattr));
962			xattr->major_version = CP_NEW_MAJOR_VERS;
963			xattr->minor_version = CP_MINOR_VERS;
964			cperr = cp_setrootxattr (hfsmp, xattr);
965		}
966		majorversion = xattr->major_version;
967		minorversion = xattr->minor_version;
968		flags = xattr->flags;
969		if (xattr->flags & CP_ROOT_CRYPTOG1) {
970			cryptogen = 1;
971		}
972
973		if (xattr) {
974			FREE(xattr, M_TEMP);
975		}
976
977		/* Recheck for good status */
978		if (cperr == 0) {
979			/* If we got here, then the CP version is valid. Set it in the mount point */
980			hfsmp->hfs_running_cp_major_vers = majorversion;
981			printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
982			hfsmp->cproot_flags = flags;
983			hfsmp->cp_crypto_generation = cryptogen;
984
985			/*
986			 * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
987			 * Ensure that the boot-arg's value is valid for FILES (not directories),
988			 * since only files are actually protected for now.
989			 */
990
991			PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
992
993			if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
994				PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
995			}
996
997			if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
998				hfsmp->default_cp_class = PROTECTION_CLASS_C;
999			}
1000		}
1001		else {
1002			retval = EPERM;
1003			goto ErrorExit;
1004		}
1005#else
1006		/* If CONFIG_PROTECT not built, ignore CP */
1007		vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
1008#endif
1009	}
1010
1011	/*
1012	 * Establish a metadata allocation zone.
1013	 */
1014	hfs_metadatazone_init(hfsmp, false);
1015
1016	/*
1017	 * Make any metadata zone adjustments.
1018	 */
1019	if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
1020		/* Keep the roving allocator out of the metadata zone. */
1021		if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
1022		    vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
1023			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1024		}
1025	} else {
1026		if (vcb->nextAllocation <= 1) {
1027			vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
1028		}
1029	}
1030	vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
1031
1032	/* Setup private/hidden directories for hardlinks. */
1033	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
1034	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
1035
1036	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
1037		hfs_remove_orphans(hfsmp);
1038
1039	/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
1040	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
1041	{
1042		retval = hfs_erase_unused_nodes(hfsmp);
1043		if (retval) {
1044			if (HFS_MOUNT_DEBUG) {
1045				printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
1046			}
1047
1048			goto ErrorExit;
1049		}
1050	}
1051
1052	/*
1053	 * Allow hot file clustering if conditions allow.
1054	 */
1055	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
1056	    ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
1057		(void) hfs_recording_init(hfsmp);
1058	}
1059
1060	/* Force ACLs on HFS+ file systems. */
1061	vfs_setextendedsecurity(HFSTOVFS(hfsmp));
1062
1063	/* Enable extent-based extended attributes by default */
1064	hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
1065
1066	return (0);
1067
1068ErrorExit:
1069	/*
1070	 * A fatal error occurred and the volume cannot be mounted, so
1071	 * release any resources that we acquired...
1072	 */
1073	hfsUnmount(hfsmp, NULL);
1074
1075	if (HFS_MOUNT_DEBUG) {
1076		printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
1077	}
1078	return (retval);
1079}
1080
1081
1082/*
1083 * ReleaseMetaFileVNode
1084 *
1085 * vp	L - -
1086 */
1087static void ReleaseMetaFileVNode(struct vnode *vp)
1088{
1089	struct filefork *fp;
1090
1091	if (vp && (fp = VTOF(vp))) {
1092		if (fp->fcbBTCBPtr != NULL) {
1093			(void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1094			(void) BTClosePath(fp);
1095			hfs_unlock(VTOC(vp));
1096		}
1097
1098		/* release the node even if BTClosePath fails */
1099		vnode_recycle(vp);
1100		vnode_put(vp);
1101	}
1102}
1103
1104
1105/*************************************************************
1106*
1107* Unmounts a hfs volume.
1108*	At this point vflush() has been called (to dump all non-metadata files)
1109*
1110*************************************************************/
1111
1112int
1113hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
1114{
1115	/* Get rid of our attribute data vnode (if any).  This is done
1116	 * after the vflush() during mount, so we don't need to worry
1117	 * about any locks.
1118	 */
1119	if (hfsmp->hfs_attrdata_vp) {
1120		ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
1121		hfsmp->hfs_attrdata_vp = NULLVP;
1122	}
1123
1124	if (hfsmp->hfs_startup_vp) {
1125		ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
1126		hfsmp->hfs_startup_cp = NULL;
1127		hfsmp->hfs_startup_vp = NULL;
1128	}
1129
1130	if (hfsmp->hfs_attribute_vp) {
1131		ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
1132		hfsmp->hfs_attribute_cp = NULL;
1133		hfsmp->hfs_attribute_vp = NULL;
1134	}
1135
1136	if (hfsmp->hfs_catalog_vp) {
1137		ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
1138		hfsmp->hfs_catalog_cp = NULL;
1139		hfsmp->hfs_catalog_vp = NULL;
1140	}
1141
1142	if (hfsmp->hfs_extents_vp) {
1143		ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
1144		hfsmp->hfs_extents_cp = NULL;
1145		hfsmp->hfs_extents_vp = NULL;
1146	}
1147
1148	if (hfsmp->hfs_allocation_vp) {
1149		ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
1150		hfsmp->hfs_allocation_cp = NULL;
1151		hfsmp->hfs_allocation_vp = NULL;
1152	}
1153
1154	return (0);
1155}
1156
1157
1158/*
1159 * Test if fork has overflow extents.
1160 *
1161 * Returns:
1162 * 	non-zero - overflow extents exist
1163 * 	zero     - overflow extents do not exist
1164 */
1165__private_extern__
1166bool overflow_extents(struct filefork *fp)
1167{
1168	u_int32_t blocks;
1169
1170	//
1171	// If the vnode pointer is NULL then we're being called
1172	// from hfs_remove_orphans() with a faked-up filefork
1173	// and therefore it has to be an HFS+ volume.  Otherwise
1174	// we check through the volume header to see what type
1175	// of volume we're on.
1176	//
1177
1178#if CONFIG_HFS_STD
1179	if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
1180		if (fp->ff_extents[2].blockCount == 0)
1181			return false;
1182
1183		blocks = fp->ff_extents[0].blockCount +
1184			fp->ff_extents[1].blockCount +
1185			fp->ff_extents[2].blockCount;
1186
1187		return fp->ff_blocks > blocks;
1188	}
1189#endif
1190
1191	if (fp->ff_extents[7].blockCount == 0)
1192		return false;
1193
1194	blocks = fp->ff_extents[0].blockCount +
1195		fp->ff_extents[1].blockCount +
1196		fp->ff_extents[2].blockCount +
1197		fp->ff_extents[3].blockCount +
1198		fp->ff_extents[4].blockCount +
1199		fp->ff_extents[5].blockCount +
1200		fp->ff_extents[6].blockCount +
1201		fp->ff_extents[7].blockCount;
1202
1203	return fp->ff_blocks > blocks;
1204}
1205
1206static __attribute__((pure))
1207boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
1208{
1209	return (hfsmp->hfs_freeze_state == HFS_FROZEN
1210			|| (hfsmp->hfs_freeze_state == HFS_FREEZING
1211				&& current_thread() != hfsmp->hfs_freezing_thread));
1212}
1213
1214/*
1215 * Lock the HFS global journal lock
1216 */
1217int
1218hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
1219{
1220	thread_t thread = current_thread();
1221
1222	if (hfsmp->hfs_global_lockowner == thread) {
1223		panic ("hfs_lock_global: locking against myself!");
1224	}
1225
1226	/*
1227	 * This check isn't really necessary but this stops us taking
1228	 * the mount lock in most cases.  The essential check is below.
1229	 */
1230	if (hfs_is_frozen(hfsmp)) {
1231		/*
1232		 * Unfortunately, there is no easy way of getting a notification
1233		 * for when a process is exiting and it's possible for the exiting
1234		 * process to get blocked somewhere else.  To catch this, we
1235		 * periodically monitor the frozen process here and thaw if
1236		 * we spot that it's exiting.
1237		 */
1238frozen:
1239		hfs_lock_mount(hfsmp);
1240
1241		struct timespec ts = { 0, 500 * NSEC_PER_MSEC };
1242
1243		while (hfs_is_frozen(hfsmp)) {
1244			if (hfsmp->hfs_freeze_state == HFS_FROZEN
1245				&& proc_exiting(hfsmp->hfs_freezing_proc)) {
1246				hfs_thaw_locked(hfsmp);
1247				break;
1248			}
1249
1250			msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
1251			       PWAIT, "hfs_lock_global (frozen)", &ts);
1252		}
1253		hfs_unlock_mount(hfsmp);
1254	}
1255
1256	/* HFS_SHARED_LOCK */
1257	if (locktype == HFS_SHARED_LOCK) {
1258		lck_rw_lock_shared (&hfsmp->hfs_global_lock);
1259		hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
1260	}
1261	/* HFS_EXCLUSIVE_LOCK */
1262	else {
1263		lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
1264		hfsmp->hfs_global_lockowner = thread;
1265	}
1266
1267	/*
1268	 * We have to check if we're frozen again because of the time
1269	 * between when we checked and when we took the global lock.
1270	 */
1271	if (hfs_is_frozen(hfsmp)) {
1272		hfs_unlock_global(hfsmp);
1273		goto frozen;
1274	}
1275
1276	return 0;
1277}
1278
1279
1280/*
1281 * Unlock the HFS global journal lock
1282 */
1283void
1284hfs_unlock_global (struct hfsmount *hfsmp)
1285{
1286	thread_t thread = current_thread();
1287
1288	/* HFS_LOCK_EXCLUSIVE */
1289	if (hfsmp->hfs_global_lockowner == thread) {
1290		hfsmp->hfs_global_lockowner = NULL;
1291		lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1292	}
1293	/* HFS_LOCK_SHARED */
1294	else {
1295		lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1296	}
1297}
1298
1299/*
1300 * Lock the HFS mount lock
1301 *
1302 * Note: this is a mutex, not a rw lock!
1303 */
1304inline
1305void hfs_lock_mount (struct hfsmount *hfsmp) {
1306	lck_mtx_lock (&(hfsmp->hfs_mutex));
1307}
1308
1309/*
1310 * Unlock the HFS mount lock
1311 *
1312 * Note: this is a mutex, not a rw lock!
1313 */
1314inline
1315void hfs_unlock_mount (struct hfsmount *hfsmp) {
1316	lck_mtx_unlock (&(hfsmp->hfs_mutex));
1317}
1318
1319/*
1320 * Lock HFS system file(s).
1321 */
1322int
1323hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
1324{
1325	/*
1326	 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1327	 */
1328	if (flags & SFL_CATALOG) {
1329#ifdef HFS_CHECK_LOCK_ORDER
1330		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1331			panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1332		}
1333		if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1334			panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1335		}
1336		if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1337			panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1338		}
1339#endif /* HFS_CHECK_LOCK_ORDER */
1340
1341		if (hfsmp->hfs_catalog_cp) {
1342			(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
1343			/*
1344			 * When the catalog file has overflow extents then
1345			 * also acquire the extents b-tree lock if its not
1346			 * already requested.
1347			 */
1348			if (((flags & SFL_EXTENTS) == 0) &&
1349			    (hfsmp->hfs_catalog_vp != NULL) &&
1350			    (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
1351				flags |= SFL_EXTENTS;
1352			}
1353		} else {
1354			flags &= ~SFL_CATALOG;
1355		}
1356	}
1357
1358	if (flags & SFL_ATTRIBUTE) {
1359#ifdef HFS_CHECK_LOCK_ORDER
1360		if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1361			panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1362		}
1363		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1364			panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1365		}
1366#endif /* HFS_CHECK_LOCK_ORDER */
1367
1368		if (hfsmp->hfs_attribute_cp) {
1369			(void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
1370			/*
1371			 * When the attribute file has overflow extents then
1372			 * also acquire the extents b-tree lock if its not
1373			 * already requested.
1374			 */
1375			if (((flags & SFL_EXTENTS) == 0) &&
1376			    (hfsmp->hfs_attribute_vp != NULL) &&
1377			    (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
1378				flags |= SFL_EXTENTS;
1379			}
1380		} else {
1381			flags &= ~SFL_ATTRIBUTE;
1382		}
1383	}
1384
1385	if (flags & SFL_STARTUP) {
1386#ifdef HFS_CHECK_LOCK_ORDER
1387		if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1388			panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1389		}
1390#endif /* HFS_CHECK_LOCK_ORDER */
1391
1392		if (hfsmp->hfs_startup_cp) {
1393			(void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
1394			/*
1395			 * When the startup file has overflow extents then
1396			 * also acquire the extents b-tree lock if its not
1397			 * already requested.
1398			 */
1399			if (((flags & SFL_EXTENTS) == 0) &&
1400			    (hfsmp->hfs_startup_vp != NULL) &&
1401			    (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
1402				flags |= SFL_EXTENTS;
1403			}
1404		} else {
1405			flags &= ~SFL_STARTUP;
1406		}
1407	}
1408
1409	/*
1410	 * To prevent locks being taken in the wrong order, the extent lock
1411	 * gets a bitmap lock as well.
1412	 */
1413	if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1414		if (hfsmp->hfs_allocation_cp) {
1415			(void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1416			/*
1417			 * The bitmap lock is also grabbed when only extent lock
1418			 * was requested. Set the bitmap lock bit in the lock
1419			 * flags which callers will use during unlock.
1420			 */
1421			flags |= SFL_BITMAP;
1422		} else {
1423			flags &= ~SFL_BITMAP;
1424		}
1425	}
1426
1427	if (flags & SFL_EXTENTS) {
1428		/*
1429		 * Since the extents btree lock is recursive we always
1430		 * need exclusive access.
1431		 */
1432		if (hfsmp->hfs_extents_cp) {
1433			(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1434
1435			if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) {
1436				/*
1437				 * because we may need this lock on the pageout path (if a swapfile allocation
1438				 * spills into the extents overflow tree), we will grant the holder of this
1439				 * lock the privilege of dipping into the reserve free pool in order to prevent
1440				 * a deadlock from occurring if we need those pageouts to complete before we
1441				 * will make any new pages available on the free list... the deadlock can occur
1442				 * if this thread needs to allocate memory while this lock is held
1443				 */
1444				if (set_vm_privilege(TRUE) == FALSE) {
1445					/*
1446					 * indicate that we need to drop vm_privilege
1447					 * when we unlock
1448					 */
1449					flags |= SFL_VM_PRIV;
1450				}
1451			}
1452		} else {
1453			flags &= ~SFL_EXTENTS;
1454		}
1455	}
1456
1457	return (flags);
1458}
1459
1460/*
1461 * unlock HFS system file(s).
1462 */
1463void
1464hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1465{
1466	struct timeval tv;
1467	u_int32_t lastfsync;
1468	int numOfLockedBuffs;
1469
1470	if (hfsmp->jnl == NULL) {
1471		microuptime(&tv);
1472		lastfsync = tv.tv_sec;
1473	}
1474	if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1475		hfs_unlock(hfsmp->hfs_startup_cp);
1476	}
1477	if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1478		if (hfsmp->jnl == NULL) {
1479			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1480			numOfLockedBuffs = count_lock_queue();
1481			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1482			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1483			      kMaxSecsForFsync))) {
1484				hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1485			}
1486		}
1487		hfs_unlock(hfsmp->hfs_attribute_cp);
1488	}
1489	if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1490		if (hfsmp->jnl == NULL) {
1491			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1492			numOfLockedBuffs = count_lock_queue();
1493			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1494			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1495			      kMaxSecsForFsync))) {
1496				hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1497			}
1498		}
1499		hfs_unlock(hfsmp->hfs_catalog_cp);
1500	}
1501	if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1502		hfs_unlock(hfsmp->hfs_allocation_cp);
1503	}
1504	if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1505		if (hfsmp->jnl == NULL) {
1506			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1507			numOfLockedBuffs = count_lock_queue();
1508			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1509			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1510			      kMaxSecsForFsync))) {
1511				hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1512			}
1513		}
1514		hfs_unlock(hfsmp->hfs_extents_cp);
1515
1516		if (flags & SFL_VM_PRIV) {
1517			/*
1518			 * revoke the vm_privilege we granted this thread
1519			 * now that we have unlocked the overflow extents
1520			 */
1521			set_vm_privilege(FALSE);
1522		}
1523	}
1524}
1525
1526
1527/*
1528 * RequireFileLock
1529 *
1530 * Check to see if a vnode is locked in the current context
1531 * This is to be used for debugging purposes only!!
1532 */
1533#if HFS_DIAGNOSTIC
1534void RequireFileLock(FileReference vp, int shareable)
1535{
1536	int locked;
1537
1538	/* The extents btree and allocation bitmap are always exclusive. */
1539	if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1540	    VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1541		shareable = 0;
1542	}
1543
1544	locked = VTOC(vp)->c_lockowner == current_thread();
1545
1546	if (!locked && !shareable) {
1547		switch (VTOC(vp)->c_fileid) {
1548		case kHFSExtentsFileID:
1549			panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1550			break;
1551		case kHFSCatalogFileID:
1552			panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1553			break;
1554		case kHFSAllocationFileID:
1555			/* The allocation file can hide behind the jornal lock. */
1556			if (VTOHFS(vp)->jnl == NULL)
1557				panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1558			break;
1559		case kHFSStartupFileID:
1560			panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1561		case kHFSAttributesFileID:
1562			panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1563			break;
1564		}
1565	}
1566}
1567#endif
1568
1569
1570/*
1571 * There are three ways to qualify for ownership rights on an object:
1572 *
1573 * 1. (a) Your UID matches the cnode's UID.
1574 *    (b) The object in question is owned by "unknown"
1575 * 2. (a) Permissions on the filesystem are being ignored and
1576 *        your UID matches the replacement UID.
1577 *    (b) Permissions on the filesystem are being ignored and
1578 *        the replacement UID is "unknown".
1579 * 3. You are root.
1580 *
1581 */
1582int
1583hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1584		__unused struct proc *p, int invokesuperuserstatus)
1585{
1586	if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
1587	    (cnode_uid == UNKNOWNUID) ||  									  /* [1b] */
1588	    ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
1589	      ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
1590	        (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
1591	    (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
1592		return (0);
1593	} else {
1594		return (EPERM);
1595	}
1596}
1597
1598
/*
 * BestBlockSizeFit
 *
 * Compute the optimal (largest) logical block size that evenly divides
 * allocationBlockSize, does not exceed blockSizeLimit, and is a multiple
 * of baseMultiple.  Falls back to 512 when no such size exists (e.g. the
 * allocation block size is not itself a multiple of baseMultiple).
 */
u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
                               u_int32_t blockSizeLimit,
                               u_int32_t baseMultiple) {
    u_int32_t baseBlockCount, blockCount;
    u_int32_t trialBlockSize;

    if (allocationBlockSize % baseMultiple != 0) {
        /*
           Whoops: the allocation blocks aren't even multiples of the specified base:
           no amount of dividing them into even parts will be a multiple, either then!
        */
        return 512;		/* Hope for the best */
    }

    /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
       from being handled as two 6K logical blocks instead of 3 4K logical blocks.
       Even though the former (the result of the loop below) is the larger allocation
       block size, the latter is more efficient: */
    if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;

    /* No clear winner exists: pick the largest even fraction <= blockSizeLimit: */
    baseBlockCount = allocationBlockSize / baseMultiple;	/* Now guaranteed to be an even multiple */

    for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
        trialBlockSize = blockCount * baseMultiple;
        /*
         * trialBlockSize is a multiple of baseMultiple by construction, so
         * (unlike the original) we need not re-check that; only verify it
         * divides the allocation block evenly and respects the limit.
         */
        if ((allocationBlockSize % trialBlockSize == 0) &&
            (trialBlockSize <= blockSizeLimit)) {
            return trialBlockSize;
        }
    }

    /* Note: we should never get here, since blockCount = 1 should always work,
       but this is nice and safe and makes the compiler happy, too ... */
    return 512;
}
1640
1641
1642u_int32_t
1643GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1644			struct cat_attr *fattr, struct cat_fork *forkinfo)
1645{
1646	struct hfsmount * hfsmp;
1647	struct cat_desc jdesc;
1648	int lockflags;
1649	int error;
1650
1651	if (vcb->vcbSigWord != kHFSPlusSigWord)
1652		return (0);
1653
1654	hfsmp = VCBTOHFS(vcb);
1655
1656	memset(&jdesc, 0, sizeof(struct cat_desc));
1657	jdesc.cd_parentcnid = kRootDirID;
1658	jdesc.cd_nameptr = (const u_int8_t *)name;
1659	jdesc.cd_namelen = strlen(name);
1660
1661	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1662	error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1663	hfs_systemfile_unlock(hfsmp, lockflags);
1664
1665	if (error == 0) {
1666		return (fattr->ca_fileid);
1667	} else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1668		return (0);
1669	}
1670
1671	return (0);	/* XXX what callers expect on an error */
1672}
1673
1674
1675/*
1676 * On HFS Plus Volumes, there can be orphaned files or directories
1677 * These are files or directories that were unlinked while busy.
1678 * If the volume was not cleanly unmounted then some of these may
1679 * have persisted and need to be removed.
1680 */
1681void
1682hfs_remove_orphans(struct hfsmount * hfsmp)
1683{
1684	struct BTreeIterator * iterator = NULL;
1685	struct FSBufferDescriptor btdata;
1686	struct HFSPlusCatalogFile filerec;
1687	struct HFSPlusCatalogKey * keyp;
1688	struct proc *p = current_proc();
1689	FCB *fcb;
1690	ExtendedVCB *vcb;
1691	char filename[32];
1692	char tempname[32];
1693	size_t namelen;
1694	cat_cookie_t cookie;
1695	int catlock = 0;
1696	int catreserve = 0;
1697	int started_tr = 0;
1698	int lockflags;
1699	int result;
1700	int orphaned_files = 0;
1701	int orphaned_dirs = 0;
1702
1703	bzero(&cookie, sizeof(cookie));
1704
1705	if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1706		return;
1707
1708	vcb = HFSTOVCB(hfsmp);
1709	fcb = VTOF(hfsmp->hfs_catalog_vp);
1710
1711	btdata.bufferAddress = &filerec;
1712	btdata.itemSize = sizeof(filerec);
1713	btdata.itemCount = 1;
1714
1715	MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1716	bzero(iterator, sizeof(*iterator));
1717
1718	/* Build a key to "temp" */
1719	keyp = (HFSPlusCatalogKey*)&iterator->key;
1720	keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1721	keyp->nodeName.length = 4;  /* "temp" */
1722	keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1723	keyp->nodeName.unicode[0] = 't';
1724	keyp->nodeName.unicode[1] = 'e';
1725	keyp->nodeName.unicode[2] = 'm';
1726	keyp->nodeName.unicode[3] = 'p';
1727
1728	/*
1729	 * Position the iterator just before the first real temp file/dir.
1730	 */
1731	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1732	(void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1733	hfs_systemfile_unlock(hfsmp, lockflags);
1734
1735	/* Visit all the temp files/dirs in the HFS+ private directory. */
1736	for (;;) {
1737		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1738		result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1739		hfs_systemfile_unlock(hfsmp, lockflags);
1740		if (result)
1741			break;
1742		if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1743			break;
1744
1745		(void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1746		                      (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1747
1748		(void) snprintf(tempname, sizeof(tempname), "%s%d",
1749				HFS_DELETE_PREFIX, filerec.fileID);
1750
1751		/*
1752		 * Delete all files (and directories) named "tempxxx",
1753		 * where xxx is the file's cnid in decimal.
1754		 *
1755		 */
1756		if (bcmp(tempname, filename, namelen) == 0) {
1757   			struct filefork dfork;
1758    		struct filefork rfork;
1759  			struct cnode cnode;
1760			int mode = 0;
1761
1762			bzero(&dfork, sizeof(dfork));
1763			bzero(&rfork, sizeof(rfork));
1764			bzero(&cnode, sizeof(cnode));
1765
1766			/* Delete any attributes, ignore errors */
1767			(void) hfs_removeallattr(hfsmp, filerec.fileID);
1768
1769			if (hfs_start_transaction(hfsmp) != 0) {
1770			    printf("hfs_remove_orphans: failed to start transaction\n");
1771			    goto exit;
1772			}
1773			started_tr = 1;
1774
1775			/*
1776			 * Reserve some space in the Catalog file.
1777			 */
1778			if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1779			    printf("hfs_remove_orphans: cat_preflight failed\n");
1780				goto exit;
1781			}
1782			catreserve = 1;
1783
1784			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1785			catlock = 1;
1786
1787			/* Build a fake cnode */
1788			cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1789			                &dfork.ff_data, &rfork.ff_data);
1790			cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1791			cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1792			cnode.c_desc.cd_namelen = namelen;
1793			cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1794			cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1795
1796			/* Position iterator at previous entry */
1797			if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1798			    NULL, NULL) != 0) {
1799				break;
1800			}
1801
1802			/* Truncate the file to zero (both forks) */
1803			if (dfork.ff_blocks > 0) {
1804				u_int64_t fsize;
1805
1806				dfork.ff_cp = &cnode;
1807				cnode.c_datafork = &dfork;
1808				cnode.c_rsrcfork = NULL;
1809				fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1810				while (fsize > 0) {
1811				    if (fsize > HFS_BIGFILE_SIZE) {
1812						fsize -= HFS_BIGFILE_SIZE;
1813					} else {
1814						fsize = 0;
1815					}
1816
1817					if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1818									  cnode.c_attr.ca_fileid, false) != 0) {
1819						printf("hfs: error truncating data fork!\n");
1820						break;
1821					}
1822
1823					//
1824					// if we're iteratively truncating this file down,
1825					// then end the transaction and start a new one so
1826					// that no one transaction gets too big.
1827					//
1828					if (fsize > 0 && started_tr) {
1829						/* Drop system file locks before starting
1830						 * another transaction to preserve lock order.
1831						 */
1832						hfs_systemfile_unlock(hfsmp, lockflags);
1833						catlock = 0;
1834						hfs_end_transaction(hfsmp);
1835
1836						if (hfs_start_transaction(hfsmp) != 0) {
1837							started_tr = 0;
1838							break;
1839						}
1840						lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1841						catlock = 1;
1842					}
1843				}
1844			}
1845
1846			if (rfork.ff_blocks > 0) {
1847				rfork.ff_cp = &cnode;
1848				cnode.c_datafork = NULL;
1849				cnode.c_rsrcfork = &rfork;
1850				if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1851					printf("hfs: error truncating rsrc fork!\n");
1852					break;
1853				}
1854			}
1855
1856			/* Remove the file or folder record from the Catalog */
1857			if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1858				printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1859				hfs_systemfile_unlock(hfsmp, lockflags);
1860				catlock = 0;
1861				hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1862				break;
1863			}
1864
1865			mode = cnode.c_attr.ca_mode & S_IFMT;
1866
1867			if (mode == S_IFDIR) {
1868				orphaned_dirs++;
1869			}
1870			else {
1871				orphaned_files++;
1872			}
1873
1874			/* Update parent and volume counts */
1875			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1876			if (mode == S_IFDIR) {
1877				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1878			}
1879
1880			(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1881			                 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1882
1883			/* Drop locks and end the transaction */
1884			hfs_systemfile_unlock(hfsmp, lockflags);
1885			cat_postflight(hfsmp, &cookie, p);
1886			catlock = catreserve = 0;
1887
1888			/*
1889			   Now that Catalog is unlocked, update the volume info, making
1890			   sure to differentiate between files and directories
1891			*/
1892			if (mode == S_IFDIR) {
1893				hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1894			}
1895			else{
1896 				hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1897			}
1898
1899			if (started_tr) {
1900				hfs_end_transaction(hfsmp);
1901				started_tr = 0;
1902			}
1903
1904		} /* end if */
1905	} /* end for */
1906	if (orphaned_files > 0 || orphaned_dirs > 0)
1907		printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1908exit:
1909	if (catlock) {
1910		hfs_systemfile_unlock(hfsmp, lockflags);
1911	}
1912	if (catreserve) {
1913		cat_postflight(hfsmp, &cookie, p);
1914	}
1915	if (started_tr) {
1916		hfs_end_transaction(hfsmp);
1917	}
1918
1919	FREE(iterator, M_TEMP);
1920	hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1921}
1922
1923
1924/*
1925 * This will return the correct logical block size for a given vnode.
1926 * For most files, it is the allocation block size, for meta data like
1927 * BTrees, this is kept as part of the BTree private nodeSize
1928 */
1929u_int32_t
1930GetLogicalBlockSize(struct vnode *vp)
1931{
1932u_int32_t logBlockSize;
1933
1934	DBG_ASSERT(vp != NULL);
1935
1936	/* start with default */
1937	logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1938
1939	if (vnode_issystem(vp)) {
1940		if (VTOF(vp)->fcbBTCBPtr != NULL) {
1941			BTreeInfoRec			bTreeInfo;
1942
1943			/*
1944			 * We do not lock the BTrees, because if we are getting block..then the tree
1945			 * should be locked in the first place.
1946			 * We just want the nodeSize wich will NEVER change..so even if the world
1947			 * is changing..the nodeSize should remain the same. Which argues why lock
1948			 * it in the first place??
1949			 */
1950
1951			(void) BTGetInformation	(VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1952
1953			logBlockSize = bTreeInfo.nodeSize;
1954
1955		} else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1956				logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1957		}
1958	}
1959
1960	DBG_ASSERT(logBlockSize > 0);
1961
1962	return logBlockSize;
1963}
1964
1965#if HFS_SPARSE_DEV
/*
 * Compute the number of allocation blocks available on the backing
 * store of a sparse-device (disk image) volume.
 *
 * Returns true and stores the count in *pfree_blks when the volume has
 * a backing vnode; returns false (leaving *pfree_blks untouched) when
 * there is no backing vnode or its statfs information is unavailable.
 * When an iocount cannot be obtained, reports zero free blocks rather
 * than failing.
 */
static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
{
	struct vfsstatfs *vfsp;  /* 272 bytes */
	uint64_t vfreeblks;
	struct timeval now;

	hfs_lock_mount(hfsmp);

	vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp;
	if (!backing_vp) {
		/* Not a sparse-device volume. */
		hfs_unlock_mount(hfsmp);
		return false;
	}

	// usecount is not enough; we need iocount
	if (vnode_get(backing_vp)) {
		/* Backing vnode is going away; report no space available. */
		hfs_unlock_mount(hfsmp);
		*pfree_blks = 0;
		return true;
	}

	/* Snapshot the tunables while the mount lock is still held. */
	uint32_t loanedblks = hfsmp->loanedBlocks;
	uint32_t bandblks	= hfsmp->hfs_sparsebandblks;
	uint64_t maxblks	= hfsmp->hfs_backingfs_maxblocks;

	hfs_unlock_mount(hfsmp);

	mount_t backingfs_mp = vnode_mount(backing_vp);

	/*
	 * Refresh the backing volume's statfs data at most once per
	 * second.  NOTE(review): hfs_last_backingstatfs is read and
	 * written here after the mount lock was dropped — presumably
	 * benign (worst case an extra refresh); confirm intended.
	 */
	microtime(&now);
	if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
		vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
		hfsmp->hfs_last_backingstatfs = now.tv_sec;
	}

	if (!(vfsp = vfs_statfs(backingfs_mp))) {
		vnode_put(backing_vp);
		return false;
	}

	vfreeblks = vfsp->f_bavail;
	/* Normalize block count if needed. */
	if (vfsp->f_bsize != hfsmp->blockSize)
		vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
	/* Keep the sparse band reserve out of the reported free space. */
	if (vfreeblks > bandblks)
		vfreeblks -= bandblks;
	else
		vfreeblks = 0;

	/*
	 * Take into account any delayed allocations.  It is not
	 * certain what the original reason for the "2 *" is.  Most
	 * likely it is to allow for additional requirements in the
	 * host file system and metadata required by disk images.  The
	 * number of loaned blocks is likely to be small and we will
	 * stop using them as we get close to the limit.
	 */
	loanedblks = 2 * loanedblks;
	if (vfreeblks > loanedblks)
		vfreeblks -= loanedblks;
	else
		vfreeblks = 0;

	/* Honor an administrative cap on backing-store usage, if set. */
	if (maxblks)
		vfreeblks = MIN(vfreeblks, maxblks);

	vnode_put(backing_vp);

	*pfree_blks = vfreeblks;

	return true;
}
2038#endif
2039
2040u_int32_t
2041hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2042{
2043	u_int32_t freeblks;
2044	u_int32_t rsrvblks;
2045	u_int32_t loanblks;
2046
2047	/*
2048	 * We don't bother taking the mount lock
2049	 * to look at these values since the values
2050	 * themselves are each updated atomically
2051	 * on aligned addresses.
2052	 */
2053	freeblks = hfsmp->freeBlocks;
2054	rsrvblks = hfsmp->reserveBlocks;
2055	loanblks = hfsmp->loanedBlocks;
2056	if (wantreserve) {
2057		if (freeblks > rsrvblks)
2058			freeblks -= rsrvblks;
2059		else
2060			freeblks = 0;
2061	}
2062	if (freeblks > loanblks)
2063		freeblks -= loanblks;
2064	else
2065		freeblks = 0;
2066
2067#if HFS_SPARSE_DEV
2068	/*
2069	 * When the underlying device is sparse, check the
2070	 * available space on the backing store volume.
2071	 */
2072	uint64_t vfreeblks;
2073	if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2074		freeblks = MIN(freeblks, vfreeblks);
2075#endif /* HFS_SPARSE_DEV */
2076
2077	if (hfsmp->hfs_flags & HFS_CS) {
2078		uint64_t cs_free_bytes;
2079		uint64_t cs_free_blks;
2080		if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
2081		    (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
2082			cs_free_blks = cs_free_bytes / hfsmp->blockSize;
2083			if (cs_free_blks > loanblks)
2084				cs_free_blks -= loanblks;
2085			else
2086				cs_free_blks = 0;
2087			freeblks = MIN(cs_free_blks, freeblks);
2088		}
2089	}
2090
2091	return (freeblks);
2092}
2093
2094/*
2095 * Map HFS Common errors (negative) to BSD error codes (positive).
2096 * Positive errors (ie BSD errors) are passed through unchanged.
2097 */
2098short MacToVFSError(OSErr err)
2099{
2100	if (err >= 0)
2101        	return err;
2102
2103	/* BSD/VFS internal errnos */
2104	switch (err) {
2105		case ERESERVEDNAME: /* -8 */
2106			return err;
2107	}
2108
2109	switch (err) {
2110	case dskFulErr:			/*    -34 */
2111	case btNoSpaceAvail:		/* -32733 */
2112		return ENOSPC;
2113	case fxOvFlErr:			/* -32750 */
2114		return EOVERFLOW;
2115
2116	case btBadNode:			/* -32731 */
2117		return EIO;
2118
2119	case memFullErr:		/*  -108 */
2120		return ENOMEM;		/*   +12 */
2121
2122	case cmExists:			/* -32718 */
2123	case btExists:			/* -32734 */
2124		return EEXIST;		/*    +17 */
2125
2126	case cmNotFound:		/* -32719 */
2127	case btNotFound:		/* -32735 */
2128		return ENOENT;		/*     28 */
2129
2130	case cmNotEmpty:		/* -32717 */
2131		return ENOTEMPTY;	/*     66 */
2132
2133	case cmFThdDirErr:		/* -32714 */
2134		return EISDIR;		/*     21 */
2135
2136	case fxRangeErr:		/* -32751 */
2137		return ERANGE;
2138
2139	case bdNamErr:			/*   -37 */
2140		return ENAMETOOLONG;	/*    63 */
2141
2142	case paramErr:			/*   -50 */
2143	case fileBoundsErr:		/* -1309 */
2144		return EINVAL;		/*   +22 */
2145
2146	case fsBTBadNodeSize:
2147		return ENXIO;
2148
2149	default:
2150		return EIO;		/*   +5 */
2151	}
2152}
2153
2154
2155/*
2156 * Find the current thread's directory hint for a given index.
2157 *
2158 * Requires an exclusive lock on directory cnode.
2159 *
2160 * Use detach if the cnode lock must be dropped while the hint is still active.
2161 */
__private_extern__
directoryhint_t *
hfs_getdirhint(struct cnode *dcp, int index, int detach)
{
	struct timeval tv;
	directoryhint_t *hint;
	boolean_t need_remove, need_init;
	const u_int8_t * name;

	/* Timestamp for hint aging (uptime, matches hfs_reldirhints). */
	microuptime(&tv);

	/*
	 *  Look for an existing hint first.  If not found, create a new one (when
	 *  the list is not full) or recycle the oldest hint.  Since new hints are
	 *  always added to the head of the list, the last hint is always the
	 *  oldest.
	 */
	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
		if (hint->dh_index == index)
			break;
	}
	if (hint != NULL) { /* found an existing hint */
		need_init = false;
		need_remove = true;
	} else { /* cannot find an existing hint */
		need_init = true;
		if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
			/* Create a default directory hint */
			MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
			++dcp->c_dirhintcnt;
			need_remove = false;
		} else {				/* recycle the last (i.e., the oldest) hint */
			hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
			/* Release the recycled hint's catalog name buffer, if any. */
			if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
			    (name = hint->dh_desc.cd_nameptr)) {
				hint->dh_desc.cd_nameptr = NULL;
				hint->dh_desc.cd_namelen = 0;
				hint->dh_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename((const char *)name);
			}
			need_remove = true;
		}
	}

	/* Unlink before re-inserting at the head (or before detaching). */
	if (need_remove)
		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);

	/*
	 * A detached hint is handed to the caller off-list (the count is
	 * decremented); otherwise move it to the head as most-recently-used.
	 */
	if (detach)
		--dcp->c_dirhintcnt;
	else
		TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);

	/* Fresh or recycled hints start with a clean descriptor. */
	if (need_init) {
		hint->dh_index = index;
		hint->dh_desc.cd_flags = 0;
		hint->dh_desc.cd_encoding = 0;
		hint->dh_desc.cd_namelen = 0;
		hint->dh_desc.cd_nameptr = NULL;
		hint->dh_desc.cd_parentcnid = dcp->c_fileid;
		hint->dh_desc.cd_hint = dcp->c_childhint;
		hint->dh_desc.cd_cnid = 0;
	}
	hint->dh_time = tv.tv_sec;
	return (hint);
}
2227
2228/*
2229 * Release a single directory hint.
2230 *
2231 * Requires an exclusive lock on directory cnode.
2232 */
2233__private_extern__
2234void
2235hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2236{
2237	const u_int8_t * name;
2238	directoryhint_t *hint;
2239
2240	/* Check if item is on list (could be detached) */
2241	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2242		if (hint == relhint) {
2243			TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2244			--dcp->c_dirhintcnt;
2245			break;
2246		}
2247	}
2248	name = relhint->dh_desc.cd_nameptr;
2249	if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2250		relhint->dh_desc.cd_nameptr = NULL;
2251		relhint->dh_desc.cd_namelen = 0;
2252		relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2253		vfs_removename((const char *)name);
2254	}
2255	FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2256}
2257
2258/*
2259 * Release directory hints for given directory
2260 *
2261 * Requires an exclusive lock on directory cnode.
2262 */
2263__private_extern__
2264void
2265hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2266{
2267	struct timeval tv;
2268	directoryhint_t *hint, *prev;
2269	const u_int8_t * name;
2270
2271	if (stale_hints_only)
2272		microuptime(&tv);
2273
2274	/* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2275	for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2276		if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2277			break;  /* stop here if this entry is too new */
2278		name = hint->dh_desc.cd_nameptr;
2279		if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2280			hint->dh_desc.cd_nameptr = NULL;
2281			hint->dh_desc.cd_namelen = 0;
2282			hint->dh_desc.cd_flags &= ~CD_HASBUF;
2283			vfs_removename((const char *)name);
2284		}
2285		prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2286		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2287		FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2288		--dcp->c_dirhintcnt;
2289	}
2290}
2291
2292/*
2293 * Insert a detached directory hint back into the list of dirhints.
2294 *
2295 * Requires an exclusive lock on directory cnode.
2296 */
2297__private_extern__
2298void
2299hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2300{
2301	directoryhint_t *test;
2302
2303	TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2304		if (test == hint)
2305			panic("hfs_insertdirhint: hint %p already on list!", hint);
2306	}
2307
2308	TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2309	++dcp->c_dirhintcnt;
2310}
2311
2312/*
2313 * Perform a case-insensitive compare of two UTF-8 filenames.
2314 *
2315 * Returns 0 if the strings match.
2316 */
2317__private_extern__
2318int
2319hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2320{
2321	u_int16_t *ustr1, *ustr2;
2322	size_t ulen1, ulen2;
2323	size_t maxbytes;
2324	int cmp = -1;
2325
2326	if (len1 != len2)
2327		return (cmp);
2328
2329	maxbytes = kHFSPlusMaxFileNameChars << 1;
2330	MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2331	ustr2 = ustr1 + (maxbytes >> 1);
2332
2333	if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2334		goto out;
2335	if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2336		goto out;
2337
2338	cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2339out:
2340	FREE(ustr1, M_TEMP);
2341	return (cmp);
2342}
2343
2344
/*
 * Context passed to journal_open_cb() while iterating candidate
 * external journal partitions.
 */
typedef struct jopen_cb_info {
	off_t   jsize;		/* journal size in bytes (for the clean check) */
	char   *desired_uuid;	/* uuid to match; empty string matches any device */
        struct  vnode *jvp;	/* out: opened journal device vnode (iocount held) */
	size_t  blksize;	/* device block size passed to journal_is_clean() */
	int     need_clean;	/* in: reject a dirty journal when no uuid was given */
	int     need_init;	/* out: journal is uninitialized and must be created */
} jopen_cb_info;
2353
/*
 * Media-iteration callback used by open_journal_dev().
 *
 * Called once per block device whose content type matches the external
 * journal UUID.  Returns 1 to keep iterating, 0 to stop (a usable
 * journal device was opened and left in ji->jvp with an iocount and
 * an open FREAD|FWRITE reference).
 */
static int
journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
{
	struct nameidata nd;
	jopen_cb_info *ji = (jopen_cb_info *)arg;
	char bsd_name[256];
	int error;

	/* Build the full device node path, e.g. "/dev/disk1s2". */
	strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
	strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);

	/* A specific uuid was requested and this device isn't it. */
	if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
		return 1;   // keep iterating
	}

	// if we're here, either the desired uuid matched or there was no
	// desired uuid so let's try to open the device for writing and
	// see if it works.  if it does, we'll use it.

	NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
	if ((error = namei(&nd))) {
		printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
		return 1;   // keep iterating
	}

	/* namei returned the vnode with an iocount; release the nameidata. */
	ji->jvp = nd.ni_vp;
	nameidone(&nd);

	if (ji->jvp == NULL) {
		printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
	} else {
		error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
		if (error == 0) {
			// if the journal is dirty and we didn't specify a desired
			// journal device uuid, then do not use the journal.  but
			// if the journal is just invalid (e.g. it hasn't been
			// initialized) then just set the need_init flag.
			if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
				error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
				if (error == EBUSY) {
					/* Dirty journal: close, drop the iocount, try the next device. */
					VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
					vnode_put(ji->jvp);
					ji->jvp = NULL;
					return 1;    // keep iterating
				} else if (error == EINVAL) {
					ji->need_init = 1;
				}
			}

			/* No uuid was requested: record the one we actually got. */
			if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
				strlcpy(ji->desired_uuid, uuid_str, 128);
			}
			vnode_setmountedon(ji->jvp);
			return 0;   // stop iterating
		} else {
			/* Open failed: drop the iocount from namei. */
			vnode_put(ji->jvp);
			ji->jvp = NULL;
		}
	}

	return 1;   // keep iterating
}
2416
2417extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2418kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2419
2420
2421static vnode_t
2422open_journal_dev(const char *vol_device,
2423		 int need_clean,
2424		 char *uuid_str,
2425		 char *machine_serial_num,
2426		 off_t jsize,
2427		 size_t blksize,
2428		 int *need_init)
2429{
2430    int retry_counter=0;
2431    jopen_cb_info ji;
2432
2433    ji.jsize        = jsize;
2434    ji.desired_uuid = uuid_str;
2435    ji.jvp          = NULL;
2436    ji.blksize      = blksize;
2437    ji.need_clean   = need_clean;
2438    ji.need_init    = 0;
2439
2440//    if (uuid_str[0] == '\0') {
2441//	    printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2442//    } else {
2443//	    printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2444//    }
2445    while (ji.jvp == NULL && retry_counter++ < 4) {
2446	    if (retry_counter > 1) {
2447		    if (uuid_str[0]) {
2448			    printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
2449		    } else {
2450			    printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
2451		    }
2452		    delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
2453	    }
2454
2455	    IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2456    }
2457
2458    if (ji.jvp == NULL) {
2459	    printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2460		   vol_device, uuid_str, machine_serial_num);
2461    }
2462
2463    *need_init = ji.need_init;
2464
2465    return ji.jvp;
2466}
2467
2468
/*
 * Early (pre-catalog) journal initialization, done at mount time from
 * the on-disk journal info block referenced by the volume header.
 *
 * Opens or creates the journal (in-volume or on an external device),
 * replays it if necessary, and reloads *mdbp afterwards since replay
 * may have rewritten the volume header.  For a read-only, non-root
 * mount the journal is only checked for cleanliness; a dirty journal
 * fails the mount.
 *
 * Returns 0 on success, or a BSD errno (EROFS when no external journal
 * device could be opened, EINVAL when the journal could not be
 * opened/created, or a buffer-cache read error).
 */
int
hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
					   void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
					   HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
{
	JournalInfoBlock *jibp;
	struct buf       *jinfo_bp, *bp;
	int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	int               retval, write_jibp = 0;
	uint32_t		  blksize = hfsmp->hfs_logical_block_size;
	struct vnode     *devvp;
	struct hfs_mount_args *args = _args;
	u_int32_t	  jib_flags;
	u_int64_t	  jib_offset;
	u_int64_t	  jib_size;
	const char *dev_name;

	devvp = hfsmp->hfs_devvp;
	dev_name = vnode_getname_printable(devvp);

	/* Pick up journal tuning from extended mount arguments, if given. */
	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
		arg_flags  = args->journal_flags;
		arg_tbufsz = args->journal_tbuffer_size;
	}

	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;

	/* Read the journal info block off disk (fields are big-endian). */
	jinfo_bp = NULL;
	retval = (int)buf_meta_bread(devvp,
						(daddr64_t)((embeddedOffset/blksize) +
						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
						hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
	if (retval) {
		if (jinfo_bp) {
			buf_brelse(jinfo_bp);
		}
		goto cleanup_dev_name;
	}

	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	jib_flags  = SWAP_BE32(jibp->flags);
	jib_size   = SWAP_BE64(jibp->size);

	if (jib_flags & kJIJournalInFSMask) {
		/* Journal lives inside this volume, on the same device. */
		hfsmp->jvp = hfsmp->hfs_devvp;
		jib_offset = SWAP_BE64(jibp->offset);
	} else {
	    int need_init=0;

	    // if the volume was unmounted cleanly then we'll pick any
	    // available external journal partition
	    //
	    if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
		    *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
	    }

	    hfsmp->jvp = open_journal_dev(dev_name,
					  !(jib_flags & kJIJournalNeedInitMask),
					  (char *)&jibp->ext_jnl_uuid[0],
					  (char *)&jibp->machine_serial_num[0],
					  jib_size,
					  hfsmp->hfs_logical_block_size,
					  &need_init);
	    if (hfsmp->jvp == NULL) {
		    buf_brelse(jinfo_bp);
		    retval = EROFS;
		    goto cleanup_dev_name;
	    } else {
		    /* Record which machine last used this external journal. */
		    if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
			    strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
		    }
	    }

	    /* External journal starts at offset 0 of its own device. */
	    jib_offset = 0;
	    write_jibp = 1;
	    if (need_init) {
		    jib_flags |= kJIJournalNeedInitMask;
	    }
	}

	// save this off for the hack-y check in hfs_remove()
	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	hfsmp->jnl_size  = jib_size;

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
	    // if the file system is read-only, check if the journal is empty.
	    // if it is, then we can allow the mount.  otherwise we have to
	    // return failure.
	    retval = journal_is_clean(hfsmp->jvp,
				      jib_offset + embeddedOffset,
				      jib_size,
				      devvp,
				      hfsmp->hfs_logical_block_size);

	    hfsmp->jnl = NULL;

	    buf_brelse(jinfo_bp);

	    if (retval) {
		    const char *name = vnode_getname_printable(devvp);
		    printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
		    name);
		    vnode_putname_printable(name);
	    }

	    goto cleanup_dev_name;
	}

	if (jib_flags & kJIJournalNeedInitMask) {
		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
			   jib_offset + embeddedOffset, jib_size);
		hfsmp->jnl = journal_create(hfsmp->jvp,
									jib_offset + embeddedOffset,
									jib_size,
									devvp,
									blksize,
									arg_flags,
									arg_tbufsz,
									hfs_sync_metadata, hfsmp->hfs_mp,
									hfsmp->hfs_mp);
		if (hfsmp->jnl)
			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

		// no need to start a transaction here... if this were to fail
		// we'd just re-init it on the next mount.
		jib_flags &= ~kJIJournalNeedInitMask;
		jibp->flags  = SWAP_BE32(jib_flags);
		buf_bwrite(jinfo_bp);
		jinfo_bp = NULL;
		jibp     = NULL;
	} else {
		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
		//	   jib_offset + embeddedOffset,
		//	   jib_size, SWAP_BE32(vhp->blockSize));

		/* journal_open() replays the journal if it is dirty. */
		hfsmp->jnl = journal_open(hfsmp->jvp,
								  jib_offset + embeddedOffset,
								  jib_size,
								  devvp,
								  blksize,
								  arg_flags,
								  arg_tbufsz,
								  hfs_sync_metadata, hfsmp->hfs_mp,
								  hfsmp->hfs_mp);
		if (hfsmp->jnl)
			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

		if (write_jibp) {
			buf_bwrite(jinfo_bp);
		} else {
			buf_brelse(jinfo_bp);
		}
		jinfo_bp = NULL;
		jibp     = NULL;

		if (hfsmp->jnl && mdbp) {
			// reload the mdb because it could have changed
			// if the journal had to be replayed.
			if (mdb_offset == 0) {
				mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
			}
			bp = NULL;
			retval = (int)buf_meta_bread(devvp,
					HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
					hfsmp->hfs_physical_block_size, cred, &bp);
			if (retval) {
				if (bp) {
					buf_brelse(bp);
				}
				printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
					   retval);
				goto cleanup_dev_name;
			}
			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
			buf_brelse(bp);
			bp = NULL;
		}
	}

	// if we expected the journal to be there and we couldn't
	// create it or open it then we have to bail out.
	if (hfsmp->jnl == NULL) {
		printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
		retval = EINVAL;
		goto cleanup_dev_name;
	}

	retval = 0;

cleanup_dev_name:
	vnode_putname_printable(dev_name);
	return retval;
}
2662
2663
2664//
2665// This function will go and re-locate the .journal_info_block and
2666// the .journal files in case they moved (which can happen if you
2667// run Norton SpeedDisk).  If we fail to find either file we just
2668// disable journaling for this volume and return.  We turn off the
2669// journaling bit in the vcb and assume it will get written to disk
2670// later (if it doesn't on the next mount we'd do the same thing
2671// again which is harmless).  If we disable journaling we don't
2672// return an error so that the volume is still mountable.
2673//
2674// If the info we find for the .journal_info_block and .journal files
2675// isn't what we had stored, we re-set our cached info and proceed
2676// with opening the journal normally.
2677//
2678static int
2679hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2680{
2681	JournalInfoBlock *jibp;
2682	struct buf       *jinfo_bp;
2683	int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2684	int               retval, write_jibp = 0, recreate_journal = 0;
2685	struct vnode     *devvp;
2686	struct cat_attr   jib_attr, jattr;
2687	struct cat_fork   jib_fork, jfork;
2688	ExtendedVCB      *vcb;
2689	u_int32_t            fid;
2690	struct hfs_mount_args *args = _args;
2691	u_int32_t	  jib_flags;
2692	u_int64_t	  jib_offset;
2693	u_int64_t	  jib_size;
2694
2695	devvp = hfsmp->hfs_devvp;
2696	vcb = HFSTOVCB(hfsmp);
2697
2698	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2699		if (args->journal_disable) {
2700			return 0;
2701		}
2702
2703		arg_flags  = args->journal_flags;
2704		arg_tbufsz = args->journal_tbuffer_size;
2705	}
2706
2707	fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2708	if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2709		printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2710			   jib_fork.cf_extents[0].startBlock);
2711		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2712		return 0;
2713	}
2714	hfsmp->hfs_jnlinfoblkid = fid;
2715
2716	// make sure the journal_info_block begins where we think it should.
2717	if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2718		printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
2719			   SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2720
2721		vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
2722		vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2723		recreate_journal = 1;
2724	}
2725
2726
2727	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2728	jinfo_bp = NULL;
2729	retval = (int)buf_meta_bread(devvp,
2730						(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2731						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2732						hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2733	if (retval) {
2734		if (jinfo_bp) {
2735			buf_brelse(jinfo_bp);
2736		}
2737		printf("hfs: can't read journal info block. disabling journaling.\n");
2738		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2739		return 0;
2740	}
2741
2742	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2743	jib_flags  = SWAP_BE32(jibp->flags);
2744	jib_offset = SWAP_BE64(jibp->offset);
2745	jib_size   = SWAP_BE64(jibp->size);
2746
2747	fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2748	if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2749		printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2750			   jfork.cf_extents[0].startBlock);
2751		buf_brelse(jinfo_bp);
2752		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2753		return 0;
2754	}
2755	hfsmp->hfs_jnlfileid = fid;
2756
2757	// make sure the journal file begins where we think it should.
2758	if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2759		printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
2760			   (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2761
2762		jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2763		write_jibp   = 1;
2764		recreate_journal = 1;
2765	}
2766
2767	// check the size of the journal file.
2768	if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2769		printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
2770			   jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2771
2772		jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2773		write_jibp = 1;
2774		recreate_journal = 1;
2775	}
2776
2777	if (jib_flags & kJIJournalInFSMask) {
2778		hfsmp->jvp = hfsmp->hfs_devvp;
2779		jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2780	} else {
2781	    const char *dev_name;
2782	    int need_init = 0;
2783
2784	    dev_name = vnode_getname_printable(devvp);
2785
2786            // since the journal is empty, just use any available external journal
2787	    *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2788
2789	    // this fills in the uuid of the device we actually get
2790	    hfsmp->jvp = open_journal_dev(dev_name,
2791					  !(jib_flags & kJIJournalNeedInitMask),
2792					  (char *)&jibp->ext_jnl_uuid[0],
2793					  (char *)&jibp->machine_serial_num[0],
2794					  jib_size,
2795					  hfsmp->hfs_logical_block_size,
2796					  &need_init);
2797	    if (hfsmp->jvp == NULL) {
2798		    buf_brelse(jinfo_bp);
2799		    vnode_putname_printable(dev_name);
2800		    return EROFS;
2801	    } else {
2802		    if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2803			    strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2804		    }
2805	    }
2806	    jib_offset = 0;
2807	    recreate_journal = 1;
2808	    write_jibp = 1;
2809	    if (need_init) {
2810		    jib_flags |= kJIJournalNeedInitMask;
2811	    }
2812	    vnode_putname_printable(dev_name);
2813	}
2814
2815	// save this off for the hack-y check in hfs_remove()
2816	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2817	hfsmp->jnl_size  = jib_size;
2818
2819	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2820	    // if the file system is read-only, check if the journal is empty.
2821	    // if it is, then we can allow the mount.  otherwise we have to
2822	    // return failure.
2823	    retval = journal_is_clean(hfsmp->jvp,
2824				      jib_offset,
2825				      jib_size,
2826				      devvp,
2827		                      hfsmp->hfs_logical_block_size);
2828
2829	    hfsmp->jnl = NULL;
2830
2831	    buf_brelse(jinfo_bp);
2832
2833	    if (retval) {
2834		    const char *name = vnode_getname_printable(devvp);
2835		    printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2836		    name);
2837		    vnode_putname_printable(name);
2838	    }
2839
2840	    return retval;
2841	}
2842
2843	if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2844		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2845			   jib_offset, jib_size);
2846		hfsmp->jnl = journal_create(hfsmp->jvp,
2847									jib_offset,
2848									jib_size,
2849									devvp,
2850									hfsmp->hfs_logical_block_size,
2851									arg_flags,
2852									arg_tbufsz,
2853									hfs_sync_metadata, hfsmp->hfs_mp,
2854									hfsmp->hfs_mp);
2855		if (hfsmp->jnl)
2856			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2857
2858		// no need to start a transaction here... if this were to fail
2859		// we'd just re-init it on the next mount.
2860		jib_flags &= ~kJIJournalNeedInitMask;
2861		write_jibp   = 1;
2862
2863	} else {
2864		//
2865		// if we weren't the last person to mount this volume
2866		// then we need to throw away the journal because it
2867		// is likely that someone else mucked with the disk.
2868		// if the journal is empty this is no big deal.  if the
2869		// disk is dirty this prevents us from replaying the
2870		// journal over top of changes that someone else made.
2871		//
2872		arg_flags |= JOURNAL_RESET;
2873
2874		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2875		//	   jib_offset,
2876		//	   jib_size, SWAP_BE32(vhp->blockSize));
2877
2878		hfsmp->jnl = journal_open(hfsmp->jvp,
2879								  jib_offset,
2880								  jib_size,
2881								  devvp,
2882								  hfsmp->hfs_logical_block_size,
2883								  arg_flags,
2884								  arg_tbufsz,
2885								  hfs_sync_metadata, hfsmp->hfs_mp,
2886								  hfsmp->hfs_mp);
2887		if (hfsmp->jnl)
2888			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2889	}
2890
2891
2892	if (write_jibp) {
2893		jibp->flags  = SWAP_BE32(jib_flags);
2894		jibp->offset = SWAP_BE64(jib_offset);
2895		jibp->size   = SWAP_BE64(jib_size);
2896
2897		buf_bwrite(jinfo_bp);
2898	} else {
2899		buf_brelse(jinfo_bp);
2900	}
2901	jinfo_bp = NULL;
2902	jibp     = NULL;
2903
2904	// if we expected the journal to be there and we couldn't
2905	// create it or open it then we have to bail out.
2906	if (hfsmp->jnl == NULL) {
2907		printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2908		return EINVAL;
2909	}
2910
2911	return 0;
2912}
2913
2914/*
2915 * Calculate the allocation zone for metadata.
2916 *
2917 * This zone includes the following:
2918 *	Allocation Bitmap file
2919 *	Overflow Extents file
2920 *	Journal file
2921 *	Quota files
2922 *	Clustered Hot files
2923 *	Catalog file
2924 *
2925 *                          METADATA ALLOCATION ZONE
2926 * ____________________________________________________________________________
2927 * |    |    |     |               |                              |           |
2928 * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
2929 * |____|____|_____|_______________|______________________________|___________|
2930 *
2931 * <------------------------------- N * 128 MB ------------------------------->
2932 *
2933 */
/* Scaling unit for volume-size-based zone calculations (bytes). */
#define GIGABYTE  (u_int64_t)(1024*1024*1024)

/* Sizing bounds (in bytes) for components of the metadata zone. */
#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
#define JOURNAL_DEFAULT_SIZE  (8*1024*1024)
#define JOURNAL_MAXIMUM_SIZE  (512*1024*1024)
#define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
#define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
2942
2943/* Initialize the metadata zone.
2944 *
2945 * If the size of  the volume is less than the minimum size for
2946 * metadata zone, metadata zone is disabled.
2947 *
2948 * If disable is true, disable metadata zone unconditionally.
2949 */
void
hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
{
	ExtendedVCB  *vcb;
	u_int64_t  fs_size;		/* total volume size in bytes */
	u_int64_t  zonesize;	/* accumulated metadata zone size in bytes */
	u_int64_t  temp;
	u_int64_t  filesize;	/* hot file band + quota files size in bytes */
	u_int32_t  blk;
	int  items, really_do_it=1;

	vcb = HFSTOVCB(hfsmp);
	fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;

	/*
	 * For volumes less than 10 GB, don't bother.
	 */
	if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
		really_do_it = 0;
	}

	/*
	 * Skip non-journaled volumes as well.
	 */
	if (hfsmp->jnl == NULL) {
		really_do_it = 0;
	}

	/* If caller wants to disable metadata zone, do it */
	if (disable == true) {
		really_do_it = 0;
	}

	/*
	 * Start with space for the boot blocks and Volume Header.
	 * 1536 = byte offset from start of volume to end of volume header:
	 * 1024 bytes is the offset from the start of the volume to the
	 * start of the volume header (defined by the volume format)
	 * + 512 bytes (the size of the volume header).
	 */
	zonesize = roundup(1536, hfsmp->blockSize);

	/*
	 * Add the on-disk size of allocation bitmap.
	 */
	zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;

	/*
	 * Add space for the Journal Info Block and Journal (if they're in
	 * this file system, i.e. the journal device is the volume's device).
	 */
	if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
		zonesize += hfsmp->blockSize + hfsmp->jnl_size;
	}

	/*
	 * Add the existing size of the Extents Overflow B-tree.
	 * (It rarely grows, so don't bother reserving additional room for it.)
	 */
	zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;

	/*
	 * If there is an Attributes B-tree, leave room for 11 clumps worth.
	 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
	 * When installing a full OS install onto a 20GB volume, we use
	 * 7 to 8 clumps worth of space (depending on packages), so that leaves
	 * us with another 3 or 4 clumps worth before we need another extent.
	 */
	if (hfsmp->hfs_attribute_cp) {
		zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
	}

	/*
	 * Leave room for 11 clumps of the Catalog B-tree.
	 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
	 * When installing a full OS install onto a 20GB volume, we use
	 * 7 to 8 clumps worth of space (depending on packages), so that leaves
	 * us with another 3 or 4 clumps worth before we need another extent.
	 */
	zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;

	/*
	 * Add space for hot file region.
	 *
	 * ...for now, use 5 MB per 1 GB (0.5 %)
	 */
	filesize = (fs_size / 1024) * 5;
	if (filesize > HOTBAND_MAXIMUM_SIZE)
		filesize = HOTBAND_MAXIMUM_SIZE;
	else if (filesize < HOTBAND_MINIMUM_SIZE)
		filesize = HOTBAND_MINIMUM_SIZE;
	/*
	 * Calculate user quota file requirements.
	 */
	if (hfsmp->hfs_flags & HFS_QUOTAS) {
		items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
		if (items < QF_MIN_USERS)
			items = QF_MIN_USERS;
		else if (items > QF_MAX_USERS)
			items = QF_MAX_USERS;
		/* Round the item count up to a power of two (minimum 4). */
		if (!powerof2(items)) {
			int x = items;
			items = 4;
			while (x>>1 != 1) {
				x = x >> 1;
				items = items << 1;
			}
		}
		filesize += (items + 1) * sizeof(struct dqblk);
		/*
		 * Calculate group quota file requirements.
		 *
		 */
		items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
		if (items < QF_MIN_GROUPS)
			items = QF_MIN_GROUPS;
		else if (items > QF_MAX_GROUPS)
			items = QF_MAX_GROUPS;
		/* Round the item count up to a power of two (minimum 4). */
		if (!powerof2(items)) {
			int x = items;
			items = 4;
			while (x>>1 != 1) {
				x = x >> 1;
				items = items << 1;
			}
		}
		filesize += (items + 1) * sizeof(struct dqblk);
	}
	zonesize += filesize;

	/*
	 * Round up entire zone to a bitmap block's worth.
	 * The extra space goes to the catalog file and hot file area.
	 */
	temp = zonesize;	/* remember the pre-roundup size; the slack is split below */
	zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
	hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
	/*
	 * If doing the round up for hfs_min_alloc_start would push us past
	 * allocLimit, then just reset it back to 0.  Though using a value
	 * bigger than allocLimit would not cause damage in the block allocator
	 * code, this value could get stored in the volume header and make it out
	 * to disk, making the volume header technically corrupt.
	 */
	if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
		hfsmp->hfs_min_alloc_start = 0;
	}

	if (really_do_it == 0) {
		/* If metadata zone needs to be disabled because the
		 * volume was truncated, clear the bit and zero out
		 * the values that are no longer needed.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/* Disable metadata zone */
			hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;

			/* Zero out mount point values that are not required */
			hfsmp->hfs_catalog_maxblks = 0;
			hfsmp->hfs_hotfile_maxblks = 0;
			hfsmp->hfs_hotfile_start = 0;
			hfsmp->hfs_hotfile_end = 0;
			hfsmp->hfs_hotfile_freeblks = 0;
			hfsmp->hfs_metazone_start = 0;
			hfsmp->hfs_metazone_end = 0;
		}

		return;
	}

	/* Split the roundup slack: one third goes to the hot file band,
	 * the remaining two thirds to the catalog reserve. */
	temp = zonesize - temp;  /* temp has extra space */
	filesize += temp / 3;
	hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;

	hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;

	/* Convert to allocation blocks. */
	blk = zonesize / vcb->blockSize;

	/* The default metadata zone location is at the start of volume. */
	hfsmp->hfs_metazone_start = 1;
	hfsmp->hfs_metazone_end = blk - 1;

	/* The default hotfile area is at the end of the zone. */
	if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
		hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
		hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
		hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
	}
	else {
		hfsmp->hfs_hotfile_start = 0;
		hfsmp->hfs_hotfile_end = 0;
		hfsmp->hfs_hotfile_freeblks = 0;
	}
#if 0
	printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
	printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
	printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
#endif
	hfsmp->hfs_flags |= HFS_METADATA_ZONE;
}
3151
3152
3153static u_int32_t
3154hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3155{
3156	ExtendedVCB  *vcb = HFSTOVCB(hfsmp);
3157	int  lockflags;
3158	int  freeblocks;
3159
3160	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3161	freeblocks = MetaZoneFreeBlocks(vcb);
3162	hfs_systemfile_unlock(hfsmp, lockflags);
3163
3164	/* Minus Extents overflow file reserve. */
3165	freeblocks -=
3166		hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3167	/* Minus catalog file reserve. */
3168	freeblocks -=
3169		hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3170	if (freeblocks < 0)
3171		freeblocks = 0;
3172
3173	return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3174}
3175
3176/*
3177 * Determine if a file is a "virtual" metadata file.
3178 * This includes journal and quota files.
3179 */
3180int
3181hfs_virtualmetafile(struct cnode *cp)
3182{
3183	const char * filename;
3184
3185
3186	if (cp->c_parentcnid != kHFSRootFolderID)
3187		return (0);
3188
3189	filename = (const char *)cp->c_desc.cd_nameptr;
3190	if (filename == NULL)
3191		return (0);
3192
3193	if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3194	    (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3195	    (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3196	    (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3197	    (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3198		return (1);
3199
3200	return (0);
3201}
3202
/*
 * Serialize access to the syncer state (hfs_sync_incomplete,
 * hfs_sync_req_oldest, hfs_syncer) by taking the mount mutex.
 */
__private_extern__
void hfs_syncer_lock(struct hfsmount *hfsmp)
{
    hfs_lock_mount(hfsmp);
}
3208
/*
 * Release the syncer-state lock taken by hfs_syncer_lock().
 */
__private_extern__
void hfs_syncer_unlock(struct hfsmount *hfsmp)
{
    hfs_unlock_mount(hfsmp);
}
3214
/*
 * Sleep until a pending sync completes (i.e. until hfs_syncer_wakeup()
 * is called on hfs_sync_incomplete).  Caller must hold the mount mutex
 * (via hfs_syncer_lock); msleep uses it as the sleep lock.
 */
__private_extern__
void hfs_syncer_wait(struct hfsmount *hfsmp)
{
    msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
           "hfs_syncer_wait", NULL);
}
3221
/*
 * Wake any thread blocked in hfs_syncer_wait().
 */
__private_extern__
void hfs_syncer_wakeup(struct hfsmount *hfsmp)
{
    wakeup(&hfsmp->hfs_sync_incomplete);
}
3227
/*
 * Convert an interval in microseconds into an absolute deadline
 * suitable for thread_call scheduling, using
 * clock_interval_to_deadline with a nanosecond scale factor.
 */
__private_extern__
uint64_t hfs_usecs_to_deadline(uint64_t usecs)
{
    uint64_t deadline;
    clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
    return deadline;
}
3235
/*
 * Schedule the syncer thread-call to fire HFS_META_DELAY microseconds
 * from now, at background priority.  A non-zero return from
 * thread_call_enter_delayed_with_leeway means the call was already
 * pending, which we only log.
 */
__private_extern__
void hfs_syncer_queue(thread_call_t syncer)
{
    if (thread_call_enter_delayed_with_leeway(syncer,
                                              NULL,
                                              hfs_usecs_to_deadline(HFS_META_DELAY),
                                              0,
                                              THREAD_CALL_DELAY_SYS_BACKGROUND)) {
		printf("hfs: syncer already scheduled!\n");
    }
}
3247
3248//
3249// Fire off a timed callback to sync the disk if the
3250// volume is on ejectable media.
3251//
 __private_extern__
void
hfs_sync_ejectable(struct hfsmount *hfsmp)
{
    // If we don't have a syncer or we get called by the syncer, just return
    if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
        return;

    hfs_syncer_lock(hfsmp);

    // Record when the oldest outstanding sync request was made, if not already set
    if (!timerisset(&hfsmp->hfs_sync_req_oldest))
        microuptime(&hfsmp->hfs_sync_req_oldest);

    /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
       don't want to queue again if there is a sync outstanding. */
    if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
        hfs_syncer_unlock(hfsmp);
        return;
    }

    hfsmp->hfs_sync_incomplete = TRUE;

    // Grab a local copy under the lock; hfs_syncer may be cleared by unmount
    // once we drop it.
    thread_call_t syncer = hfsmp->hfs_syncer;

    hfs_syncer_unlock(hfsmp);

    // Queue outside the lock to avoid holding it across the thread-call API.
    hfs_syncer_queue(syncer);
}
3280
/*
 * Begin a journal transaction on the volume.
 *
 * Takes the global lock shared (and bumps hfs_active_threads) when this
 * thread does not already own the journal, refuses to start while a
 * read-only downgrade is in progress, and then opens (or nests) a
 * journal transaction.
 *
 * Returns 0 on success, EBUSY if this thread already holds the global
 * lock exclusive, EROFS during a read-only downgrade, or the error from
 * journal_start_transaction.  Must be paired with hfs_end_transaction.
 */
int
hfs_start_transaction(struct hfsmount *hfsmp)
{
	int ret = 0, unlock_on_err = 0;
	thread_t thread = current_thread();

#ifdef HFS_CHECK_LOCK_ORDER
	/*
	 * You cannot start a transaction while holding a system
	 * file lock. (unless the transaction is nested.)
	 */
	if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
		if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
		}
		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
		}
		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
		}
	}
#endif /* HFS_CHECK_LOCK_ORDER */

	if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
		/*
		 * The global lock should be held shared if journal is
		 * active to prevent disabling.  If we're not the owner
		 * of the journal lock, verify that we're not already
		 * holding the global lock exclusive before moving on.
		 */
		if (hfsmp->hfs_global_lockowner == thread) {
			ret = EBUSY;
			goto out;
		}

		hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
		OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
		unlock_on_err = 1;
	}

	/* If a downgrade to read-only mount is in progress, no other
	 * thread than the downgrade thread is allowed to modify
	 * the file system.
	 */
	if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
	    hfsmp->hfs_downgrading_thread != thread) {
		ret = EROFS;
		goto out;
	}

	if (hfsmp->jnl) {
		ret = journal_start_transaction(hfsmp->jnl);
		if (ret == 0) {
			/* Track the nesting depth so hfs_end_transaction knows
			 * when the outermost transaction closes. */
			OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
		}
	} else {
		ret = 0;
	}

out:
	if (ret != 0 && unlock_on_err) {
		hfs_unlock_global (hfsmp);
		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
	}

    return ret;
}
3349
/*
 * End a transaction started by hfs_start_transaction.
 *
 * Drops the shared global lock (and hfs_active_threads count) only when
 * this was the outermost transaction for the owning thread, or when
 * there is no journal at all, then kicks the ejectable-media syncer.
 * Returns the result of journal_end_transaction (0 if unjournaled).
 */
int
hfs_end_transaction(struct hfsmount *hfsmp)
{
    int need_unlock=0, ret;

    /* OSAddAtomic returns the pre-decrement value, so == 1 means this
     * call took the nesting count from 1 to 0, i.e. the outermost
     * transaction for the thread that owns the journal. */
    if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
	    && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
	    need_unlock = 1;
    }

	if (hfsmp->jnl) {
		ret = journal_end_transaction(hfsmp->jnl);
	} else {
		ret = 0;
	}

	if (need_unlock) {
		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
		hfs_unlock_global (hfsmp);
		hfs_sync_ejectable(hfsmp);
	}

    return ret;
}
3374
3375
/*
 * Acquire the journal lock for this mount, if a journal exists.
 * The global lock is held shared around the check because hfsmp->jnl
 * may only be inspected under it.
 */
void
hfs_journal_lock(struct hfsmount *hfsmp)
{
	/* Only peek at hfsmp->jnl while holding the global lock */
	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
	if (hfsmp->jnl) {
		journal_lock(hfsmp->jnl);
	}
	hfs_unlock_global (hfsmp);
}
3386
/*
 * Release the journal lock taken by hfs_journal_lock(), if a journal
 * exists.  Same global-lock protocol as hfs_journal_lock.
 */
void
hfs_journal_unlock(struct hfsmount *hfsmp)
{
	/* Only peek at hfsmp->jnl while holding the global lock */
	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
	if (hfsmp->jnl) {
		journal_unlock(hfsmp->jnl);
	}
	hfs_unlock_global (hfsmp);
}
3397
3398/*
3399 * Flush the contents of the journal to the disk.
3400 *
3401 *  Input:
3402 *  	wait_for_IO -
3403 *  	If TRUE, wait to write in-memory journal to the disk
3404 *  	consistently, and also wait to write all asynchronous
3405 *  	metadata blocks to its corresponding locations
3406 *  	consistently on the disk.  This means that the journal
3407 *  	is empty at this point and does not contain any
3408 *  	transactions.  This is overkill in normal scenarios
3409 *  	but is useful whenever the metadata blocks are required
3410 *  	to be consistent on-disk instead of just the journal
3411 *  	being consistent; like before live verification
3412 *  	and live volume resizing.
3413 *
3414 *  	If FALSE, only wait to write in-memory journal to the
3415 *  	disk consistently.  This means that the journal still
3416 *  	contains uncommitted transactions and the file system
3417 *  	metadata blocks in the journal transactions might be
3418 *  	written asynchronously to the disk.  But there is no
3419 *  	guarantee that they are written to the disk before
3420 *  	returning to the caller.  Note that this option is
3421 *  	sufficient for file system data integrity as it
3422 *  	guarantees consistent journal content on the disk.
3423 */
3424int
3425hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
3426{
3427	int ret;
3428
3429	/* Only peek at hfsmp->jnl while holding the global lock */
3430	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3431	if (hfsmp->jnl) {
3432		ret = journal_flush(hfsmp->jnl, wait_for_IO);
3433	} else {
3434		ret = 0;
3435	}
3436	hfs_unlock_global (hfsmp);
3437
3438	return ret;
3439}
3440
3441
3442/*
3443 * hfs_erase_unused_nodes
3444 *
 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3446 * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
3447 * zeroes to the unused nodes.
3448 *
3449 * How do we detect when a volume needs this repair?  We can't always be
3450 * certain.  If a volume was created after a certain date, then it may have
3451 * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
3452 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3453 * that means that the entire first clump must have been written to, which means
3454 * there shouldn't be unused and unwritten nodes in that first clump, and this
3455 * repair is not needed.
3456 *
3457 * We have defined a bit in the Volume Header's attributes to indicate when the
3458 * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
3459 * As will fsck_hfs when it repairs the unused nodes.
3460 */
/* See the block comment above: zero out any unused, unwritten Catalog
 * B-tree nodes left behind by a faulty newfs_hfs, then record the repair
 * in the volume attributes.  Returns 0 on success or no-op, otherwise
 * the error from hfs_start_transaction or BTZeroUnusedNodes. */
int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
{
	int result;
	struct filefork *catalog;
	int lockflags;

	if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
	{
		/* This volume has already been checked and repaired. */
		return 0;
	}

	if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
	{
		/* This volume is too old to have had the problem. */
		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
		return 0;
	}

	catalog = hfsmp->hfs_catalog_cp->c_datafork;
	if (catalog->ff_size > catalog->ff_clumpsize)
	{
		/* The entire first clump must have been in use at some point. */
		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
		return 0;
	}

	/*
	 * If we get here, we need to zero out those unused nodes.
	 *
	 * We start a transaction and lock the catalog since we're going to be
	 * making on-disk changes.  But note that BTZeroUnusedNodes doesn't actually
	 * do its writing via the journal, because that would be too much I/O
	 * to fit in a transaction, and it's a pain to break it up into multiple
	 * transactions.  (It behaves more like growing a B-tree would.)
	 */
	printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
	result = hfs_start_transaction(hfsmp);
	if (result)
		goto done;
	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
	result = BTZeroUnusedNodes(catalog);
	/* Wait for the non-journaled zero-fill writes to reach the disk
	 * before declaring the repair done. */
	vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);
	if (result == 0)
		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
	printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);

done:
	return result;
}
3513
3514
3515extern time_t snapshot_timestamp;
3516
3517int
3518check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3519{
3520	int snapshot_error = 0;
3521
3522	if (vp == NULL) {
3523		return 0;
3524	}
3525
3526	/* Swap files are special; skip them */
3527	if (vnode_isswap(vp)) {
3528		return 0;
3529	}
3530
3531	if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3532		// the change time is within this epoch
3533		int error;
3534
3535		error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3536		if (error == EDEADLK) {
3537			snapshot_error = 0;
3538		} else if (error) {
3539			if (error == EAGAIN) {
3540				printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3541			} else if (error == EINTR) {
3542				// printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3543				snapshot_error = EINTR;
3544			}
3545		}
3546	}
3547
3548	if (snapshot_error) return snapshot_error;
3549
3550	return 0;
3551}
3552
/*
 * If the vnode refers to a "dataless" file (UF_COMPRESSED set with a
 * decmpfs entry of type DATALESS_CMPFS_TYPE), ask the namespace handler
 * to materialize its contents before the operation proceeds.
 *
 * Returns 0 when the file is not dataless, when it was materialized, or
 * on timeout; EINTR if interrupted; EBADF if the handler ran but the
 * file is still dataless; otherwise the handler's error.
 */
int
check_for_dataless_file(struct vnode *vp, uint64_t op_type)
{
	int error;

	if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
		// there's nothing to do, it's not dataless
		return 0;
	}

	/* Swap files are special; ignore them */
	if (vnode_isswap(vp)) {
		return 0;
	}

	// printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
	error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
	if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
		// deadlock with the handler on a write is tolerated
		error = 0;
	} else if (error) {
		if (error == EAGAIN) {
			printf("hfs: dataless: timed out waiting for namespace handler...\n");
			// XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
			return 0;
		} else if (error == EINTR) {
			// printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
			return EINTR;
		}
	} else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
		//
		// if we're here, the dataless bit is still set on the file
		// which means it didn't get handled.  we return an error
		// but it's presently ignored by all callers of this function.
		//
		// XXXdbg - EDATANOTPRESENT is what we really need...
		//
		return EBADF;
	}

	return error;
}
3594
3595
3596//
3597// NOTE: this function takes care of starting a transaction and
3598//       acquiring the systemfile lock so that it can call
3599//       cat_update().
3600//
3601// NOTE: do NOT hold and cnode locks while calling this function
3602//       to avoid deadlocks (because we take a lock on the root
3603//       cnode)
3604//
3605int
3606hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3607{
3608	struct vnode *rvp;
3609	struct cnode *cp;
3610	int error;
3611
3612	error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3613	if (error) {
3614		return error;
3615	}
3616
3617	cp = VTOC(rvp);
3618	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3619		return error;
3620	}
3621	struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3622
3623	int lockflags;
3624	if (hfs_start_transaction(hfsmp) != 0) {
3625		return error;
3626	}
3627	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3628
3629	if (extinfo->document_id == 0) {
3630		// initialize this to start at 3 (one greater than the root-dir id)
3631		extinfo->document_id = 3;
3632	}
3633
3634	*docid = extinfo->document_id++;
3635
3636	// mark the root cnode dirty
3637	cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
3638	(void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
3639
3640	hfs_systemfile_unlock (hfsmp, lockflags);
3641	(void) hfs_end_transaction(hfsmp);
3642
3643	(void) hfs_unlock(cp);
3644
3645	vnode_put(rvp);
3646	rvp = NULL;
3647
3648	return 0;
3649}
3650
3651
3652/*
3653 * Return information about number of file system allocation blocks
3654 * taken by metadata on a volume.
3655 *
3656 * This function populates struct hfsinfo_metadata with allocation blocks
3657 * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3658 * journal file, and sum of all of the above.
3659 */
3660int
3661hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3662{
3663	int lockflags = 0;
3664	int ret_lockflags = 0;
3665
3666	/* Zero out the output buffer */
3667	bzero(hinfo, sizeof(struct hfsinfo_metadata));
3668
3669	/*
3670	 * Getting number of allocation blocks for all btrees
3671	 * should be a quick operation, so we grab locks for
3672	 * all of them at the same time
3673	 */
3674	lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3675	ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3676	/*
3677	 * Make sure that we were able to acquire all locks requested
3678	 * to protect us against conditions like unmount in progress.
3679	 */
3680	if ((lockflags & ret_lockflags) != lockflags) {
3681		/* Release any locks that were acquired */
3682		hfs_systemfile_unlock(hfsmp, ret_lockflags);
3683		return EPERM;
3684	}
3685
3686	/* Get information about all the btrees */
3687	hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3688	hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3689	hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3690	hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3691
3692	/* Done with btrees, give up the locks */
3693	hfs_systemfile_unlock(hfsmp, ret_lockflags);
3694
3695	/* Get information about journal file */
3696	hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3697
3698	/* Calculate total number of metadata blocks */
3699	hinfo->total = hinfo->extents + hinfo->catalog +
3700			hinfo->allocation + hinfo->attribute +
3701			hinfo->journal;
3702
3703	return 0;
3704}
3705
/*
 * vnode_iterate callback used by hfs_freeze: block until all pending
 * writes on the vnode have drained.  Always returns 0 so iteration
 * continues over every vnode on the mount.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");

	return 0;
}
3713
/*
 * Freeze the file system: wait for any other freezer, block new syncers
 * and wait out running ones, flush dirty data and the journal, then
 * mark the mount HFS_FROZEN.
 *
 * Returns 0 on success, or EINTR if a signal arrived while waiting for
 * another freezer or for syncers to drain.
 */
__private_extern__
int hfs_freeze(struct hfsmount *hfsmp)
{
	// First make sure some other process isn't freezing
	hfs_lock_mount(hfsmp);
	while (hfsmp->hfs_freeze_state != HFS_THAWED) {
		if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
				   PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
			hfs_unlock_mount(hfsmp);
			return EINTR;
		}
	}

	// Stop new syncers from starting
	hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;

	// Now wait for all syncers to finish
	while (hfsmp->hfs_syncers) {
		if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
			   PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
			// interrupted: undo HFS_WANT_TO_FREEZE before bailing out
			hfs_thaw_locked(hfsmp);
			hfs_unlock_mount(hfsmp);
			return EINTR;
		}
	}
	hfs_unlock_mount(hfsmp);

	// flush things before we get started to try and prevent
	// dirty data from being paged out while we're frozen.
	// note: we can't do this once we're in the freezing state because
	// other threads will need to take the global lock
	vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);

	// Block everything in hfs_lock_global now
	hfs_lock_mount(hfsmp);
	hfsmp->hfs_freeze_state = HFS_FREEZING;
	hfsmp->hfs_freezing_thread = current_thread();
	hfs_unlock_mount(hfsmp);

	/* Take the exclusive lock to flush out anything else that
	   might have the global lock at the moment and also so we
	   can flush the journal. */
	hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
	journal_flush(hfsmp->jnl, TRUE);
	hfs_unlock_global(hfsmp);

	// don't need to iterate on all vnodes, we just need to
	// wait for writes to the system files and the device vnode
	//
	// Now that journal flush waits for all metadata blocks to
	// be written out, waiting for btree writes is probably no
	// longer required.
	if (HFSTOVCB(hfsmp)->extentsRefNum)
		vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
	if (HFSTOVCB(hfsmp)->catalogRefNum)
		vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
	if (HFSTOVCB(hfsmp)->allocationsRefNum)
		vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
	if (hfsmp->hfs_attribute_vp)
		vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
	vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");

	// We're done, mark frozen
	hfs_lock_mount(hfsmp);
	hfsmp->hfs_freeze_state  = HFS_FROZEN;
	hfsmp->hfs_freezing_proc = current_proc();
	hfs_unlock_mount(hfsmp);

	return 0;
}
3784
3785__private_extern__
3786int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
3787{
3788	hfs_lock_mount(hfsmp);
3789
3790	if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
3791		hfs_unlock_mount(hfsmp);
3792		return EINVAL;
3793	}
3794	if (process && hfsmp->hfs_freezing_proc != process) {
3795		hfs_unlock_mount(hfsmp);
3796		return EPERM;
3797	}
3798
3799	hfs_thaw_locked(hfsmp);
3800
3801	hfs_unlock_mount(hfsmp);
3802
3803	return 0;
3804}
3805
3806static void hfs_thaw_locked(struct hfsmount *hfsmp)
3807{
3808	hfsmp->hfs_freezing_proc = NULL;
3809	hfsmp->hfs_freeze_state = HFS_THAWED;
3810
3811	wakeup(&hfsmp->hfs_freeze_state);
3812}
3813