1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*	@(#)hfs_vfsutils.c	4.0
29*
30*	(c) 1997-2002 Apple Computer, Inc.  All Rights Reserved
31*
32*	hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
33*
34*/
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/malloc.h>
39#include <sys/stat.h>
40#include <sys/mount.h>
41#include <sys/mount_internal.h>
42#include <sys/buf.h>
43#include <sys/buf_internal.h>
44#include <sys/ubc.h>
45#include <sys/unistd.h>
46#include <sys/utfconv.h>
47#include <sys/kauth.h>
48#include <sys/fcntl.h>
49#include <sys/fsctl.h>
50#include <sys/vnode_internal.h>
51#include <kern/clock.h>
52
53#include <libkern/OSAtomic.h>
54
55/* for parsing boot-args */
56#include <pexpert/pexpert.h>
57
58#if CONFIG_PROTECT
59#include <sys/cprotect.h>
60#endif
61
62#include "hfs.h"
63#include "hfs_catalog.h"
64#include "hfs_dbg.h"
65#include "hfs_mount.h"
66#include "hfs_endian.h"
67#include "hfs_cnode.h"
68#include "hfs_fsctl.h"
69
70#include "hfscommon/headers/FileMgrInternal.h"
71#include "hfscommon/headers/BTreesInternal.h"
72#include "hfscommon/headers/HFSUnicodeWrappers.h"
73
74static void ReleaseMetaFileVNode(struct vnode *vp);
75static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
76
77static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
78
79#define HFS_MOUNT_DEBUG 1
80
81
82//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
//       hence is not in the right byte order on little-endian machines. It is
//       the responsibility of the Finder and other clients to swap the data.
86//*******************************************************************************
87
88//*******************************************************************************
89//	Routine:	hfs_MountHFSVolume
90//
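//	Function:	Given the master directory block (MDB) of an HFS standard
//			volume, fill in the in-memory VCB and create the vnodes for
//			the extents B-tree, catalog B-tree and volume bitmap
//			metadata files.  (Summary added for clarity; see the code
//			below for the authoritative behavior.)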
91//
92//*******************************************************************************
93unsigned char hfs_catname[] = "Catalog B-tree";
94unsigned char hfs_extname[] = "Extents B-tree";
95unsigned char hfs_vbmname[] = "Volume Bitmap";
96unsigned char hfs_attrname[] = "Attribute B-tree";
97unsigned char hfs_startupname[] = "Startup File";
98
99#if CONFIG_HFS_STD
100OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
101		__unused struct proc *p)
102{
103	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
104	int error;
105	ByteCount utf8chars;
106	struct cat_desc cndesc;
107	struct cat_attr cnattr;
108	struct cat_fork fork;
109	int newvnode_flags = 0;
110
111	/* Block size must be a multiple of 512 */
112	if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
113	    (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
114		return (EINVAL);
115
	/* Don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
117	if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
118	    ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
119		return (EINVAL);
120	}
121	hfsmp->hfs_flags |= HFS_STANDARD;
122	/*
123	 * The MDB seems OK: transfer info from it into VCB
124	 * Note - the VCB starts out clear (all zeros)
125	 *
126	 */
127	vcb->vcbSigWord		= SWAP_BE16 (mdb->drSigWord);
128	vcb->hfs_itime		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
129	vcb->localCreateDate	= SWAP_BE32 (mdb->drCrDate);
130	vcb->vcbLsMod		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
131	vcb->vcbAtrb		= SWAP_BE16 (mdb->drAtrb);
132	vcb->vcbNmFls		= SWAP_BE16 (mdb->drNmFls);
133	vcb->vcbVBMSt		= SWAP_BE16 (mdb->drVBMSt);
134	vcb->nextAllocation	= SWAP_BE16 (mdb->drAllocPtr);
135	vcb->totalBlocks	= SWAP_BE16 (mdb->drNmAlBlks);
136	vcb->allocLimit		= vcb->totalBlocks;
137	vcb->blockSize		= SWAP_BE32 (mdb->drAlBlkSiz);
138	vcb->vcbClpSiz		= SWAP_BE32 (mdb->drClpSiz);
139	vcb->vcbAlBlSt		= SWAP_BE16 (mdb->drAlBlSt);
140	vcb->vcbNxtCNID		= SWAP_BE32 (mdb->drNxtCNID);
141	vcb->freeBlocks		= SWAP_BE16 (mdb->drFreeBks);
142	vcb->vcbVolBkUp		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
143	vcb->vcbWrCnt		= SWAP_BE32 (mdb->drWrCnt);
144	vcb->vcbNmRtDirs	= SWAP_BE16 (mdb->drNmRtDirs);
145	vcb->vcbFilCnt		= SWAP_BE32 (mdb->drFilCnt);
146	vcb->vcbDirCnt		= SWAP_BE32 (mdb->drDirCnt);
147	bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
148	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
149		vcb->vcbWrCnt++;	/* Compensate for write of MDB on last flush */
150
151	/* convert hfs encoded name into UTF-8 string */
152	error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
153	/*
154	 * When an HFS name cannot be encoded with the current
155	 * volume encoding we use MacRoman as a fallback.
156	 */
157	if (error || (utf8chars == 0)) {
158		error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
159		/* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
160		if (error) {
161			goto MtVolErr;
162		}
163	}
164
165	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
166	vcb->vcbVBMIOSize = kHFSBlockSize;
167
168	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
169	                                          hfsmp->hfs_logical_block_count);
170
171	bzero(&cndesc, sizeof(cndesc));
172	cndesc.cd_parentcnid = kHFSRootParentID;
173	cndesc.cd_flags |= CD_ISMETA;
174	bzero(&cnattr, sizeof(cnattr));
175	cnattr.ca_linkcount = 1;
176	cnattr.ca_mode = S_IFREG;
177	bzero(&fork, sizeof(fork));
178
179	/*
180	 * Set up Extents B-tree vnode
181	 */
182	cndesc.cd_nameptr = hfs_extname;
183	cndesc.cd_namelen = strlen((char *)hfs_extname);
184	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
185	fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
186	fork.cf_blocks = fork.cf_size / vcb->blockSize;
187	fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
188	fork.cf_vblocks = 0;
189	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
190	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
191	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
192	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
193	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
194	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
195	cnattr.ca_blocks = fork.cf_blocks;
196
197	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
198	                        &hfsmp->hfs_extents_vp, &newvnode_flags);
199	if (error) {
200		if (HFS_MOUNT_DEBUG) {
201			printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
202		}
203		goto MtVolErr;
204	}
205	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
206	                                 (KeyCompareProcPtr)CompareExtentKeys));
207	if (error) {
208		if (HFS_MOUNT_DEBUG) {
209			printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
210		}
211		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
212		goto MtVolErr;
213	}
214	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
215
216	/*
217	 * Set up Catalog B-tree vnode...
218	 */
219	cndesc.cd_nameptr = hfs_catname;
220	cndesc.cd_namelen = strlen((char *)hfs_catname);
221	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
222	fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
223	fork.cf_blocks = fork.cf_size / vcb->blockSize;
224	fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
225	fork.cf_vblocks = 0;
226	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
227	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
228	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
229	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
230	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
231	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
232	cnattr.ca_blocks = fork.cf_blocks;
233
234	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
235	                        &hfsmp->hfs_catalog_vp, &newvnode_flags);
236	if (error) {
237		if (HFS_MOUNT_DEBUG) {
238			printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
239		}
240		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
241		goto MtVolErr;
242	}
243	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
244	                                 (KeyCompareProcPtr)CompareCatalogKeys));
245	if (error) {
246		if (HFS_MOUNT_DEBUG) {
247			printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
248		}
249		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
250		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
251		goto MtVolErr;
252	}
253	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
254
255	/*
256	 * Set up dummy Allocation file vnode (used only for locking bitmap)
257	 */
258	cndesc.cd_nameptr = hfs_vbmname;
259	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
260	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
261	bzero(&fork, sizeof(fork));
262	cnattr.ca_blocks = 0;
263
264	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
265	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
266	if (error) {
267		if (HFS_MOUNT_DEBUG) {
268			printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
269		}
270		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
271		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
272		goto MtVolErr;
273	}
274	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
275
276      	/* mark the volume dirty (clear clean unmount bit) */
277	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
278
	if (error == noErr) {
		error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
		if (error && HFS_MOUNT_DEBUG) {
			printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
		}
	}

	if (error == noErr) {
		/* If the disk isn't write protected... */
		if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
			MarkVCBDirty (vcb);	// mark VCB dirty so it will be written
		}
	}
292
293	/*
294	 * all done with system files so we can unlock now...
295	 */
296	hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
297	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
298	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
299
300	if (error == noErr) {
301		/* If successful, then we can just return once we've unlocked the cnodes */
302		return error;
303	}
304
305    //--	Release any resources allocated so far before exiting with an error:
306MtVolErr:
307	hfsUnmount(hfsmp, NULL);
308
309    return (error);
310}
311
312#endif
313
314//*******************************************************************************
315//	Routine:	hfs_MountHFSPlusVolume
316//
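//	Function:	Given an HFS Plus/HFSX volume header, fill in the in-memory
//			VCB, create the vnodes for the metadata files (extents,
//			catalog, allocation bitmap, attributes, startup), start the
//			allocator block scan, and perform late journal
//			initialization if needed.  (Summary added for clarity; see
//			the code below for the authoritative behavior.)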
317//
318//*******************************************************************************
319
320OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
321	off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
322{
323	register ExtendedVCB *vcb;
324	struct cat_desc cndesc;
325	struct cat_attr cnattr;
326	struct cat_fork cfork;
327	u_int32_t blockSize;
328	daddr64_t spare_sectors;
329	struct BTreeInfoRec btinfo;
330	u_int16_t  signature;
331	u_int16_t  hfs_version;
332	int newvnode_flags = 0;
333	int  i;
334	OSErr retval;
335	char converted_volname[256];
336	size_t volname_length = 0;
337	size_t conv_volname_length = 0;
338
339	signature = SWAP_BE16(vhp->signature);
340	hfs_version = SWAP_BE16(vhp->version);
341
342	if (signature == kHFSPlusSigWord) {
343		if (hfs_version != kHFSPlusVersion) {
344			printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version);
345			return (EINVAL);
346		}
347	} else if (signature == kHFSXSigWord) {
348		if (hfs_version != kHFSXVersion) {
349			printf("hfs_mount: invalid HFSX version: %x\n", hfs_version);
350			return (EINVAL);
351		}
352		/* The in-memory signature is always 'H+'. */
353		signature = kHFSPlusSigWord;
354		hfsmp->hfs_flags |= HFS_X;
355	} else {
		/* Removed printf for invalid HFS+ signature because it gives
		 * a false error for the UFS root volume.
		 */
359		if (HFS_MOUNT_DEBUG) {
360			printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature);
361		}
362		return (EINVAL);
363	}
364
365	/* Block size must be at least 512 and a power of 2 */
366	blockSize = SWAP_BE32(vhp->blockSize);
367	if (blockSize < 512 || !powerof2(blockSize)) {
368		if (HFS_MOUNT_DEBUG) {
369			printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
370		}
371		return (EINVAL);
372	}
373
	/* Don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
375	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
376	    (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
377		if (HFS_MOUNT_DEBUG) {
378			printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
379		}
380		return (EINVAL);
381	}
382
383	/* Make sure we can live with the physical block size. */
384	if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
385	    (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
386	    (blockSize < hfsmp->hfs_logical_block_size)) {
387		if (HFS_MOUNT_DEBUG) {
388			printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
389					blockSize, hfsmp->hfs_logical_block_size);
390		}
391		return (ENXIO);
392	}
393
	/* If the allocation block size is less than the physical
	 * block size, we assume that the physical block size is
	 * the same as the logical block size.  The physical block
	 * size is used to round down the offsets for reading and
	 * writing the primary and alternate volume headers at
	 * physical block boundaries, and it causes problems if it
	 * is larger than the allocation block size.
	 */
402	if (blockSize < hfsmp->hfs_physical_block_size) {
403		hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
404		hfsmp->hfs_log_per_phys = 1;
405	}
406
407	/*
408	 * The VolumeHeader seems OK: transfer info from it into VCB
409	 * Note - the VCB starts out clear (all zeros)
410	 */
411	vcb = HFSTOVCB(hfsmp);
412
413	vcb->vcbSigWord	= signature;
414	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
415	vcb->vcbLsMod	= to_bsd_time(SWAP_BE32(vhp->modifyDate));
416	vcb->vcbAtrb	= SWAP_BE32(vhp->attributes);
417	vcb->vcbClpSiz	= SWAP_BE32(vhp->rsrcClumpSize);
418	vcb->vcbNxtCNID	= SWAP_BE32(vhp->nextCatalogID);
419	vcb->vcbVolBkUp	= to_bsd_time(SWAP_BE32(vhp->backupDate));
420	vcb->vcbWrCnt	= SWAP_BE32(vhp->writeCount);
421	vcb->vcbFilCnt	= SWAP_BE32(vhp->fileCount);
422	vcb->vcbDirCnt	= SWAP_BE32(vhp->folderCount);
423
424	/* copy 32 bytes of Finder info */
425	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
426
427	vcb->vcbAlBlSt = 0;		/* hfs+ allocation blocks start at first block of volume */
428	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
429		vcb->vcbWrCnt++;	/* compensate for write of Volume Header on last flush */
430
431	/* Now fill in the Extended VCB info */
432	vcb->nextAllocation	= SWAP_BE32(vhp->nextAllocation);
433	vcb->totalBlocks	= SWAP_BE32(vhp->totalBlocks);
434	vcb->allocLimit		= vcb->totalBlocks;
435	vcb->freeBlocks		= SWAP_BE32(vhp->freeBlocks);
436	vcb->blockSize		= blockSize;
437	vcb->encodingsBitmap	= SWAP_BE64(vhp->encodingsBitmap);
438	vcb->localCreateDate	= SWAP_BE32(vhp->createDate);
439
440	vcb->hfsPlusIOPosOffset	= embeddedOffset;
441
442	/* Default to no free block reserve */
443	vcb->reserveBlocks = 0;
444
445	/*
446	 * Update the logical block size in the mount struct
447	 * (currently set up from the wrapper MDB) using the
448	 * new blocksize value:
449	 */
450	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
451	vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
452
453	/*
454	 * Validate and initialize the location of the alternate volume header.
455	 */
456	spare_sectors = hfsmp->hfs_logical_block_count -
457	                (((daddr64_t)vcb->totalBlocks * blockSize) /
458	                   hfsmp->hfs_logical_block_size);
459
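	/*
	 * More than one allocation block's worth of trailing sectors means the
	 * partition has been grown since the volume was created, so the end of
	 * the device no longer holds a valid alternate volume header; leave
	 * hfs_alt_id_sector at 0 in that case.
	 */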
460	if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
461		hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
462	} else {
463		hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
464					   HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
465							  hfsmp->hfs_logical_block_count);
466	}
467
468	bzero(&cndesc, sizeof(cndesc));
469	cndesc.cd_parentcnid = kHFSRootParentID;
470	cndesc.cd_flags |= CD_ISMETA;
471	bzero(&cnattr, sizeof(cnattr));
472	cnattr.ca_linkcount = 1;
473	cnattr.ca_mode = S_IFREG;
474
475	/*
476	 * Set up Extents B-tree vnode
477	 */
478	cndesc.cd_nameptr = hfs_extname;
479	cndesc.cd_namelen = strlen((char *)hfs_extname);
480	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
481
482	cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
483	cfork.cf_new_size= 0;
484	cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
485	cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
486	cfork.cf_vblocks = 0;
487	cnattr.ca_blocks = cfork.cf_blocks;
488	for (i = 0; i < kHFSPlusExtentDensity; i++) {
489		cfork.cf_extents[i].startBlock =
490				SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
491		cfork.cf_extents[i].blockCount =
492				SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
493	}
494	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
495	                         &hfsmp->hfs_extents_vp, &newvnode_flags);
496	if (retval)
497	{
498		if (HFS_MOUNT_DEBUG) {
499			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
500		}
501		goto ErrorExit;
502	}
503	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
504	hfs_unlock(hfsmp->hfs_extents_cp);
505
506	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
507	                                  (KeyCompareProcPtr) CompareExtentKeysPlus));
508	if (retval)
509	{
510		if (HFS_MOUNT_DEBUG) {
511			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
512		}
513		goto ErrorExit;
514	}
515	/*
516	 * Set up Catalog B-tree vnode
517	 */
518	cndesc.cd_nameptr = hfs_catname;
519	cndesc.cd_namelen = strlen((char *)hfs_catname);
520	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
521
522	cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
523	cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
524	cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
525	cfork.cf_vblocks = 0;
526	cnattr.ca_blocks = cfork.cf_blocks;
527	for (i = 0; i < kHFSPlusExtentDensity; i++) {
528		cfork.cf_extents[i].startBlock =
529				SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
530		cfork.cf_extents[i].blockCount =
531				SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
532	}
533	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
534	                         &hfsmp->hfs_catalog_vp, &newvnode_flags);
535	if (retval) {
536		if (HFS_MOUNT_DEBUG) {
537			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
538		}
539		goto ErrorExit;
540	}
541	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
542	hfs_unlock(hfsmp->hfs_catalog_cp);
543
544	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
545	                                  (KeyCompareProcPtr) CompareExtendedCatalogKeys));
546	if (retval) {
547		if (HFS_MOUNT_DEBUG) {
548			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
549		}
550		goto ErrorExit;
551	}
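	/*
	 * For HFSX volumes, the key compare type recorded in the catalog B-tree
	 * header indicates whether the volume is case-sensitive: a binary key
	 * compare means names are compared case-sensitively.
	 */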
552	if ((hfsmp->hfs_flags & HFS_X) &&
553	    BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
554		if (btinfo.keyCompareType == kHFSBinaryCompare) {
555			hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
556			/* Install a case-sensitive key compare */
557			(void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
558			                  (KeyCompareProcPtr)cat_binarykeycompare);
559		}
560	}
561
562	/*
563	 * Set up Allocation file vnode
564	 */
565	cndesc.cd_nameptr = hfs_vbmname;
566	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
567	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
568
569	cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
570	cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
571	cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
572	cfork.cf_vblocks = 0;
573	cnattr.ca_blocks = cfork.cf_blocks;
574	for (i = 0; i < kHFSPlusExtentDensity; i++) {
575		cfork.cf_extents[i].startBlock =
576				SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
577		cfork.cf_extents[i].blockCount =
578				SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
579	}
580	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
581	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
582	if (retval) {
583		if (HFS_MOUNT_DEBUG) {
584			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
585		}
586		goto ErrorExit;
587	}
588	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
589	hfs_unlock(hfsmp->hfs_allocation_cp);
590
591	/*
592	 * Set up Attribute B-tree vnode
593	 */
594	if (vhp->attributesFile.totalBlocks != 0) {
595		cndesc.cd_nameptr = hfs_attrname;
596		cndesc.cd_namelen = strlen((char *)hfs_attrname);
597		cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
598
599		cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
600		cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
601		cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
602		cfork.cf_vblocks = 0;
603		cnattr.ca_blocks = cfork.cf_blocks;
604		for (i = 0; i < kHFSPlusExtentDensity; i++) {
605			cfork.cf_extents[i].startBlock =
606					SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
607			cfork.cf_extents[i].blockCount =
608					SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
609		}
610		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
611					 &hfsmp->hfs_attribute_vp, &newvnode_flags);
612		if (retval) {
613			if (HFS_MOUNT_DEBUG) {
614				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
615			}
616			goto ErrorExit;
617		}
618		hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
619		hfs_unlock(hfsmp->hfs_attribute_cp);
620		retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
621						  (KeyCompareProcPtr) hfs_attrkeycompare));
622		if (retval) {
623			if (HFS_MOUNT_DEBUG) {
624				printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
625			}
626			goto ErrorExit;
627		}
628
		/* Initialize the vnode for the virtual attribute data file that spans
		 * the entire file system space, used for performing I/O to the
		 * attribute b-tree.  We hold an iocount on the attrdata vnode for the
		 * entire duration of the mount (similar to the b-tree vnodes).
		 */
634		retval = init_attrdata_vnode(hfsmp);
635		if (retval) {
636			if (HFS_MOUNT_DEBUG) {
637				printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
638			}
639			goto ErrorExit;
640		}
641	}
642
643	/*
644	 * Set up Startup file vnode
645	 */
646	if (vhp->startupFile.totalBlocks != 0) {
647		cndesc.cd_nameptr = hfs_startupname;
648		cndesc.cd_namelen = strlen((char *)hfs_startupname);
649		cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
650
651		cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
652		cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
653		cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
654		cfork.cf_vblocks = 0;
655		cnattr.ca_blocks = cfork.cf_blocks;
656		for (i = 0; i < kHFSPlusExtentDensity; i++) {
657			cfork.cf_extents[i].startBlock =
658					SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
659			cfork.cf_extents[i].blockCount =
660					SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
661		}
662		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
663					 &hfsmp->hfs_startup_vp, &newvnode_flags);
664		if (retval) {
665			if (HFS_MOUNT_DEBUG) {
666				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
667			}
668			goto ErrorExit;
669		}
670		hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
671		hfs_unlock(hfsmp->hfs_startup_cp);
672	}
673
674	/*
675	 * Pick up volume name and create date
676	 *
677	 * Acquiring the volume name should not manipulate the bitmap, only the catalog
678	 * btree and possibly the extents overflow b-tree.
679	 */
680	retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
681	if (retval) {
682		if (HFS_MOUNT_DEBUG) {
683			printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
684		}
685		goto ErrorExit;
686	}
687	vcb->hfs_itime = cnattr.ca_itime;
688	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
689	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
690	volname_length = strlen ((const char*)vcb->vcbVN);
691	cat_releasedesc(&cndesc);
692
693#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
694
695
696	/* Send the volume name down to CoreStorage if necessary */
697	retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
698	if (retval == 0) {
699		(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
700	}
701
	/* Reset retval to 0; we don't care about errors in volname conversion. */
703	retval = 0;
704
705
706	/*
707	 * We now always initiate a full bitmap scan even if the volume is read-only because this is
	 * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
709	 * expects. TRIMs will not be delivered to the underlying media if the volume is not
710	 * read-write though.
711	 */
712	thread_t allocator_scanner;
713	hfsmp->scan_var = 0;
714
715	/* Take the HFS mount mutex and wait on scan_var */
716	hfs_lock_mount (hfsmp);
717
718	kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
719	/* Wait until it registers that it's got the appropriate locks */
720	while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
721		(void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
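		/*
		 * msleep() is called with PDROP, so the mount mutex has been dropped
		 * by the time we wake up; re-acquire it below if we have to wait again.
		 */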
722		if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
723			break;
724		}
725		else {
726			hfs_lock_mount (hfsmp);
727		}
728	}
729
730	thread_deallocate (allocator_scanner);
731
732	/* mark the volume dirty (clear clean unmount bit) */
733	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
734	if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
735		hfs_flushvolumeheader(hfsmp, TRUE, 0);
736	}
737
738	/* kHFSHasFolderCount is only supported/updated on HFSX volumes */
739	if ((hfsmp->hfs_flags & HFS_X) != 0) {
740		hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
741	}
742
743	//
744	// Check if we need to do late journal initialization.  This only
745	// happens if a previous version of MacOS X (or 9) touched the disk.
746	// In that case hfs_late_journal_init() will go re-locate the journal
747	// and journal_info_block files and validate that they're still kosher.
748	//
749	if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
750		&& (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
751		&& (hfsmp->jnl == NULL)) {
752
753		retval = hfs_late_journal_init(hfsmp, vhp, args);
754		if (retval != 0) {
755			if (retval == EROFS) {
756				// EROFS is a special error code that means the volume has an external
757				// journal which we couldn't find.  in that case we do not want to
758				// rewrite the volume header - we'll just refuse to mount the volume.
759				if (HFS_MOUNT_DEBUG) {
760					printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
761				}
762				retval = EINVAL;
763				goto ErrorExit;
764			}
765
766			hfsmp->jnl = NULL;
767
768			// if the journal failed to open, then set the lastMountedVersion
769			// to be "FSK!" which fsck_hfs will see and force the fsck instead
770			// of just bailing out because the volume is journaled.
771			if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
772				HFSPlusVolumeHeader *jvhp;
773				daddr64_t mdb_offset;
774				struct buf *bp = NULL;
775
776				hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
777
778				mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
779
780				bp = NULL;
781				retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
782						HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
783						hfsmp->hfs_physical_block_size, cred, &bp);
784				if (retval == 0) {
785					jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
786
787					if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
788						printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
789						jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
790					   	buf_bwrite(bp);
791					} else {
792						buf_brelse(bp);
793					}
794					bp = NULL;
795				} else if (bp) {
796					buf_brelse(bp);
797					// clear this so the error exit path won't try to use it
798					bp = NULL;
				}
800			}
801
802			if (HFS_MOUNT_DEBUG) {
803				printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
804			}
805			retval = EINVAL;
806			goto ErrorExit;
807		} else if (hfsmp->jnl) {
808			vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
809		}
810	} else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
811		struct cat_attr jinfo_attr, jnl_attr;
812
813		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
814		    vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
815		}
816
817		// if we're here we need to fill in the fileid's for the
818		// journal and journal_info_block.
819		hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
820		hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
821		if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
822			printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
823			printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
824		}
825
826		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
827		    vcb->vcbAtrb |= kHFSVolumeJournaledMask;
828		}
829
830		if (hfsmp->jnl == NULL) {
831		    vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
832		}
833	}
834
835	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )	// if the disk is not write protected
836	{
837		MarkVCBDirty( vcb );	// mark VCB dirty so it will be written
838	}
839
840	/*
841	 * Distinguish 3 potential cases involving content protection:
	 * 1. mount point bit set; vcbAtrb does not support it. Fail.
	 * 2. mount point bit set; vcbAtrb supports it. We're good.
	 * 3. mount point bit not set; vcbAtrb supports it: turn the bit on, then we're good.
845	 */
846	if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
847		/* Does the mount point support it ? */
848		if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
849			/* Case 1 above */
850			retval = EINVAL;
851			goto ErrorExit;
852		}
853	}
854	else {
855		/* not requested in the mount point. Is it in FS? */
856		if (vcb->vcbAtrb & kHFSContentProtectionMask) {
857			/* Case 3 above */
858			vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
859		}
860	}
861
862	/* At this point, if the mount point flag is set, we can enable it. */
863	if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
864		/* Cases 2+3 above */
865#if CONFIG_PROTECT
866		/* Get the EAs as needed. */
867		int cperr = 0;
868		uint16_t majorversion;
869		uint16_t minorversion;
870
871		struct cp_root_xattr *xattr = NULL;
872		MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
873		if (xattr == NULL) {
874			retval = ENOMEM;
875			goto ErrorExit;
876		}
877		bzero (xattr, sizeof(struct cp_root_xattr));
878
879		/* go get the EA to get the version information */
880		cperr = cp_getrootxattr (hfsmp, xattr);
		/*
		 * If there was no EA there, then write one out.
		 * The EA being absent on the root means this is
		 * an erase-install or a very old filesystem.
		 */
886
887		if (cperr == 0) {
888			/* Have to run a valid CP version. */
889			if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
890				cperr = EINVAL;
891			}
892		}
893		else if (cperr == ENOATTR) {
894			printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
895			bzero(xattr, sizeof(struct cp_root_xattr));
896			xattr->major_version = CP_NEW_MAJOR_VERS;
897			xattr->minor_version = CP_MINOR_VERS;
898			xattr->flags = 0;
899			cperr = cp_setrootxattr (hfsmp, xattr);
900		}
901		majorversion = xattr->major_version;
902		minorversion = xattr->minor_version;
903		if (xattr) {
904			FREE(xattr, M_TEMP);
905		}
906
907		/* Recheck for good status */
908		if (cperr == 0) {
909			/* If we got here, then the CP version is valid. Set it in the mount point */
910			hfsmp->hfs_running_cp_major_vers = majorversion;
911			printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
912
913			/*
914			 * Acquire the boot-arg for the AKS default key.
915			 * Ensure that the boot-arg's value is valid for FILES (not directories),
916			 * since only files are actually protected for now.
917			 */
918			PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
919			if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
920				hfsmp->default_cp_class = PROTECTION_CLASS_D;
921			}
922		}
923		else {
924			retval = EPERM;
925			goto ErrorExit;
926		}
927#else
928		/* If CONFIG_PROTECT not built, ignore CP */
929		vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
930#endif
931	}
932
933	/*
934	 * Establish a metadata allocation zone.
935	 */
936	hfs_metadatazone_init(hfsmp, false);
937
938	/*
939	 * Make any metadata zone adjustments.
940	 */
941	if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
942		/* Keep the roving allocator out of the metadata zone. */
943		if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
944		    vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
945			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
946		}
947	} else {
948		if (vcb->nextAllocation <= 1) {
949			vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
950		}
951	}
952	vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
953
954	/* Setup private/hidden directories for hardlinks. */
955	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
956	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
957
958	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
959		hfs_remove_orphans(hfsmp);
960
961	/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
962	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
963	{
964		retval = hfs_erase_unused_nodes(hfsmp);
965		if (retval) {
966			if (HFS_MOUNT_DEBUG) {
967				printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
968			}
969
970			goto ErrorExit;
971		}
972	}
973
974	/*
975	 * Allow hot file clustering if conditions allow.
976	 */
977	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
978	    ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
979		(void) hfs_recording_init(hfsmp);
980	}
981
982	/* Force ACLs on HFS+ file systems. */
983	vfs_setextendedsecurity(HFSTOVFS(hfsmp));
984
985	/* Enable extent-based extended attributes by default */
986	hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
987
988	return (0);
989
990ErrorExit:
991	/*
992	 * A fatal error occurred and the volume cannot be mounted, so
993	 * release any resources that we acquired...
994	 */
995	hfsUnmount(hfsmp, NULL);
996
997	if (HFS_MOUNT_DEBUG) {
998		printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
999	}
1000	return (retval);
1001}
1002
1003
1004/*
1005 * ReleaseMetaFileVNode
1006 *
1007 * vp	L - -
1008 */
1009static void ReleaseMetaFileVNode(struct vnode *vp)
1010{
1011	struct filefork *fp;
1012
1013	if (vp && (fp = VTOF(vp))) {
1014		if (fp->fcbBTCBPtr != NULL) {
1015			(void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1016			(void) BTClosePath(fp);
1017			hfs_unlock(VTOC(vp));
1018		}
1019
1020		/* release the node even if BTClosePath fails */
1021		vnode_recycle(vp);
1022		vnode_put(vp);
1023	}
1024}
1025
1026
1027/*************************************************************
1028*
* Unmounts an HFS volume.
1030*	At this point vflush() has been called (to dump all non-metadata files)
1031*
1032*************************************************************/
1033
1034int
1035hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
1036{
1037	/* Get rid of our attribute data vnode (if any).  This is done
1038	 * after the vflush() during mount, so we don't need to worry
1039	 * about any locks.
1040	 */
1041	if (hfsmp->hfs_attrdata_vp) {
1042		ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
1043		hfsmp->hfs_attrdata_vp = NULLVP;
1044	}
1045
1046	if (hfsmp->hfs_startup_vp) {
1047		ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
1048		hfsmp->hfs_startup_cp = NULL;
1049		hfsmp->hfs_startup_vp = NULL;
1050	}
1051
1052	if (hfsmp->hfs_attribute_vp) {
1053		ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
1054		hfsmp->hfs_attribute_cp = NULL;
1055		hfsmp->hfs_attribute_vp = NULL;
1056	}
1057
1058	if (hfsmp->hfs_catalog_vp) {
1059		ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
1060		hfsmp->hfs_catalog_cp = NULL;
1061		hfsmp->hfs_catalog_vp = NULL;
1062	}
1063
1064	if (hfsmp->hfs_extents_vp) {
1065		ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
1066		hfsmp->hfs_extents_cp = NULL;
1067		hfsmp->hfs_extents_vp = NULL;
1068	}
1069
1070	if (hfsmp->hfs_allocation_vp) {
1071		ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
1072		hfsmp->hfs_allocation_cp = NULL;
1073		hfsmp->hfs_allocation_vp = NULL;
1074	}
1075
1076	return (0);
1077}
1078
1079
1080/*
1081 * Test if fork has overflow extents.
1082 *
1083 * Returns:
1084 * 	non-zero - overflow extents exist
1085 * 	zero     - overflow extents do not exist
1086 */
1087__private_extern__
1088int
1089overflow_extents(struct filefork *fp)
1090{
1091	u_int32_t blocks;
1092
1093	//
1094	// If the vnode pointer is NULL then we're being called
1095	// from hfs_remove_orphans() with a faked-up filefork
1096	// and therefore it has to be an HFS+ volume.  Otherwise
1097	// we check through the volume header to see what type
1098	// of volume we're on.
	//
1100	if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
1101		if (fp->ff_extents[7].blockCount == 0)
1102			return (0);
1103
1104		blocks = fp->ff_extents[0].blockCount +
1105		         fp->ff_extents[1].blockCount +
1106		         fp->ff_extents[2].blockCount +
1107		         fp->ff_extents[3].blockCount +
1108		         fp->ff_extents[4].blockCount +
1109		         fp->ff_extents[5].blockCount +
1110		         fp->ff_extents[6].blockCount +
1111		         fp->ff_extents[7].blockCount;
1112	} else {
1113		if (fp->ff_extents[2].blockCount == 0)
1114			return false;
1115
1116		blocks = fp->ff_extents[0].blockCount +
1117		         fp->ff_extents[1].blockCount +
1118		         fp->ff_extents[2].blockCount;
1119	  }
1120
1121	return (fp->ff_blocks > blocks);
1122}
1123
1124/*
1125 * Lock the HFS global journal lock
1126 */
1127int
1128hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
1129{
1130	void *thread = current_thread();
1131
1132	if (hfsmp->hfs_global_lockowner == thread) {
1133		panic ("hfs_lock_global: locking against myself!");
1134	}
1135
1136    /* HFS_SHARED_LOCK */
1137	if (locktype == HFS_SHARED_LOCK) {
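		/*
		 * Shared holders are not tracked individually, so record a sentinel
		 * owner; hfs_unlock_global() uses this field to distinguish an
		 * exclusive holder from shared holders.
		 */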
1138		lck_rw_lock_shared (&hfsmp->hfs_global_lock);
1139		hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
1140	}
1141    /* HFS_EXCLUSIVE_LOCK */
1142	else {
1143		lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
1144		hfsmp->hfs_global_lockowner = thread;
1145	}
1146
1147	return 0;
1148}
1149
1150
1151/*
1152 * Unlock the HFS global journal lock
1153 */
1154void
1155hfs_unlock_global (struct hfsmount *hfsmp)
1156{
1157
1158	void *thread = current_thread();
1159
    /* HFS_EXCLUSIVE_LOCK */
1161	if (hfsmp->hfs_global_lockowner == thread) {
1162		hfsmp->hfs_global_lockowner = NULL;
1163		lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1164	}
    /* HFS_SHARED_LOCK */
1166	else {
1167		lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1168	}
1169}
1170
1171/*
1172 * Lock the HFS mount lock
1173 *
1174 * Note: this is a mutex, not a rw lock!
1175 */
1176inline
1177void hfs_lock_mount (struct hfsmount *hfsmp) {
1178	lck_mtx_lock (&(hfsmp->hfs_mutex));
1179}
1180
1181/*
1182 * Unlock the HFS mount lock
1183 *
1184 * Note: this is a mutex, not a rw lock!
1185 */
1186inline
1187void hfs_unlock_mount (struct hfsmount *hfsmp) {
1188	lck_mtx_unlock (&(hfsmp->hfs_mutex));
1189}
1190
1191/*
1192 * Lock HFS system file(s).
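 *
 * Returns the flags value updated to reflect the locks that were actually
 * taken; callers must pass that value to hfs_systemfile_unlock().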
1193 */
1194int
1195hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
1196{
1197	/*
1198	 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1199	 */
1200	if (flags & SFL_CATALOG) {
1201#ifdef HFS_CHECK_LOCK_ORDER
1202		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1203			panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1204		}
1205		if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1206			panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1207		}
		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1209			panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1210		}
1211#endif /* HFS_CHECK_LOCK_ORDER */
1212
1213		if (hfsmp->hfs_catalog_cp) {
1214			(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
1215			/*
1216			 * When the catalog file has overflow extents then
1217			 * also acquire the extents b-tree lock if its not
1218			 * already requested.
1219			 */
1220			if (((flags & SFL_EXTENTS) == 0) &&
1221			    (hfsmp->hfs_catalog_vp != NULL) &&
1222			    (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
1223				flags |= SFL_EXTENTS;
1224			}
1225		} else {
1226			flags &= ~SFL_CATALOG;
1227		}
1228	}
1229
1230	if (flags & SFL_ATTRIBUTE) {
1231#ifdef HFS_CHECK_LOCK_ORDER
1232		if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1233			panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1234		}
1235		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1236			panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1237		}
1238#endif /* HFS_CHECK_LOCK_ORDER */
1239
1240		if (hfsmp->hfs_attribute_cp) {
1241			(void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
1242			/*
1243			 * When the attribute file has overflow extents then
1244			 * also acquire the extents b-tree lock if its not
1245			 * already requested.
1246			 */
1247			if (((flags & SFL_EXTENTS) == 0) &&
1248			    (hfsmp->hfs_attribute_vp != NULL) &&
1249			    (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
1250				flags |= SFL_EXTENTS;
1251			}
1252		} else {
1253			flags &= ~SFL_ATTRIBUTE;
1254		}
1255	}
1256
1257	if (flags & SFL_STARTUP) {
1258#ifdef HFS_CHECK_LOCK_ORDER
		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1260			panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1261		}
1262#endif /* HFS_CHECK_LOCK_ORDER */
1263
1264		if (hfsmp->hfs_startup_cp) {
1265			(void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
1266			/*
1267			 * When the startup file has overflow extents then
1268			 * also acquire the extents b-tree lock if its not
1269			 * already requested.
1270			 */
1271			if (((flags & SFL_EXTENTS) == 0) &&
1272			    (hfsmp->hfs_startup_vp != NULL) &&
1273			    (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
1274				flags |= SFL_EXTENTS;
1275			}
1276		} else {
1277			flags &= ~SFL_STARTUP;
1278		}
1279	}
1280
1281	/*
1282	 * To prevent locks being taken in the wrong order, the extent lock
1283	 * gets a bitmap lock as well.
1284	 */
1285	if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1286		if (hfsmp->hfs_allocation_cp) {
1287			(void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1288			/*
1289			 * The bitmap lock is also grabbed when only extent lock
1290			 * was requested. Set the bitmap lock bit in the lock
1291			 * flags which callers will use during unlock.
1292			 */
1293			flags |= SFL_BITMAP;
1294		} else {
1295			flags &= ~SFL_BITMAP;
1296		}
1297	}
1298
1299	if (flags & SFL_EXTENTS) {
1300		/*
1301		 * Since the extents btree lock is recursive we always
1302		 * need exclusive access.
1303		 */
1304		if (hfsmp->hfs_extents_cp) {
1305			(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1306		} else {
1307			flags &= ~SFL_EXTENTS;
1308		}
1309	}
1310
1311	return (flags);
1312}
1313
1314/*
1315 * unlock HFS system file(s).
1316 */
1317void
1318hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1319{
1320	struct timeval tv;
1321	u_int32_t lastfsync;
1322	int numOfLockedBuffs;
1323
1324	if (hfsmp->jnl == NULL) {
1325		microuptime(&tv);
1326		lastfsync = tv.tv_sec;
1327	}
1328	if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1329		hfs_unlock(hfsmp->hfs_startup_cp);
1330	}
1331	if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1332		if (hfsmp->jnl == NULL) {
1333			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1334			numOfLockedBuffs = count_lock_queue();
1335			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1336			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1337			      kMaxSecsForFsync))) {
1338				hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1339			}
1340		}
1341		hfs_unlock(hfsmp->hfs_attribute_cp);
1342	}
1343	if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1344		if (hfsmp->jnl == NULL) {
1345			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1346			numOfLockedBuffs = count_lock_queue();
1347			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1348			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1349			      kMaxSecsForFsync))) {
1350				hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1351			}
1352		}
1353		hfs_unlock(hfsmp->hfs_catalog_cp);
1354	}
1355	if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1356		hfs_unlock(hfsmp->hfs_allocation_cp);
1357	}
1358	if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1359		if (hfsmp->jnl == NULL) {
1360			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1361			numOfLockedBuffs = count_lock_queue();
1362			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1363			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1364			      kMaxSecsForFsync))) {
1365				hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1366			}
1367		}
1368		hfs_unlock(hfsmp->hfs_extents_cp);
1369	}
1370}
1371
1372
1373/*
1374 * RequireFileLock
1375 *
1376 * Check to see if a vnode is locked in the current context
1377 * This is to be used for debugging purposes only!!
1378 */
1379#if HFS_DIAGNOSTIC
1380void RequireFileLock(FileReference vp, int shareable)
1381{
1382	int locked;
1383
1384	/* The extents btree and allocation bitmap are always exclusive. */
1385	if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1386	    VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1387		shareable = 0;
1388	}
1389
1390	locked = VTOC(vp)->c_lockowner == (void *)current_thread();
1391
1392	if (!locked && !shareable) {
1393		switch (VTOC(vp)->c_fileid) {
1394		case kHFSExtentsFileID:
1395			panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1396			break;
1397		case kHFSCatalogFileID:
1398			panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1399			break;
1400		case kHFSAllocationFileID:
			/* The allocation file can hide behind the journal lock. */
			if (VTOHFS(vp)->jnl == NULL)
				panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
			break;
		case kHFSStartupFileID:
			panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
			break;
1407		case kHFSAttributesFileID:
1408			panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1409			break;
1410		}
1411	}
1412}
1413#endif
1414
1415
1416/*
1417 * There are three ways to qualify for ownership rights on an object:
1418 *
1419 * 1. (a) Your UID matches the cnode's UID.
1420 *    (b) The object in question is owned by "unknown"
1421 * 2. (a) Permissions on the filesystem are being ignored and
1422 *        your UID matches the replacement UID.
1423 *    (b) Permissions on the filesystem are being ignored and
1424 *        the replacement UID is "unknown".
1425 * 3. You are root.
1426 *
1427 */
1428int
1429hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1430		__unused struct proc *p, int invokesuperuserstatus)
1431{
1432	if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
1433	    (cnode_uid == UNKNOWNUID) ||  									  /* [1b] */
1434	    ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
1435	      ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
1436	        (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
1437	    (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
1438		return (0);
1439	} else {
1440		return (EPERM);
1441	}
1442}
1443
1444
1445u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1446                               u_int32_t blockSizeLimit,
1447                               u_int32_t baseMultiple) {
1448    /*
1449       Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
1450       specified limit but still an even multiple of the baseMultiple.
1451     */
1452    int baseBlockCount, blockCount;
1453    u_int32_t trialBlockSize;
1454
1455    if (allocationBlockSize % baseMultiple != 0) {
1456        /*
1457           Whoops: the allocation blocks aren't even multiples of the specified base:
           no amount of dividing them into even parts will be a multiple, either!
1459        */
1460        return 512;		/* Hope for the best */
1461    };
1462
1463    /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1464       from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1465       Even though the former (the result of the loop below) is the larger allocation
1466       block size, the latter is more efficient: */
1467    if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1468
1469    /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1470    baseBlockCount = allocationBlockSize / baseMultiple;				/* Now guaranteed to be an even multiple */
1471
1472    for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1473        trialBlockSize = blockCount * baseMultiple;
1474        if (allocationBlockSize % trialBlockSize == 0) {				/* An even multiple? */
1475            if ((trialBlockSize <= blockSizeLimit) &&
1476                (trialBlockSize % baseMultiple == 0)) {
1477                return trialBlockSize;
1478            };
1479        };
1480    };
1481
1482    /* Note: we should never get here, since blockCount = 1 should always work,
1483       but this is nice and safe and makes the compiler happy, too ... */
1484    return 512;
1485}
1486
1487
1488u_int32_t
1489GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1490			struct cat_attr *fattr, struct cat_fork *forkinfo)
1491{
1492	struct hfsmount * hfsmp;
1493	struct cat_desc jdesc;
1494	int lockflags;
1495	int error;
1496
1497	if (vcb->vcbSigWord != kHFSPlusSigWord)
1498		return (0);
1499
1500	hfsmp = VCBTOHFS(vcb);
1501
1502	memset(&jdesc, 0, sizeof(struct cat_desc));
1503	jdesc.cd_parentcnid = kRootDirID;
1504	jdesc.cd_nameptr = (const u_int8_t *)name;
1505	jdesc.cd_namelen = strlen(name);
1506
1507	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1508	error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1509	hfs_systemfile_unlock(hfsmp, lockflags);
1510
1511	if (error == 0) {
1512		return (fattr->ca_fileid);
1513	} else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1514		return (0);
1515	}
1516
1517	return (0);	/* XXX what callers expect on an error */
1518}
1519
1520
1521/*
 * On HFS Plus volumes, there can be orphaned files or directories.
1523 * These are files or directories that were unlinked while busy.
1524 * If the volume was not cleanly unmounted then some of these may
1525 * have persisted and need to be removed.
1526 */
1527void
1528hfs_remove_orphans(struct hfsmount * hfsmp)
1529{
1530	struct BTreeIterator * iterator = NULL;
1531	struct FSBufferDescriptor btdata;
1532	struct HFSPlusCatalogFile filerec;
1533	struct HFSPlusCatalogKey * keyp;
1534	struct proc *p = current_proc();
1535	FCB *fcb;
1536	ExtendedVCB *vcb;
1537	char filename[32];
1538	char tempname[32];
1539	size_t namelen;
1540	cat_cookie_t cookie;
1541	int catlock = 0;
1542	int catreserve = 0;
1543	int started_tr = 0;
1544	int lockflags;
1545	int result;
1546	int orphaned_files = 0;
1547	int orphaned_dirs = 0;
1548
1549	bzero(&cookie, sizeof(cookie));
1550
1551	if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1552		return;
1553
1554	vcb = HFSTOVCB(hfsmp);
1555	fcb = VTOF(hfsmp->hfs_catalog_vp);
1556
1557	btdata.bufferAddress = &filerec;
1558	btdata.itemSize = sizeof(filerec);
1559	btdata.itemCount = 1;
1560
1561	MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1562	bzero(iterator, sizeof(*iterator));
1563
1564	/* Build a key to "temp" */
1565	keyp = (HFSPlusCatalogKey*)&iterator->key;
1566	keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1567	keyp->nodeName.length = 4;  /* "temp" */
1568	keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1569	keyp->nodeName.unicode[0] = 't';
1570	keyp->nodeName.unicode[1] = 'e';
1571	keyp->nodeName.unicode[2] = 'm';
1572	keyp->nodeName.unicode[3] = 'p';
1573
1574	/*
1575	 * Position the iterator just before the first real temp file/dir.
1576	 */
1577	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1578	(void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1579	hfs_systemfile_unlock(hfsmp, lockflags);
1580
1581	/* Visit all the temp files/dirs in the HFS+ private directory. */
1582	for (;;) {
1583		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1584		result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1585		hfs_systemfile_unlock(hfsmp, lockflags);
1586		if (result)
1587			break;
1588		if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1589			break;
1590
1591		(void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1592		                      (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1593
1594		(void) snprintf(tempname, sizeof(tempname), "%s%d",
1595				HFS_DELETE_PREFIX, filerec.fileID);
1596
		/*
		 * Delete all files (and directories) named "tempxxx",
		 * where xxx is the file's cnid in decimal.
		 */
1602		if (bcmp(tempname, filename, namelen) == 0) {
			struct filefork dfork;
			struct filefork rfork;
			struct cnode cnode;
1606			int mode = 0;
1607
1608			bzero(&dfork, sizeof(dfork));
1609			bzero(&rfork, sizeof(rfork));
1610			bzero(&cnode, sizeof(cnode));
1611
1612			/* Delete any attributes, ignore errors */
1613			(void) hfs_removeallattr(hfsmp, filerec.fileID);
1614
1615			if (hfs_start_transaction(hfsmp) != 0) {
1616			    printf("hfs_remove_orphans: failed to start transaction\n");
1617			    goto exit;
1618			}
1619			started_tr = 1;
1620
1621			/*
1622			 * Reserve some space in the Catalog file.
1623			 */
1624			if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1625			    printf("hfs_remove_orphans: cat_preflight failed\n");
1626				goto exit;
1627			}
1628			catreserve = 1;
1629
1630			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1631			catlock = 1;
1632
1633			/* Build a fake cnode */
1634			cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1635			                &dfork.ff_data, &rfork.ff_data);
1636			cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1637			cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1638			cnode.c_desc.cd_namelen = namelen;
1639			cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1640			cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1641
1642			/* Position iterator at previous entry */
1643			if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1644			    NULL, NULL) != 0) {
1645				break;
1646			}
1647
1648			/* Truncate the file to zero (both forks) */
1649			if (dfork.ff_blocks > 0) {
1650				u_int64_t fsize;
1651
1652				dfork.ff_cp = &cnode;
1653				cnode.c_datafork = &dfork;
1654				cnode.c_rsrcfork = NULL;
1655				fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
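				/*
				 * Truncate large files in HFS_BIGFILE_SIZE steps when they
				 * have overflow extents, so that each pass (and thus each
				 * journal transaction) stays reasonably small.
				 */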
1656				while (fsize > 0) {
					if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
1658						fsize -= HFS_BIGFILE_SIZE;
1659					} else {
1660						fsize = 0;
1661					}
1662
1663					if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1664									  cnode.c_attr.ca_fileid, false) != 0) {
1665						printf("hfs: error truncating data fork!\n");
1666						break;
1667					}
1668
1669					//
1670					// if we're iteratively truncating this file down,
1671					// then end the transaction and start a new one so
1672					// that no one transaction gets too big.
1673					//
1674					if (fsize > 0 && started_tr) {
1675						/* Drop system file locks before starting
1676						 * another transaction to preserve lock order.
1677						 */
1678						hfs_systemfile_unlock(hfsmp, lockflags);
1679						catlock = 0;
1680						hfs_end_transaction(hfsmp);
1681
1682						if (hfs_start_transaction(hfsmp) != 0) {
1683							started_tr = 0;
1684							break;
1685						}
1686						lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1687						catlock = 1;
1688					}
1689				}
1690			}
1691
1692			if (rfork.ff_blocks > 0) {
1693				rfork.ff_cp = &cnode;
1694				cnode.c_datafork = NULL;
1695				cnode.c_rsrcfork = &rfork;
1696				if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1697					printf("hfs: error truncating rsrc fork!\n");
1698					break;
1699				}
1700			}
1701
1702			/* Remove the file or folder record from the Catalog */
1703			if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1704				printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1705				hfs_systemfile_unlock(hfsmp, lockflags);
1706				catlock = 0;
1707				hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1708				break;
1709			}
1710
1711			mode = cnode.c_attr.ca_mode & S_IFMT;
1712
1713			if (mode == S_IFDIR) {
1714				orphaned_dirs++;
1715			}
1716			else {
1717				orphaned_files++;
1718			}
1719
1720			/* Update parent and volume counts */
1721			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1722			if (mode == S_IFDIR) {
1723				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1724			}
1725
1726			(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1727			                 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1728
1729			/* Drop locks and end the transaction */
1730			hfs_systemfile_unlock(hfsmp, lockflags);
1731			cat_postflight(hfsmp, &cookie, p);
1732			catlock = catreserve = 0;
1733
1734			/*
1735			   Now that Catalog is unlocked, update the volume info, making
1736			   sure to differentiate between files and directories
1737			*/
1738			if (mode == S_IFDIR) {
1739				hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1740			}
			else {
				hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1743			}
1744
1745			if (started_tr) {
1746				hfs_end_transaction(hfsmp);
1747				started_tr = 0;
1748			}
1749
1750		} /* end if */
1751	} /* end for */
1752	if (orphaned_files > 0 || orphaned_dirs > 0)
		printf("hfs: Removed %d orphaned/unlinked files and %d directories\n", orphaned_files, orphaned_dirs);
1754exit:
1755	if (catlock) {
1756		hfs_systemfile_unlock(hfsmp, lockflags);
1757	}
1758	if (catreserve) {
1759		cat_postflight(hfsmp, &cookie, p);
1760	}
1761	if (started_tr) {
1762		hfs_end_transaction(hfsmp);
1763	}
1764
1765	FREE(iterator, M_TEMP);
1766	hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1767}
1768
1769
/*
 * Return the correct logical block size for a given vnode.
 * For most files this is the allocation block size; for metadata
 * files such as the B-trees, it is the B-tree's node size.
 */
1775u_int32_t
1776GetLogicalBlockSize(struct vnode *vp)
1777{
	u_int32_t logBlockSize;
1779
1780	DBG_ASSERT(vp != NULL);
1781
1782	/* start with default */
1783	logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1784
1785	if (vnode_issystem(vp)) {
1786		if (VTOF(vp)->fcbBTCBPtr != NULL) {
1787			BTreeInfoRec			bTreeInfo;
1788
			/*
			 * We do not lock the B-tree here: if we are fetching a block,
			 * the tree should already be locked by the caller.  We only
			 * need the nodeSize, which never changes, so even if the tree
			 * is being modified the nodeSize remains the same.
			 */
1796
			(void) BTGetInformation(VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1798
1799			logBlockSize = bTreeInfo.nodeSize;
1800
		} else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
			logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1803		}
1804	}
1805
1806	DBG_ASSERT(logBlockSize > 0);
1807
1808	return logBlockSize;
1809}
1810
1811u_int32_t
1812hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
1813{
1814	u_int32_t freeblks;
1815	u_int32_t rsrvblks;
1816	u_int32_t loanblks;
1817
1818	/*
1819	 * We don't bother taking the mount lock
1820	 * to look at these values since the values
1821	 * themselves are each updated atomically
1822	 * on aligned addresses.
1823	 */
1824	freeblks = hfsmp->freeBlocks;
1825	rsrvblks = hfsmp->reserveBlocks;
1826	loanblks = hfsmp->loanedBlocks;
1827	if (wantreserve) {
1828		if (freeblks > rsrvblks)
1829			freeblks -= rsrvblks;
1830		else
1831			freeblks = 0;
1832	}
1833	if (freeblks > loanblks)
1834		freeblks -= loanblks;
1835	else
1836		freeblks = 0;
1837
1838#if HFS_SPARSE_DEV
1839	/*
1840	 * When the underlying device is sparse, check the
1841	 * available space on the backing store volume.
1842	 */
1843	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
1844		struct vfsstatfs *vfsp;  /* 272 bytes */
1845		u_int64_t vfreeblks;
1846		u_int32_t loanedblks;
1847		struct mount * backingfs_mp;
1848		struct timeval now;
1849
1850		backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
1851
1852		microtime(&now);
1853		if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1854		    vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1855		    hfsmp->hfs_last_backingstatfs = now.tv_sec;
1856		}
1857
1858		if ((vfsp = vfs_statfs(backingfs_mp))) {
1859			hfs_lock_mount (hfsmp);
1860			vfreeblks = vfsp->f_bavail;
1861			/* Normalize block count if needed. */
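			/* e.g. a backing store using 4 KB blocks on an 8 KB-block HFS volume yields half as many HFS allocation blocks. */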
1862			if (vfsp->f_bsize != hfsmp->blockSize) {
1863				vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
1864			}
1865			if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
1866				vfreeblks -= hfsmp->hfs_sparsebandblks;
1867			else
1868				vfreeblks = 0;
1869
1870			/* Take into account any delayed allocations. */
1871			loanedblks = 2 * hfsmp->loanedBlocks;
1872			if (vfreeblks > loanedblks)
1873				vfreeblks -= loanedblks;
1874			else
1875				vfreeblks = 0;
1876
1877			if (hfsmp->hfs_backingfs_maxblocks) {
1878				vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
1879			}
1880			freeblks = MIN(vfreeblks, freeblks);
1881			hfs_unlock_mount (hfsmp);
1882		}
1883	}
1884#endif /* HFS_SPARSE_DEV */
1885	if (hfsmp->hfs_flags & HFS_CS) {
1886		uint64_t cs_free_bytes;
1887		uint64_t cs_free_blks;
1888		if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
1889		    (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
1890			cs_free_blks = cs_free_bytes / hfsmp->blockSize;
1891			if (cs_free_blks > loanblks)
1892				cs_free_blks -= loanblks;
1893			else
1894				cs_free_blks = 0;
1895			freeblks = MIN(cs_free_blks, freeblks);
1896		}
1897	}
1898
1899	return (freeblks);
1900}
1901
1902/*
1903 * Map HFS Common errors (negative) to BSD error codes (positive).
 * Positive errors (i.e., BSD errors) are passed through unchanged.
1905 */
1906short MacToVFSError(OSErr err)
1907{
1908	if (err >= 0)
		return err;
1910
1911	/* BSD/VFS internal errnos */
1912	switch (err) {
1913		case ERESERVEDNAME: /* -8 */
1914			return err;
1915	}
1916
1917	switch (err) {
1918	case dskFulErr:			/*    -34 */
1919	case btNoSpaceAvail:		/* -32733 */
1920		return ENOSPC;
1921	case fxOvFlErr:			/* -32750 */
1922		return EOVERFLOW;
1923
1924	case btBadNode:			/* -32731 */
1925		return EIO;
1926
1927	case memFullErr:		/*  -108 */
1928		return ENOMEM;		/*   +12 */
1929
1930	case cmExists:			/* -32718 */
1931	case btExists:			/* -32734 */
1932		return EEXIST;		/*    +17 */
1933
1934	case cmNotFound:		/* -32719 */
1935	case btNotFound:		/* -32735 */
		return ENOENT;		/*      2 */
1937
1938	case cmNotEmpty:		/* -32717 */
1939		return ENOTEMPTY;	/*     66 */
1940
1941	case cmFThdDirErr:		/* -32714 */
1942		return EISDIR;		/*     21 */
1943
1944	case fxRangeErr:		/* -32751 */
1945		return ERANGE;
1946
1947	case bdNamErr:			/*   -37 */
1948		return ENAMETOOLONG;	/*    63 */
1949
1950	case paramErr:			/*   -50 */
1951	case fileBoundsErr:		/* -1309 */
1952		return EINVAL;		/*   +22 */
1953
1954	case fsBTBadNodeSize:
1955		return ENXIO;
1956
1957	default:
1958		return EIO;		/*   +5 */
1959	}
1960}
1961
1962
1963/*
1964 * Find the current thread's directory hint for a given index.
1965 *
1966 * Requires an exclusive lock on directory cnode.
1967 *
1968 * Use detach if the cnode lock must be dropped while the hint is still active.
1969 */
1970__private_extern__
1971directoryhint_t *
1972hfs_getdirhint(struct cnode *dcp, int index, int detach)
1973{
1974	struct timeval tv;
1975	directoryhint_t *hint;
1976	boolean_t need_remove, need_init;
1977	const u_int8_t * name;
1978
1979	microuptime(&tv);
1980
1981	/*
1982	 *  Look for an existing hint first.  If not found, create a new one (when
1983	 *  the list is not full) or recycle the oldest hint.  Since new hints are
1984	 *  always added to the head of the list, the last hint is always the
1985	 *  oldest.
1986	 */
1987	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1988		if (hint->dh_index == index)
1989			break;
1990	}
1991	if (hint != NULL) { /* found an existing hint */
1992		need_init = false;
1993		need_remove = true;
1994	} else { /* cannot find an existing hint */
1995		need_init = true;
1996		if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
1997			/* Create a default directory hint */
1998			MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
1999			++dcp->c_dirhintcnt;
2000			need_remove = false;
2001		} else {				/* recycle the last (i.e., the oldest) hint */
2002			hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2003			if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2004			    (name = hint->dh_desc.cd_nameptr)) {
2005				hint->dh_desc.cd_nameptr = NULL;
2006				hint->dh_desc.cd_namelen = 0;
2007				hint->dh_desc.cd_flags &= ~CD_HASBUF;
2008				vfs_removename((const char *)name);
2009			}
2010			need_remove = true;
2011		}
2012	}
2013
2014	if (need_remove)
2015		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2016
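	/*
	 * A detached hint is not kept on the directory's hint list; the caller
	 * must later reattach it with hfs_insertdirhint() or release it with
	 * hfs_reldirhint().
	 */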
2017	if (detach)
2018		--dcp->c_dirhintcnt;
2019	else
2020		TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2021
2022	if (need_init) {
2023		hint->dh_index = index;
2024		hint->dh_desc.cd_flags = 0;
2025		hint->dh_desc.cd_encoding = 0;
2026		hint->dh_desc.cd_namelen = 0;
2027		hint->dh_desc.cd_nameptr = NULL;
2028		hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2029		hint->dh_desc.cd_hint = dcp->c_childhint;
2030		hint->dh_desc.cd_cnid = 0;
2031	}
2032	hint->dh_time = tv.tv_sec;
2033	return (hint);
2034}
2035
2036/*
2037 * Release a single directory hint.
2038 *
2039 * Requires an exclusive lock on directory cnode.
2040 */
2041__private_extern__
2042void
2043hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2044{
2045	const u_int8_t * name;
2046	directoryhint_t *hint;
2047
2048	/* Check if item is on list (could be detached) */
2049	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2050		if (hint == relhint) {
2051			TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2052			--dcp->c_dirhintcnt;
2053			break;
2054		}
2055	}
2056	name = relhint->dh_desc.cd_nameptr;
2057	if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2058		relhint->dh_desc.cd_nameptr = NULL;
2059		relhint->dh_desc.cd_namelen = 0;
2060		relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2061		vfs_removename((const char *)name);
2062	}
2063	FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2064}
2065
2066/*
2067 * Release directory hints for given directory
2068 *
2069 * Requires an exclusive lock on directory cnode.
2070 */
2071__private_extern__
2072void
2073hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2074{
2075	struct timeval tv;
2076	directoryhint_t *hint, *prev;
2077	const u_int8_t * name;
2078
2079	if (stale_hints_only)
2080		microuptime(&tv);
2081
2082	/* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2083	for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2084		if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2085			break;  /* stop here if this entry is too new */
2086		name = hint->dh_desc.cd_nameptr;
2087		if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2088			hint->dh_desc.cd_nameptr = NULL;
2089			hint->dh_desc.cd_namelen = 0;
2090			hint->dh_desc.cd_flags &= ~CD_HASBUF;
2091			vfs_removename((const char *)name);
2092		}
2093		prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2094		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2095		FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2096		--dcp->c_dirhintcnt;
2097	}
2098}
2099
2100/*
2101 * Insert a detached directory hint back into the list of dirhints.
2102 *
2103 * Requires an exclusive lock on directory cnode.
2104 */
2105__private_extern__
2106void
2107hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2108{
2109	directoryhint_t *test;
2110
2111	TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2112		if (test == hint)
2113			panic("hfs_insertdirhint: hint %p already on list!", hint);
2114	}
2115
2116	TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2117	++dcp->c_dirhintcnt;
2118}
2119
2120/*
2121 * Perform a case-insensitive compare of two UTF-8 filenames.
2122 *
2123 * Returns 0 if the strings match.
2124 */
2125__private_extern__
2126int
2127hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2128{
2129	u_int16_t *ustr1, *ustr2;
2130	size_t ulen1, ulen2;
2131	size_t maxbytes;
2132	int cmp = -1;
2133
2134	if (len1 != len2)
2135		return (cmp);
2136
2137	maxbytes = kHFSPlusMaxFileNameChars << 1;
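	/* One allocation holds both Unicode buffers; ustr2 starts at the halfway point. */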
2138	MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2139	ustr2 = ustr1 + (maxbytes >> 1);
2140
2141	if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2142		goto out;
2143	if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2144		goto out;
2145
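	/* utf8_decodestr returns lengths in bytes; FastUnicodeCompare takes UTF-16 character counts, hence the >> 1. */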
2146	cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2147out:
2148	FREE(ustr1, M_TEMP);
2149	return (cmp);
2150}
2151
2152
2153typedef struct jopen_cb_info {
2154	off_t   jsize;
2155	char   *desired_uuid;
	struct  vnode *jvp;
2157	size_t  blksize;
2158	int     need_clean;
2159	int     need_init;
2160} jopen_cb_info;
2161
2162static int
2163journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2164{
2165	struct nameidata nd;
2166	jopen_cb_info *ji = (jopen_cb_info *)arg;
2167	char bsd_name[256];
2168	int error;
2169
2170	strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2171	strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2172
2173	if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2174		return 1;   // keep iterating
2175	}
2176
2177	// if we're here, either the desired uuid matched or there was no
2178	// desired uuid so let's try to open the device for writing and
2179	// see if it works.  if it does, we'll use it.
2180
2181	NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2182	if ((error = namei(&nd))) {
2183		printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2184		return 1;   // keep iterating
2185	}
2186
2187	ji->jvp = nd.ni_vp;
2188	nameidone(&nd);
2189
2190	if (ji->jvp == NULL) {
2191		printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2192	} else {
2193		error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2194		if (error == 0) {
2195			// if the journal is dirty and we didn't specify a desired
2196			// journal device uuid, then do not use the journal.  but
2197			// if the journal is just invalid (e.g. it hasn't been
2198			// initialized) then just set the need_init flag.
2199			if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2200				error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2201				if (error == EBUSY) {
2202					VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2203					vnode_put(ji->jvp);
2204					ji->jvp = NULL;
2205					return 1;    // keep iterating
2206				} else if (error == EINVAL) {
2207					ji->need_init = 1;
2208				}
2209			}
2210
2211			if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2212				strlcpy(ji->desired_uuid, uuid_str, 128);
2213			}
2214			vnode_setmountedon(ji->jvp);
2215			return 0;   // stop iterating
2216		} else {
2217			vnode_put(ji->jvp);
2218			ji->jvp = NULL;
2219		}
2220	}
2221
2222	return 1;   // keep iterating
2223}
2224
2225extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2226kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2227
2228
2229static vnode_t
2230open_journal_dev(const char *vol_device,
2231		 int need_clean,
2232		 char *uuid_str,
2233		 char *machine_serial_num,
2234		 off_t jsize,
2235		 size_t blksize,
2236		 int *need_init)
2237{
2238    int retry_counter=0;
2239    jopen_cb_info ji;
2240
2241    ji.jsize        = jsize;
2242    ji.desired_uuid = uuid_str;
2243    ji.jvp          = NULL;
2244    ji.blksize      = blksize;
2245    ji.need_clean   = need_clean;
2246    ji.need_init    = 0;
2247
2248//    if (uuid_str[0] == '\0') {
2249//	    printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2250//    } else {
2251//	    printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2252//    }
2253    while (ji.jvp == NULL && retry_counter++ < 4) {
2254	    if (retry_counter > 1) {
2255		    if (uuid_str[0]) {
2256			    printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
2257		    } else {
2258			    printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
2259		    }
2260		    delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
2261	    }
2262
2263	    IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2264    }
2265
2266    if (ji.jvp == NULL) {
2267	    printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2268		   vol_device, uuid_str, machine_serial_num);
2269    }
2270
2271    *need_init = ji.need_init;
2272
2273    return ji.jvp;
2274}
2275
2276
2277int
2278hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2279					   void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2280					   HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2281{
2282	JournalInfoBlock *jibp;
2283	struct buf       *jinfo_bp, *bp;
2284	int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2285	int               retval, write_jibp = 0;
2286	uint32_t		  blksize = hfsmp->hfs_logical_block_size;
2287	struct vnode     *devvp;
2288	struct hfs_mount_args *args = _args;
2289	u_int32_t	  jib_flags;
2290	u_int64_t	  jib_offset;
2291	u_int64_t	  jib_size;
2292	const char *dev_name;
2293
2294	devvp = hfsmp->hfs_devvp;
2295	dev_name = vnode_getname_printable(devvp);
2296
2297	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2298		arg_flags  = args->journal_flags;
2299		arg_tbufsz = args->journal_tbuffer_size;
2300	}
2301
2302	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
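	/* sectors_per_fsblock converts the journal info block's allocation block number into a logical block number for the read below. */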
2303
2304	jinfo_bp = NULL;
2305	retval = (int)buf_meta_bread(devvp,
2306						(daddr64_t)((embeddedOffset/blksize) +
2307						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2308						hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2309	if (retval) {
2310		if (jinfo_bp) {
2311			buf_brelse(jinfo_bp);
2312		}
2313		goto cleanup_dev_name;
2314	}
2315
2316	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2317	jib_flags  = SWAP_BE32(jibp->flags);
2318	jib_size   = SWAP_BE64(jibp->size);
2319
2320	if (jib_flags & kJIJournalInFSMask) {
2321		hfsmp->jvp = hfsmp->hfs_devvp;
2322		jib_offset = SWAP_BE64(jibp->offset);
2323	} else {
2324	    int need_init=0;
2325
2326	    // if the volume was unmounted cleanly then we'll pick any
2327	    // available external journal partition
2328	    //
2329	    if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2330		    *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2331	    }
2332
2333	    hfsmp->jvp = open_journal_dev(dev_name,
2334					  !(jib_flags & kJIJournalNeedInitMask),
2335					  (char *)&jibp->ext_jnl_uuid[0],
2336					  (char *)&jibp->machine_serial_num[0],
2337					  jib_size,
2338					  hfsmp->hfs_logical_block_size,
2339					  &need_init);
2340	    if (hfsmp->jvp == NULL) {
2341		    buf_brelse(jinfo_bp);
2342		    retval = EROFS;
2343		    goto cleanup_dev_name;
2344	    } else {
2345		    if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2346			    strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2347		    }
2348	    }
2349
2350	    jib_offset = 0;
2351	    write_jibp = 1;
2352	    if (need_init) {
2353		    jib_flags |= kJIJournalNeedInitMask;
2354	    }
2355	}
2356
2357	// save this off for the hack-y check in hfs_remove()
2358	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2359	hfsmp->jnl_size  = jib_size;
2360
2361	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2362	    // if the file system is read-only, check if the journal is empty.
2363	    // if it is, then we can allow the mount.  otherwise we have to
2364	    // return failure.
2365	    retval = journal_is_clean(hfsmp->jvp,
2366				      jib_offset + embeddedOffset,
2367				      jib_size,
2368				      devvp,
2369				      hfsmp->hfs_logical_block_size);
2370
2371	    hfsmp->jnl = NULL;
2372
2373	    buf_brelse(jinfo_bp);
2374
2375	    if (retval) {
2376		    const char *name = vnode_getname_printable(devvp);
		    printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Cannot mount volume.\n",
2378		    name);
2379		    vnode_putname_printable(name);
2380	    }
2381
2382	    goto cleanup_dev_name;
2383	}
2384
2385	if (jib_flags & kJIJournalNeedInitMask) {
2386		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2387			   jib_offset + embeddedOffset, jib_size);
2388		hfsmp->jnl = journal_create(hfsmp->jvp,
2389									jib_offset + embeddedOffset,
2390									jib_size,
2391									devvp,
2392									blksize,
2393									arg_flags,
2394									arg_tbufsz,
2395									hfs_sync_metadata, hfsmp->hfs_mp,
2396									hfsmp->hfs_mp);
2397		if (hfsmp->jnl)
2398			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2399
2400		// no need to start a transaction here... if this were to fail
2401		// we'd just re-init it on the next mount.
2402		jib_flags &= ~kJIJournalNeedInitMask;
2403		jibp->flags  = SWAP_BE32(jib_flags);
2404		buf_bwrite(jinfo_bp);
2405		jinfo_bp = NULL;
2406		jibp     = NULL;
2407	} else {
2408		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2409		//	   jib_offset + embeddedOffset,
2410		//	   jib_size, SWAP_BE32(vhp->blockSize));
2411
2412		hfsmp->jnl = journal_open(hfsmp->jvp,
2413								  jib_offset + embeddedOffset,
2414								  jib_size,
2415								  devvp,
2416								  blksize,
2417								  arg_flags,
2418								  arg_tbufsz,
2419								  hfs_sync_metadata, hfsmp->hfs_mp,
2420								  hfsmp->hfs_mp);
2421		if (hfsmp->jnl)
2422			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2423
2424		if (write_jibp) {
2425			buf_bwrite(jinfo_bp);
2426		} else {
2427			buf_brelse(jinfo_bp);
2428		}
2429		jinfo_bp = NULL;
2430		jibp     = NULL;
2431
2432		if (hfsmp->jnl && mdbp) {
2433			// reload the mdb because it could have changed
2434			// if the journal had to be replayed.
2435			if (mdb_offset == 0) {
2436				mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2437			}
2438			bp = NULL;
2439			retval = (int)buf_meta_bread(devvp,
2440					HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2441					hfsmp->hfs_physical_block_size, cred, &bp);
2442			if (retval) {
2443				if (bp) {
2444					buf_brelse(bp);
2445				}
2446				printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2447					   retval);
2448				goto cleanup_dev_name;
2449			}
2450			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2451			buf_brelse(bp);
2452			bp = NULL;
2453		}
2454	}
2455
2456	// if we expected the journal to be there and we couldn't
2457	// create it or open it then we have to bail out.
2458	if (hfsmp->jnl == NULL) {
2459		printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2460		retval = EINVAL;
2461		goto cleanup_dev_name;
2462	}
2463
2464	retval = 0;
2465
2466cleanup_dev_name:
2467	vnode_putname_printable(dev_name);
2468	return retval;
2469}
2470
2471
2472//
2473// This function will go and re-locate the .journal_info_block and
2474// the .journal files in case they moved (which can happen if you
2475// run Norton SpeedDisk).  If we fail to find either file we just
2476// disable journaling for this volume and return.  We turn off the
2477// journaling bit in the vcb and assume it will get written to disk
2478// later (if it doesn't on the next mount we'd do the same thing
2479// again which is harmless).  If we disable journaling we don't
2480// return an error so that the volume is still mountable.
2481//
2482// If the info we find for the .journal_info_block and .journal files
2483// isn't what we had stored, we re-set our cached info and proceed
2484// with opening the journal normally.
2485//
2486static int
2487hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2488{
2489	JournalInfoBlock *jibp;
2490	struct buf       *jinfo_bp;
2491	int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2492	int               retval, write_jibp = 0, recreate_journal = 0;
2493	struct vnode     *devvp;
2494	struct cat_attr   jib_attr, jattr;
2495	struct cat_fork   jib_fork, jfork;
2496	ExtendedVCB      *vcb;
2497	u_int32_t            fid;
2498	struct hfs_mount_args *args = _args;
2499	u_int32_t	  jib_flags;
2500	u_int64_t	  jib_offset;
2501	u_int64_t	  jib_size;
2502
2503	devvp = hfsmp->hfs_devvp;
2504	vcb = HFSTOVCB(hfsmp);
2505
2506	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2507		if (args->journal_disable) {
2508			return 0;
2509		}
2510
2511		arg_flags  = args->journal_flags;
2512		arg_tbufsz = args->journal_tbuffer_size;
2513	}
2514
2515	fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2516	if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2517		printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2518			   jib_fork.cf_extents[0].startBlock);
2519		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2520		return 0;
2521	}
2522	hfsmp->hfs_jnlinfoblkid = fid;
2523
2524	// make sure the journal_info_block begins where we think it should.
2525	if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2526		printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
2527			   SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2528
2529		vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
2530		vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2531		recreate_journal = 1;
2532	}
2533
2534
2535	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2536	jinfo_bp = NULL;
2537	retval = (int)buf_meta_bread(devvp,
2538						(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2539						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2540						hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2541	if (retval) {
2542		if (jinfo_bp) {
2543			buf_brelse(jinfo_bp);
2544		}
2545		printf("hfs: can't read journal info block. disabling journaling.\n");
2546		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2547		return 0;
2548	}
2549
2550	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2551	jib_flags  = SWAP_BE32(jibp->flags);
2552	jib_offset = SWAP_BE64(jibp->offset);
2553	jib_size   = SWAP_BE64(jibp->size);
2554
2555	fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2556	if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2557		printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2558			   jfork.cf_extents[0].startBlock);
2559		buf_brelse(jinfo_bp);
2560		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2561		return 0;
2562	}
2563	hfsmp->hfs_jnlfileid = fid;
2564
2565	// make sure the journal file begins where we think it should.
2566	if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2567		printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
2568			   (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2569
2570		jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2571		write_jibp   = 1;
2572		recreate_journal = 1;
2573	}
2574
2575	// check the size of the journal file.
2576	if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2577		printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
2578			   jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2579
2580		jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2581		write_jibp = 1;
2582		recreate_journal = 1;
2583	}
2584
2585	if (jib_flags & kJIJournalInFSMask) {
2586		hfsmp->jvp = hfsmp->hfs_devvp;
2587		jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2588	} else {
2589	    const char *dev_name;
2590	    int need_init = 0;
2591
2592	    dev_name = vnode_getname_printable(devvp);
2593
	    // since the journal is empty, just use any available external journal
2595	    *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2596
2597	    // this fills in the uuid of the device we actually get
2598	    hfsmp->jvp = open_journal_dev(dev_name,
2599					  !(jib_flags & kJIJournalNeedInitMask),
2600					  (char *)&jibp->ext_jnl_uuid[0],
2601					  (char *)&jibp->machine_serial_num[0],
2602					  jib_size,
2603					  hfsmp->hfs_logical_block_size,
2604					  &need_init);
2605	    if (hfsmp->jvp == NULL) {
2606		    buf_brelse(jinfo_bp);
2607		    vnode_putname_printable(dev_name);
2608		    return EROFS;
2609	    } else {
2610		    if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2611			    strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2612		    }
2613	    }
2614	    jib_offset = 0;
2615	    recreate_journal = 1;
2616	    write_jibp = 1;
2617	    if (need_init) {
2618		    jib_flags |= kJIJournalNeedInitMask;
2619	    }
2620	    vnode_putname_printable(dev_name);
2621	}
2622
2623	// save this off for the hack-y check in hfs_remove()
2624	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2625	hfsmp->jnl_size  = jib_size;
2626
2627	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2628	    // if the file system is read-only, check if the journal is empty.
2629	    // if it is, then we can allow the mount.  otherwise we have to
2630	    // return failure.
2631	    retval = journal_is_clean(hfsmp->jvp,
2632				      jib_offset,
2633				      jib_size,
2634				      devvp,
2635		                      hfsmp->hfs_logical_block_size);
2636
2637	    hfsmp->jnl = NULL;
2638
2639	    buf_brelse(jinfo_bp);
2640
2641	    if (retval) {
2642		    const char *name = vnode_getname_printable(devvp);
		    printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Cannot mount volume.\n",
2644		    name);
2645		    vnode_putname_printable(name);
2646	    }
2647
2648	    return retval;
2649	}
2650
2651	if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2652		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2653			   jib_offset, jib_size);
2654		hfsmp->jnl = journal_create(hfsmp->jvp,
2655									jib_offset,
2656									jib_size,
2657									devvp,
2658									hfsmp->hfs_logical_block_size,
2659									arg_flags,
2660									arg_tbufsz,
2661									hfs_sync_metadata, hfsmp->hfs_mp,
2662									hfsmp->hfs_mp);
2663		if (hfsmp->jnl)
2664			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2665
2666		// no need to start a transaction here... if this were to fail
2667		// we'd just re-init it on the next mount.
2668		jib_flags &= ~kJIJournalNeedInitMask;
2669		write_jibp   = 1;
2670
2671	} else {
2672		//
2673		// if we weren't the last person to mount this volume
2674		// then we need to throw away the journal because it
2675		// is likely that someone else mucked with the disk.
2676		// if the journal is empty this is no big deal.  if the
2677		// disk is dirty this prevents us from replaying the
2678		// journal over top of changes that someone else made.
2679		//
2680		arg_flags |= JOURNAL_RESET;
2681
2682		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2683		//	   jib_offset,
2684		//	   jib_size, SWAP_BE32(vhp->blockSize));
2685
2686		hfsmp->jnl = journal_open(hfsmp->jvp,
2687								  jib_offset,
2688								  jib_size,
2689								  devvp,
2690								  hfsmp->hfs_logical_block_size,
2691								  arg_flags,
2692								  arg_tbufsz,
2693								  hfs_sync_metadata, hfsmp->hfs_mp,
2694								  hfsmp->hfs_mp);
2695		if (hfsmp->jnl)
2696			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2697	}
2698
2699
2700	if (write_jibp) {
2701		jibp->flags  = SWAP_BE32(jib_flags);
2702		jibp->offset = SWAP_BE64(jib_offset);
2703		jibp->size   = SWAP_BE64(jib_size);
2704
2705		buf_bwrite(jinfo_bp);
2706	} else {
2707		buf_brelse(jinfo_bp);
2708	}
2709	jinfo_bp = NULL;
2710	jibp     = NULL;
2711
2712	// if we expected the journal to be there and we couldn't
2713	// create it or open it then we have to bail out.
2714	if (hfsmp->jnl == NULL) {
2715		printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2716		return EINVAL;
2717	}
2718
2719	return 0;
2720}
2721
2722/*
2723 * Calculate the allocation zone for metadata.
2724 *
2725 * This zone includes the following:
2726 *	Allocation Bitmap file
2727 *	Overflow Extents file
2728 *	Journal file
2729 *	Quota files
2730 *	Clustered Hot files
2731 *	Catalog file
2732 *
2733 *                          METADATA ALLOCATION ZONE
2734 * ____________________________________________________________________________
2735 * |    |    |     |               |                              |           |
2736 * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
2737 * |____|____|_____|_______________|______________________________|___________|
2738 *
2739 * <------------------------------- N * 128 MB ------------------------------->
2740 *
2741 */
2742#define GIGABYTE  (u_int64_t)(1024*1024*1024)
2743
2744#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2745#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2746#define JOURNAL_DEFAULT_SIZE  (8*1024*1024)
2747#define JOURNAL_MAXIMUM_SIZE  (512*1024*1024)
2748#define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
2749#define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
2750
/* Initialize the metadata zone.
 *
 * If the size of the volume is less than the minimum required for a
 * metadata zone, the metadata zone is disabled.
 *
 * If disable is true, disable the metadata zone unconditionally.
 */
2758void
2759hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2760{
2761	ExtendedVCB  *vcb;
2762	u_int64_t  fs_size;
2763	u_int64_t  zonesize;
2764	u_int64_t  temp;
2765	u_int64_t  filesize;
2766	u_int32_t  blk;
2767	int  items, really_do_it=1;
2768
2769	vcb = HFSTOVCB(hfsmp);
2770	fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2771
2772	/*
2773	 * For volumes less than 10 GB, don't bother.
2774	 */
2775	if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2776		really_do_it = 0;
2777	}
2778
2779	/*
2780	 * Skip non-journaled volumes as well.
2781	 */
2782	if (hfsmp->jnl == NULL) {
2783		really_do_it = 0;
2784	}
2785
2786	/* If caller wants to disable metadata zone, do it */
2787	if (disable == true) {
2788		really_do_it = 0;
2789	}
2790
2791	/*
2792	 * Start with space for the boot blocks and Volume Header.
2793	 * 1536 = byte offset from start of volume to end of volume header:
2794	 * 1024 bytes is the offset from the start of the volume to the
2795	 * start of the volume header (defined by the volume format)
2796	 * + 512 bytes (the size of the volume header).
2797	 */
2798	zonesize = roundup(1536, hfsmp->blockSize);
2799
2800	/*
2801	 * Add the on-disk size of allocation bitmap.
2802	 */
2803	zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2804
2805	/*
2806	 * Add space for the Journal Info Block and Journal (if they're in
2807	 * this file system).
2808	 */
2809	if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
2810		zonesize += hfsmp->blockSize + hfsmp->jnl_size;
2811	}
2812
2813	/*
2814	 * Add the existing size of the Extents Overflow B-tree.
2815	 * (It rarely grows, so don't bother reserving additional room for it.)
2816	 */
2817	zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2818
2819	/*
2820	 * If there is an Attributes B-tree, leave room for 11 clumps worth.
2821	 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
2822	 * When installing a full OS install onto a 20GB volume, we use
2823	 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2824	 * us with another 3 or 4 clumps worth before we need another extent.
2825	 */
2826	if (hfsmp->hfs_attribute_cp) {
2827		zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
2828	}
2829
2830	/*
2831	 * Leave room for 11 clumps of the Catalog B-tree.
2832	 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
2833	 * When installing a full OS install onto a 20GB volume, we use
2834	 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2835	 * us with another 3 or 4 clumps worth before we need another extent.
2836	 */
2837	zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
2838
2839	/*
2840	 * Add space for hot file region.
2841	 *
2842	 * ...for now, use 5 MB per 1 GB (0.5 %)
2843	 */
2844	filesize = (fs_size / 1024) * 5;
2845	if (filesize > HOTBAND_MAXIMUM_SIZE)
2846		filesize = HOTBAND_MAXIMUM_SIZE;
2847	else if (filesize < HOTBAND_MINIMUM_SIZE)
2848		filesize = HOTBAND_MINIMUM_SIZE;
2849	/*
2850	 * Calculate user quota file requirements.
2851	 */
2852	if (hfsmp->hfs_flags & HFS_QUOTAS) {
2853		items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
2854		if (items < QF_MIN_USERS)
2855			items = QF_MIN_USERS;
2856		else if (items > QF_MAX_USERS)
2857			items = QF_MAX_USERS;
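		/* Round a non-power-of-two count up to the next power of two. */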
2858		if (!powerof2(items)) {
2859			int x = items;
2860			items = 4;
2861			while (x>>1 != 1) {
2862				x = x >> 1;
2863				items = items << 1;
2864			}
2865		}
2866		filesize += (items + 1) * sizeof(struct dqblk);
		/*
		 * Calculate group quota file requirements.
		 */
2871		items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
2872		if (items < QF_MIN_GROUPS)
2873			items = QF_MIN_GROUPS;
2874		else if (items > QF_MAX_GROUPS)
2875			items = QF_MAX_GROUPS;
2876		if (!powerof2(items)) {
2877			int x = items;
2878			items = 4;
2879			while (x>>1 != 1) {
2880				x = x >> 1;
2881				items = items << 1;
2882			}
2883		}
2884		filesize += (items + 1) * sizeof(struct dqblk);
2885	}
2886	zonesize += filesize;
2887
2888	/*
2889	 * Round up entire zone to a bitmap block's worth.
2890	 * The extra space goes to the catalog file and hot file area.
2891	 */
2892	temp = zonesize;
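	/* One bitmap I/O block (vcbVBMIOSize bytes) covers vcbVBMIOSize * 8 allocation blocks. */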
2893	zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
2894	hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
2895	/*
2896	 * If doing the round up for hfs_min_alloc_start would push us past
2897	 * allocLimit, then just reset it back to 0.  Though using a value
2898	 * bigger than allocLimit would not cause damage in the block allocator
2899	 * code, this value could get stored in the volume header and make it out
2900	 * to disk, making the volume header technically corrupt.
2901	 */
2902	if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
2903		hfsmp->hfs_min_alloc_start = 0;
2904	}
2905
2906	if (really_do_it == 0) {
2907		/* If metadata zone needs to be disabled because the
2908		 * volume was truncated, clear the bit and zero out
2909		 * the values that are no longer needed.
2910		 */
2911		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2912			/* Disable metadata zone */
2913			hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
2914
2915			/* Zero out mount point values that are not required */
2916			hfsmp->hfs_catalog_maxblks = 0;
2917			hfsmp->hfs_hotfile_maxblks = 0;
2918			hfsmp->hfs_hotfile_start = 0;
2919			hfsmp->hfs_hotfile_end = 0;
2920			hfsmp->hfs_hotfile_freeblks = 0;
2921			hfsmp->hfs_metazone_start = 0;
2922			hfsmp->hfs_metazone_end = 0;
2923		}
2924
2925		return;
2926	}
2927
2928	temp = zonesize - temp;  /* temp has extra space */
2929	filesize += temp / 3;
2930	hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
2931
2932	hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
2933
2934	/* Convert to allocation blocks. */
2935	blk = zonesize / vcb->blockSize;
2936
2937	/* The default metadata zone location is at the start of volume. */
2938	hfsmp->hfs_metazone_start = 1;
2939	hfsmp->hfs_metazone_end = blk - 1;
2940
2941	/* The default hotfile area is at the end of the zone. */
2942	if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
2943		hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
2944		hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
2945		hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
2946	}
2947	else {
2948		hfsmp->hfs_hotfile_start = 0;
2949		hfsmp->hfs_hotfile_end = 0;
2950		hfsmp->hfs_hotfile_freeblks = 0;
2951	}
2952#if 0
2953	printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
2954	printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
2955	printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
2956#endif
2957	hfsmp->hfs_flags |= HFS_METADATA_ZONE;
2958}
2959
2960
2961static u_int32_t
2962hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
2963{
2964	ExtendedVCB  *vcb = HFSTOVCB(hfsmp);
2965	int  lockflags;
2966	int  freeblocks;
2967
2968	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2969	freeblocks = MetaZoneFreeBlocks(vcb);
2970	hfs_systemfile_unlock(hfsmp, lockflags);
2971
2972	/* Minus Extents overflow file reserve. */
2973	freeblocks -=
2974		hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
2975	/* Minus catalog file reserve. */
2976	freeblocks -=
2977		hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
2978	if (freeblocks < 0)
2979		freeblocks = 0;
2980
2981	return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
2982}
2983
2984/*
2985 * Determine if a file is a "virtual" metadata file.
2986 * This includes journal and quota files.
2987 */
2988int
2989hfs_virtualmetafile(struct cnode *cp)
2990{
2991	const char * filename;
2992
2993
2994	if (cp->c_parentcnid != kHFSRootFolderID)
2995		return (0);
2996
2997	filename = (const char *)cp->c_desc.cd_nameptr;
2998	if (filename == NULL)
2999		return (0);
3000
3001	if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3002	    (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3003	    (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3004	    (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3005	    (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3006		return (1);
3007
3008	return (0);
3009}
3010
3011__private_extern__
3012void hfs_syncer_lock(struct hfsmount *hfsmp)
3013{
3014    hfs_lock_mount(hfsmp);
3015}
3016
3017__private_extern__
3018void hfs_syncer_unlock(struct hfsmount *hfsmp)
3019{
3020    hfs_unlock_mount(hfsmp);
3021}
3022
3023__private_extern__
3024void hfs_syncer_wait(struct hfsmount *hfsmp)
3025{
3026    msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
3027           "hfs_syncer_wait", NULL);
3028}
3029
3030__private_extern__
3031void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3032{
3033    wakeup(&hfsmp->hfs_sync_incomplete);
3034}
3035
3036__private_extern__
3037uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3038{
3039    uint64_t deadline;
3040    clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3041    return deadline;
3042}
3043
3044__private_extern__
3045void hfs_syncer_queue(thread_call_t syncer)
3046{
3047    if (thread_call_enter_delayed_with_leeway(syncer,
3048                                              NULL,
3049                                              hfs_usecs_to_deadline(HFS_META_DELAY),
3050                                              0,
3051                                              THREAD_CALL_DELAY_SYS_BACKGROUND)) {
        printf("hfs: syncer already scheduled!\n");
3053    }
3054}
3055
3056//
3057// Fire off a timed callback to sync the disk if the
3058// volume is on ejectable media.
3059//
3060 __private_extern__
3061void
3062hfs_sync_ejectable(struct hfsmount *hfsmp)
3063{
3064    // If we don't have a syncer or we get called by the syncer, just return
3065    if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
3066        return;
3067
3068    hfs_syncer_lock(hfsmp);
3069
3070    if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3071        microuptime(&hfsmp->hfs_sync_req_oldest);
3072
3073    /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
3074       don't want to queue again if there is a sync outstanding. */
3075    if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
3076        hfs_syncer_unlock(hfsmp);
3077        return;
3078    }
3079
3080    hfsmp->hfs_sync_incomplete = TRUE;
3081
3082    thread_call_t syncer = hfsmp->hfs_syncer;
3083
3084    hfs_syncer_unlock(hfsmp);
3085
3086    hfs_syncer_queue(syncer);
3087}
3088
3089int
3090hfs_start_transaction(struct hfsmount *hfsmp)
3091{
3092	int ret, unlock_on_err=0;
3093	void * thread = current_thread();
3094
3095#ifdef HFS_CHECK_LOCK_ORDER
3096	/*
3097	 * You cannot start a transaction while holding a system
3098	 * file lock. (unless the transaction is nested.)
3099	 */
3100	if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3101		if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3102			panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3103		}
3104		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3105			panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3106		}
3107		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3108			panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3109		}
3110	}
3111#endif /* HFS_CHECK_LOCK_ORDER */
3112
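	/*
	 * Take the shared global lock unless this thread already owns the
	 * journal, i.e. unless this is a nested transaction.
	 */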
3113	if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
3114		hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3115		OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3116		unlock_on_err = 1;
3117	}
3118
	/* If a downgrade to read-only mount is in progress, no process
	 * other than the one performing the downgrade is allowed to
	 * modify the file system.
	 */
3123	if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3124			(hfsmp->hfs_downgrading_proc != thread)) {
3125		ret = EROFS;
3126		goto out;
3127	}
3128
3129	if (hfsmp->jnl) {
3130		ret = journal_start_transaction(hfsmp->jnl);
3131		if (ret == 0) {
3132			OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
3133		}
3134	} else {
3135		ret = 0;
3136	}
3137
3138out:
3139	if (ret != 0 && unlock_on_err) {
3140		hfs_unlock_global (hfsmp);
3141		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3142	}
3143
3144    return ret;
3145}
3146
3147int
3148hfs_end_transaction(struct hfsmount *hfsmp)
3149{
3150    int need_unlock=0, ret;
3151
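    /*
     * OSAddAtomic returns the value prior to the decrement, so a result of 1
     * means this call ends the outermost nested transaction and the global
     * lock can be dropped.
     */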
3152    if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
3153	    && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
3154	    need_unlock = 1;
3155    }
3156
3157	if (hfsmp->jnl) {
3158		ret = journal_end_transaction(hfsmp->jnl);
3159	} else {
3160		ret = 0;
3161	}
3162
3163	if (need_unlock) {
3164		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3165		hfs_unlock_global (hfsmp);
3166		hfs_sync_ejectable(hfsmp);
3167	}
3168
3169    return ret;
3170}
3171
3172
3173/*
3174 * Flush the contents of the journal to the disk.
3175 *
3176 *  Input:
3177 *  	wait_for_IO -
3178 *  	If TRUE, wait to write in-memory journal to the disk
3179 *  	consistently, and also wait to write all asynchronous
3180 *  	metadata blocks to its corresponding locations
3181 *  	consistently on the disk.  This means that the journal
3182 *  	is empty at this point and does not contain any
 *  	transactions.  This is overkill in normal scenarios
 *  	but is useful whenever the metadata blocks are required
 *  	to be consistent on disk rather than just the journal;
 *  	for example, before live verification and live volume
 *  	resizing.
3188 *
3189 *  	If FALSE, only wait to write in-memory journal to the
3190 *  	disk consistently.  This means that the journal still
3191 *  	contains uncommitted transactions and the file system
3192 *  	metadata blocks in the journal transactions might be
3193 *  	written asynchronously to the disk.  But there is no
3194 *  	guarantee that they are written to the disk before
3195 *  	returning to the caller.  Note that this option is
3196 *  	sufficient for file system data integrity as it
3197 *  	guarantees consistent journal content on the disk.
3198 */
3199int
3200hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
3201{
3202	int ret;
3203
3204	/* Only peek at hfsmp->jnl while holding the global lock */
3205	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3206	if (hfsmp->jnl) {
3207		ret = journal_flush(hfsmp->jnl, wait_for_IO);
3208	} else {
3209		ret = 0;
3210	}
3211	hfs_unlock_global (hfsmp);
3212
3213	return ret;
3214}
3215
3216
3217/*
3218 * hfs_erase_unused_nodes
3219 *
 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3221 * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
3222 * zeroes to the unused nodes.
3223 *
3224 * How do we detect when a volume needs this repair?  We can't always be
3225 * certain.  If a volume was created after a certain date, then it may have
 * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
 * clump, if the Catalog B-tree is larger than its clump size then the entire
 * first clump must have been written to, so there should be no unused and
 * unwritten nodes in that first clump and this repair is not needed.
3231 *
3232 * We have defined a bit in the Volume Header's attributes to indicate when the
3233 * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
3234 * As will fsck_hfs when it repairs the unused nodes.
3235 */
3236int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3237{
3238	int result;
3239	struct filefork *catalog;
3240	int lockflags;
3241
3242	if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3243	{
3244		/* This volume has already been checked and repaired. */
3245		return 0;
3246	}
3247
3248	if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3249	{
3250		/* This volume is too old to have had the problem. */
3251		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3252		return 0;
3253	}
3254
3255	catalog = hfsmp->hfs_catalog_cp->c_datafork;
3256	if (catalog->ff_size > catalog->ff_clumpsize)
3257	{
3258		/* The entire first clump must have been in use at some point. */
3259		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3260		return 0;
3261	}
3262
3263	/*
3264	 * If we get here, we need to zero out those unused nodes.
3265	 *
3266	 * We start a transaction and lock the catalog since we're going to be
	 * making on-disk changes.  But note that BTZeroUnusedNodes doesn't actually
3268	 * do its writing via the journal, because that would be too much I/O
3269	 * to fit in a transaction, and it's a pain to break it up into multiple
3270	 * transactions.  (It behaves more like growing a B-tree would.)
3271	 */
3272	printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3273	result = hfs_start_transaction(hfsmp);
3274	if (result)
3275		goto done;
3276	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3277	result = BTZeroUnusedNodes(catalog);
3278	vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3279	hfs_systemfile_unlock(hfsmp, lockflags);
3280	hfs_end_transaction(hfsmp);
3281	if (result == 0)
3282		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3283	printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3284
3285done:
3286	return result;
3287}
3288
3289
3290extern time_t snapshot_timestamp;
3291
3292int
3293check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3294{
3295	int tracked_error = 0, snapshot_error = 0;
3296
3297	if (vp == NULL) {
3298		return 0;
3299	}
3300
3301	/* Swap files are special; skip them */
3302	if (vnode_isswap(vp)) {
3303		return 0;
3304	}
3305
3306	if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
3307		// the file has the tracked bit set, so send an event to the tracked-file handler
3308		int error;
3309
3310		// printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
3311		error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
3312		if (error) {
3313			if (error == EAGAIN) {
3314				printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
3315
3316			} else if (error == EINTR) {
3317				// printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
3318				tracked_error = EINTR;
3319			}
3320		}
3321	}
3322
3323	if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3324		// the change time is within this epoch
3325		int error;
3326
3327		error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3328		if (error == EDEADLK) {
3329			snapshot_error = 0;
3330		} else if (error) {
3331			if (error == EAGAIN) {
3332				printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3333			} else if (error == EINTR) {
3334				// printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3335				snapshot_error = EINTR;
3336			}
3337		}
3338	}
3339
3340	if (tracked_error) return tracked_error;
3341	if (snapshot_error) return snapshot_error;
3342
3343	return 0;
3344}
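
/*
 * Illustrative sketch (not compiled): how a vnode operation might consult
 * check_for_tracked_file() before modifying a file.  The pre-change ctime is
 * captured first so the snapshot-epoch comparison above sees the file's state
 * from before the operation.  Variable names are hypothetical;
 * NAMESPACE_HANDLER_WRITE_OP is one of the op_type values handled above.
 */
#if 0
	time_t orig_ctime = VTOC(vp)->c_ctime;	/* capture before dirtying the file */
	int nserr;

	nserr = check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
	if (nserr) {
		/* per the code above, only EINTR is expected to surface here */
		return nserr;
	}
#endif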
3345
3346int
3347check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3348{
3349	int error;
3350
3351	if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3352		// there's nothing to do, it's not dataless
3353		return 0;
3354	}
3355
3356	/* Swap files are special; ignore them */
3357	if (vnode_isswap(vp)) {
3358		return 0;
3359	}
3360
3361	// printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3362	error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3363	if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3364		error = 0;
3365	} else if (error) {
3366		if (error == EAGAIN) {
3367			printf("hfs: dataless: timed out waiting for namespace handler...\n");
3368			// XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3369			return 0;
3370		} else if (error == EINTR) {
3371			// printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3372			return EINTR;
3373		}
3374	} else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3375		//
3376		// if we're here, the dataless bit is still set on the file
3377		// which means it didn't get handled.  we return an error
3378		// but it's presently ignored by all callers of this function.
3379		//
3380		// XXXdbg - EDATANOTPRESENT is what we really need...
3381		//
3382		return EBADF;
3383	}
3384
3385	return error;
3386}
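
/*
 * Illustrative sketch (not compiled): a write path might call this hook before
 * touching file data so that a dataless stub can be materialized by the
 * namespace handler first.  NAMESPACE_HANDLER_WRITE_OP is the op_type given
 * special EDEADLK treatment above; the surrounding names are hypothetical.
 */
#if 0
	int nserr;

	nserr = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
	if (nserr) {
		/* per the comments above, callers currently see EINTR or EBADF here */
		return nserr;
	}
	/* the file's data should now be present; proceed with the write */
#endif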
3387
3388
3389//
3390// NOTE: this function takes care of starting a transaction and
3391//       acquiring the systemfile lock so that it can call
3392//       cat_update().
3393//
// NOTE: do NOT hold any cnode locks while calling this function
3395//       to avoid deadlocks (because we take a lock on the root
3396//       cnode)
3397//
3398int
3399hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3400{
3401	struct vnode *rvp;
3402	struct cnode *cp;
3403	int error;
3404
3405	error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3406	if (error) {
3407		return error;
3408	}
3409
3410	cp = VTOC(rvp);
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
		/* drop the iocount taken by VFS_ROOT before bailing out */
		vnode_put(rvp);
		return error;
	}
3414	struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3415
	int lockflags;
	if ((error = hfs_start_transaction(hfsmp)) != 0) {
		/* don't leak the root cnode lock or the vnode reference */
		hfs_unlock(cp);
		vnode_put(rvp);
		return error;
	}
3420	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3421
3422	if (extinfo->document_id == 0) {
3423		// initialize this to start at 3 (one greater than the root-dir id)
3424		extinfo->document_id = 3;
3425	}
3426
3427	*docid = extinfo->document_id++;
3428
3429	// mark the root cnode dirty
3430	cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
3431	(void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
3432
3433	hfs_systemfile_unlock (hfsmp, lockflags);
3434	(void) hfs_end_transaction(hfsmp);
3435
3436	(void) hfs_unlock(cp);
3437
3438	vnode_put(rvp);
3439	rvp = NULL;
3440
3441	return 0;
3442}
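
/*
 * Illustrative sketch (not compiled): assigning a freshly generated document
 * ID to a directory.  Per the notes above, the caller must not hold any cnode
 * locks while calling hfs_generate_document_id(); the directory's cnode ("dcp",
 * a hypothetical name) is locked only afterwards to store the new ID, using
 * the same finderinfo layout as the root directory above.
 */
#if 0
	uint32_t new_docid;
	int docerr;

	docerr = hfs_generate_document_id(hfsmp, &new_docid);
	if (docerr == 0) {
		if (hfs_lock(dcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) {
			struct FndrExtendedDirInfo *fip;

			/* document_id lives 16 bytes into the 32-byte Finder info */
			fip = (struct FndrExtendedDirInfo *)((void *)((char *)&dcp->c_attr.ca_finderinfo + 16));
			fip->document_id = new_docid;
			dcp->c_flag |= C_MODIFIED;
			hfs_unlock(dcp);
		}
	}
#endif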
3443