/*
 * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/time.h>
#include <sys/ubc.h>
#include <sys/quota.h>
#include <sys/kdebug.h>
#include <libkern/OSByteOrder.h>
#include <sys/buf_internal.h>

#include <kern/locks.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <hfs/hfs.h>
#include <hfs/hfs_catalog.h>
#include <hfs/hfs_cnode.h>
#include <hfs/hfs_quota.h>
#include <hfs/hfs_format.h>

extern int prtactive;

extern lck_attr_t *  hfs_lock_attr;
extern lck_grp_t *  hfs_mutex_group;
extern lck_grp_t *  hfs_rwlock_group;

static void  hfs_reclaim_cnode(struct cnode *);
static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
static int hfs_isordered(struct cnode *, struct cnode *);

extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);

__inline__ int hfs_checkdeleted (struct cnode *cp) {
	return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
}
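
/*
 * Illustrative usage sketch (not part of the original file): callers that
 * already hold the cnode lock typically bail out early on deleted cnodes:
 *
 *	if ((error = hfs_checkdeleted(cp))) {
 *		return error;	// open-unlinked or gone; surface ENOENT
 *	}
 */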

/*
 * Function used by a special fcntl() to decorate a cnode/vnode, indicating
 * that it is backing another filesystem, like a disk image.
 *
 * The argument 'val' indicates whether or not to set the bit in the cnode flags.
 *
 * Returns non-zero on failure, 0 on success.
 */
int hfs_set_backingstore (struct vnode *vp, int val) {
	struct cnode *cp = NULL;
	int err = 0;

	cp = VTOC(vp);
	if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
		return EINVAL;
	}

	/* lock the cnode */
	err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
	if (err) {
		return err;
	}

	if (val) {
		cp->c_flag |= C_BACKINGSTORE;
	}
	else {
		cp->c_flag &= ~C_BACKINGSTORE;
	}

	/* unlock everything */
	hfs_unlock (cp);

	return err;
}

/*
 * Function used by a special fcntl() that checks whether a cnode/vnode
 * indicates it is backing another filesystem, like a disk image.
 *
 * The argument 'val' is an output argument for whether or not the bit is set.
 *
 * Returns non-zero on failure, 0 on success.
 */

int hfs_is_backingstore (struct vnode *vp, int *val) {
	struct cnode *cp = NULL;
	int err = 0;

	if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
		*val = 0;
		return 0;
	}

	cp = VTOC(vp);

	/* lock the cnode */
	err = hfs_lock (cp, HFS_SHARED_LOCK);
	if (err) {
		return err;
	}

	if (cp->c_flag & C_BACKINGSTORE) {
		*val = 1;
	}
	else {
		*val = 0;
	}

	/* unlock everything */
	hfs_unlock (cp);

	return err;
}
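
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * a disk image's backing file would typically be tagged through the fcntl
 * that reaches hfs_set_backingstore.  The F_SETBACKINGSTORE command name
 * is from <sys/fcntl.h>; whether a corresponding query fcntl is exposed
 * is not shown here.
 *
 *	#include <fcntl.h>
 *
 *	int fd = open("/Volumes/Data/image.dmg", O_RDWR);
 *	if (fd >= 0 && fcntl(fd, F_SETBACKINGSTORE, 0) == -1) {
 *		// tagging failed; the volume/file type may not support it
 *	}
 */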


/*
 * hfs_cnode_teardown
 *
 * This is an internal function that is invoked from both hfs_vnop_inactive
 * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called on vnodes
 * being recycled and reclaimed, it is important that we do any post-processing
 * necessary for the cnode in both places.  Important tasks include things such as
 * releasing the blocks from an open-unlinked file when all references to it have dropped,
 * and handling resource forks separately from data forks.
 *
 * Note that we take only the vnode as an argument here (rather than the cnode).
 * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
 * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
 * vnode we need to reclaim if only the cnode is supplied.
 *
 * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
 * if one is invoked right after the other.  In the second call, most of this function's if()
 * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
 * As a quick check to see if this function is necessary, determine if the cnode is already
 * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that
 * remain for cnodes marked in such a fashion are to tear down their fork references and
 * release all directory hints and hardlink origins.  However, both of those are done
 * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
 * entry is no longer there.
 *
 * The 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we
 * are invoked from hfs_vnop_reclaim, we cannot call functions like cluster_push since the UBC
 * info is totally gone by that point.
 *
 * Assumes that both truncate and cnode locks for 'cp' are held.
 */
static
int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {

	int forkcount = 0;
	enum vtype v_type;
	struct cnode *cp;
	int error = 0;
	int started_tr = 0;
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct proc *p = vfs_context_proc(ctx);
	int truncated = 0;
	cat_cookie_t cookie;
	int cat_reserve = 0;
	int lockflags;
	int ea_error = 0;

	v_type = vnode_vtype(vp);
	cp = VTOC(vp);

	if (cp->c_datafork) {
		++forkcount;
	}
	if (cp->c_rsrcfork) {
		++forkcount;
	}


	/*
	 * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
	 * The dirty regions would have already been synced to disk, so informing UBC
	 * that they can toss the pages doesn't help anyone at this point.
	 *
	 * Note that this is a performance problem if the vnode goes straight to reclaim
	 * (and skips inactive), since there would be no way for anyone to notify the UBC
	 * that all pages in this file are basically useless.
	 */
	if (reclaim == 0) {
		/*
		 * Check whether we are tearing down a cnode with only one remaining fork.
		 * If there are blocks in its filefork, then we need to unlock the cnode
		 * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
		 * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
		 * panic.
		 */

		if ((v_type == VREG || v_type == VLNK) &&
			(cp->c_flag & C_DELETED) &&
			(VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
			hfs_unlock(cp);
			/* ubc_setsize would simply fail if we called it from VNOP_RECLAIM */
			ubc_setsize(vp, 0);
			(void) hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

	/*
	 * Push file data out for normal files that haven't been evicted from
	 * the namespace.  We only do this if this function was not called from reclaim,
	 * because by that point the UBC information has been totally torn down.
	 *
	 * There should also be no way for a normal file that has NOT been deleted from
	 * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
	 * when the file becomes open-unlinked.
	 */
	if ((v_type == VREG) &&
		(!ISSET(cp->c_flag, C_DELETED)) &&
		(!ISSET(cp->c_flag, C_NOEXISTS)) &&
		(VTOF(vp)->ff_blocks) &&
		(reclaim == 0)) {
		/*
		 * Note that if content protection is enabled, then this is where we will
		 * attempt to issue IOs for all dirty regions of this file.
		 *
		 * If we're called from hfs_vnop_inactive, all this means is that at the time
		 * the logic for deciding to call this function ran, there were not any lingering
		 * mmap/fd references for this file.  However, there is nothing preventing the system
		 * from creating a new reference in between the time that logic was checked
		 * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
		 * that there aren't any references is during vnop_reclaim.
		 */
		hfs_filedone(vp, ctx);
	}

	/*
	 * We're holding the cnode lock now.  Stall behind any shadow BPs that may
	 * be involved with this vnode if it is a symlink.  We don't want to allow
	 * the blocks that we're about to release to be put back into the pool if there
	 * is pending I/O to them.
	 */
	if (v_type == VLNK) {
		/*
		 * This will block if the asynchronous journal flush is in progress.
		 * If this symlink is not being renamed over and doesn't have any open FDs,
		 * then we'll remove it from the journal's bufs below in kill_block.
		 */
		buf_wait_for_shadow_io (vp, 0);
	}

	/*
	 * Remove any directory hints or cached origins
	 */
	if (v_type == VDIR) {
		hfs_reldirhints(cp, 0);
	}
	if (cp->c_flag & C_HARDLINK) {
		hfs_relorigins(cp);
	}

	/*
	 * This check is slightly complicated.  We should only truncate data
	 * in very specific cases for open-unlinked files.  This is because
	 * we want to ensure that the resource fork continues to be available
	 * if the caller has the data fork open.  However, this is not symmetric;
	 * someone who has the resource fork open need not be able to access the data
	 * fork once the data fork has gone inactive.
	 *
	 * If we're the last fork, then we have cleaning up to do.
	 *
	 * A) last fork, and vp == c_vp
	 *	Truncate away own fork data. If rsrc fork is not in core, truncate it too.
	 *
	 * B) last fork, and vp == c_rsrc_vp
	 *	Truncate ourselves, assume data fork has been cleaned due to C).
	 *
	 * If we're not the last fork, then things are a little different:
	 *
	 * C) not the last fork, vp == c_vp
	 *	Truncate ourselves.  Once the file has gone out of the namespace,
	 *	it cannot be further opened.  Further access to the rsrc fork may
	 *	continue, however.
	 *
	 * D) not the last fork, vp == c_rsrc_vp
	 *	Don't enter the block below, just clean up vnode and push it out of core.
	 */

	if ((v_type == VREG || v_type == VLNK) &&
		(cp->c_flag & C_DELETED) &&
		((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {

		/* Start a transaction here.  We're about to change file sizes */
		if (started_tr == 0) {
			if (hfs_start_transaction(hfsmp) != 0) {
				error = EINVAL;
				goto out;
			}
			else {
				started_tr = 1;
			}
		}

		/* Truncate away our own fork data. (Case A, B, C above) */
		if (VTOF(vp)->ff_blocks != 0) {

			/*
			 * At this point, we have decided that this cnode is
			 * suitable for full removal.  We are about to deallocate
			 * its blocks and remove its entry from the catalog.
			 * If it was a symlink, then it's possible that the operation
			 * which created it is still in the current transaction group
			 * due to coalescing.  Take action here to kill the data blocks
			 * of the symlink out of the journal before moving to
			 * deallocate the blocks.  We need to be in the middle of
			 * a transaction before calling buf_iterate like this.
			 *
			 * Note: we have to kill any potential symlink buffers out of
			 * the journal prior to deallocating their blocks.  This is so
			 * that we don't race with another thread that may be doing an
			 * allocation concurrently and pick up these blocks. It could
			 * generate I/O against them which could go out ahead of our journal
			 * transaction.
			 */

			if (hfsmp->jnl && vnode_islnk(vp)) {
				buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
			}

			/*
			 * Since we're already inside a transaction,
			 * tell hfs_truncate to skip the ubc_setsize.
			 *
			 * This truncate call (and the one below) is fine from VNOP_RECLAIM's
			 * context because we're only removing blocks, not zero-filling new
			 * ones.  The C_DELETED check above makes things much simpler.
			 */
			error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx);
			if (error) {
				goto out;
			}
			truncated = 1;
		}

		/*
		 * Truncate away the resource fork, if we represent the data fork and
		 * it is the last fork.  That means, by definition, the rsrc fork is not in
		 * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
		 * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
		 * to get rid of the resource fork's data.
		 *
		 * This is invoked via case A above only.
		 */
		if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
			struct cat_lookup_buffer *lookup_rsrc = NULL;
			struct cat_desc *desc_ptr = NULL;
			lockflags = 0;

			MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
			if (lookup_rsrc == NULL) {
				printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
				error = ENOMEM;
				goto out;
			}
			else {
				bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
			}

			if (cp->c_desc.cd_namelen == 0) {
				/* Initialize the rsrc descriptor for lookup if necessary */
				MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);

				lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
				lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
				lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
				lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;

				desc_ptr = &lookup_rsrc->lookup_desc;
			}
			else {
				desc_ptr = &cp->c_desc;
			}

			lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

			error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL,
					(struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);

			hfs_systemfile_unlock (hfsmp, lockflags);

			if (error) {
				FREE (lookup_rsrc, M_TEMP);
				goto out;
			}

			/*
			 * Make the filefork in our temporary struct look like a real
			 * filefork.  Fill in the cp, sysfileinfo and rangelist fields.
			 */
			rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
			lookup_rsrc->lookup_fork.ff_cp = cp;

			/*
			 * If there were no errors, then we have the catalog's fork information
			 * for the resource fork in question.  Go ahead and delete the data in it now.
			 */

			error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
			FREE(lookup_rsrc, M_TEMP);

			if (error) {
				goto out;
			}

			/*
			 * This fileid's resource fork extents have now been fully deleted on-disk
			 * and this CNID is no longer valid. At this point, we should be able to
			 * zero out cp->c_blocks to indicate there is no data left in this file.
			 */
			cp->c_blocks = 0;
		}

		/* End the transaction from the start of the file truncation segment */
		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	/*
	 * If we represent the last fork (or none in the case of a dir),
	 * and the cnode has become open-unlinked,
	 * AND it has EAs, then we need to get rid of them.
	 *
	 * Note that this must happen outside of any other transactions
	 * because it starts/ends its own transactions and grabs its
	 * own locks.  This is to prevent a file with a lot of attributes
	 * from creating a transaction that is too large (which panics).
	 */
	if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
		(cp->c_flag & C_DELETED) &&
		(forkcount <= 1)) {

		ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
	}


	/*
	 * If the cnode represented an open-unlinked file, then now
	 * actually remove the cnode's catalog entry and release all blocks
	 * it may have been using.
	 */
	if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
		/*
		 * Mark cnode in transit so that no one can get this
		 * cnode from cnode hash.
		 */
		// hfs_chash_mark_in_transit(hfsmp, cp);
		// XXXdbg - remove the cnode from the hash table since it's deleted
		//          otherwise someone could go to sleep on the cnode and not
		//          be woken up until this vnode gets recycled which could be
		//          a very long time...
		hfs_chashremove(hfsmp, cp);

		cp->c_flag |= C_NOEXISTS;   // XXXdbg
		cp->c_rdev = 0;

		if (started_tr == 0) {
			if (hfs_start_transaction(hfsmp) != 0) {
				error = EINVAL;
				goto out;
			}
			started_tr = 1;
		}

		/*
		 * Reserve some space in the Catalog file.
		 */
		if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
			goto out;
		}
		cat_reserve = 1;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);

		if (cp->c_blocks > 0) {
			printf("hfs_inactive: deleting non-empty%sfile %d, "
			       "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
			       (int)cp->c_fileid, (int)cp->c_blocks);
		}

		//
		// release the name pointer in the descriptor so that
		// cat_delete() will use the file-id to do the deletion.
		// in the case of hard links this is imperative (in the
		// case of regular files the fileid and cnid are the
		// same so it doesn't matter).
		//
		cat_releasedesc(&cp->c_desc);

		/*
		 * The descriptor name may be zero,
		 * in which case the fileid is used.
		 */
		error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);

		if (error && truncated && (error != ENXIO))
			printf("hfs_inactive: couldn't delete a truncated file!\n");

		/* Update HFS Private Data dir */
		if (error == 0) {
			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
			if (vnode_isdir(vp)) {
				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
			}
			(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
							 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
		}

		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			goto out;
		}

#if QUOTA
		if (hfsmp->hfs_flags & HFS_QUOTAS)
			(void)hfs_chkiq(cp, -1, NOCRED, 0);
#endif /* QUOTA */

		/* C_NOEXISTS was already set at the beginning of this block */
		cp->c_flag &= ~C_DELETED;
		cp->c_touch_chgtime = TRUE;
		cp->c_touch_modtime = TRUE;

		if (error == 0)
			hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
	}

	/*
	 * A file may have had delayed allocations, in which case hfs_update
	 * would not have updated the catalog record (cat_update).  We need
	 * to do that now, before we lose our fork data.  We also need to
	 * force the update, or hfs_update will again skip the cat_update.
	 *
	 * If the file has C_NOEXISTS set, then we can skip the hfs_update call
	 * because the catalog entry has already been removed.  There would be no
	 * point to looking up the entry in the catalog to modify it when we already
	 * know it's gone.
	 */
	if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
		((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime ||
		 cp->c_touch_chgtime || cp->c_touch_modtime)) {

		if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
			cp->c_flag |= C_FORCEUPDATE;
		}
		hfs_update(vp, 0);
	}

out:
	if (cat_reserve)
		cat_postflight(hfsmp, &cookie, p);

	// XXXdbg - have to do this because a goto could have come here
	if (started_tr) {
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}

#if 0
#if CONFIG_PROTECT
	/*
	 * cnode truncate lock and cnode lock are both held exclusive here.
	 *
	 * Go ahead and flush the keys out if this cnode is the last fork
	 * and it is not class F.  Class F keys should not be purged because they only
	 * exist in memory and have no persistent keys.  Only do this
	 * if we haven't already done it yet (maybe a vnode skipped inactive
	 * and went straight to reclaim).  This function gets called from both reclaim and
	 * inactive, so it will happen first in inactive if possible.
	 *
	 * We need to be mindful that all pending IO for this file has already been
	 * issued and completed before we bzero out the key.  This is because
	 * if it isn't, tossing the key here could result in garbage IO being
	 * written (by using the bzero'd key) if the writes are happening asynchronously.
	 *
	 * In addition, class A files may have already been purged due to the
	 * lock event occurring.
	 */
	if (forkcount == 1) {
		struct cprotect *entry = cp->c_cpentry;
		if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
			if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
				cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
				bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
				bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
			}
		}
	}
#endif
#endif

	return error;
}


/*
 * hfs_vnop_inactive
 *
 * The last usecount on the vnode has gone away, so we need to tear down
 * any remaining data still residing in the cnode.  If necessary, write out
 * remaining blocks or delete the cnode's entry in the catalog.
 */
int
hfs_vnop_inactive(struct vnop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct proc *p = vfs_context_proc(ap->a_context);
	int error = 0;
	int took_trunc_lock = 0;
	enum vtype v_type;

	v_type = vnode_vtype(vp);
	cp = VTOC(vp);

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
	    (hfsmp->hfs_freezing_proc == p)) {
		error = 0;
		goto inactive_done;
	}

	/*
	 * For safety, do NOT call vnode_recycle from inside this function.  This can cause
	 * problems in the following scenario:
	 *
	 * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
	 *
	 * If we're being invoked as a result of a reclaim that was already in-flight, then we
	 * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
	 * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
	 * try to re-enter reclaim again and panic.
	 *
	 * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
	 * 1) last usecount goes away on the vnode (vnode_rele)
	 * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
	 * 		vnode_recycle called (vnode_put)
	 * 3) vclean by way of reclaim
	 *
	 * In this function we would generally want to call vnode_recycle to speed things
	 * along to ensure that we don't leak blocks due to open-unlinked files.  However, by
	 * virtue of being in this function already, we can call hfs_cnode_teardown, which
	 * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
	 * there's no entry in the catalog and no backing store anymore.  If that's the case,
	 * then we really don't care all that much when the vnode actually goes through reclaim.
	 * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
	 * unlinked file in the first place should have already called vnode_recycle on the vnode
	 * to guarantee that it would go through reclaim in a speedy way.
	 */

	if (cp->c_flag & C_NOEXISTS) {
		/*
		 * If the cnode has already had its cat entry removed, then
		 * just skip to the end. We don't need to do anything here.
		 */
		error = 0;
		goto inactive_done;
	}

	if (v_type == VREG || v_type == VLNK) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
		took_trunc_lock = 1;
	}

	(void) hfs_lock(cp, HFS_FORCE_LOCK);

	/*
	 * Call cnode_teardown to push out dirty blocks to disk, release the blocks
	 * of open-unlinked files, and move the cnode from C_DELETED to C_NOEXISTS.
	 */
	error = hfs_cnode_teardown (vp, ap->a_context, 0);

	/*
	 * Drop the truncate lock before unlocking the cnode
	 * (which can potentially perform a vnode_put and
	 * recycle the vnode which in turn might require the
	 * truncate lock)
	 */
	if (took_trunc_lock) {
		hfs_unlock_truncate(cp, 0);
	}

	hfs_unlock(cp);

inactive_done:

	return error;
}


/*
 * File clean-up (zero fill and shrink peof).
 */

int
hfs_filedone(struct vnode *vp, vfs_context_t context)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	struct rl_entry *invalid_range;
	off_t leof;
	u_int32_t blks, blocksize;
	/* flags for zero-filling sparse ranges */
	int cluster_flags = IO_CLOSE;
	int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	leof = fp->ff_size;

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
		return (0);

#if CONFIG_PROTECT
	/*
	 * Figure out if we need to do synchronous IO.
	 *
	 * If the file represents a content-protected file, we may need
	 * to issue synchronous IO when we dispatch to the cluster layer.
	 * If we didn't, then the IO would go out to the disk asynchronously.
	 * If the vnode hits the end of inactive before getting reclaimed, the
	 * content protection keys would be wiped/bzeroed out, and we'd end up
	 * trying to issue the IO with an invalid key.  This will lead to file
	 * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
	 * have completed (though they may be in the track cache).
	 */
	if (cp_fs_protected(VTOVFS(vp))) {
		cluster_flags |= IO_SYNC;
		cluster_zero_flags |= IO_SYNC;
	}
#endif

	/*
	 * If we are being invoked from F_SWAPDATAEXTENTS, then we
	 * need to issue synchronous IO; unless we are sure that all
	 * of the data has been written to the disk, we won't know
	 * that all of the blocks have been allocated properly.
	 */
	if (cp->c_flag & C_SWAPINPROGRESS) {
		cluster_flags |= IO_SYNC;
	}

	hfs_unlock(cp);
	(void) cluster_push(vp, cluster_flags);
	hfs_lock(cp, HFS_FORCE_LOCK);

	/*
	 * Explicitly zero out the areas of the file
	 * that are currently marked invalid.
	 */
	while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
		off_t start = invalid_range->rl_start;
		off_t end = invalid_range->rl_end;

		/* The range about to be written must be validated
		 * first, so that VNOP_BLOCKMAP() will return the
		 * appropriate mapping for the cluster code:
		 */
		rl_remove(start, end, &fp->ff_invalidranges);

		hfs_unlock(cp);
		(void) cluster_write(vp, (struct uio *) 0,
				     leof, end + 1, start, (off_t)0, cluster_zero_flags);
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_flag |= C_MODIFIED;
	}
	cp->c_flag &= ~C_ZFWANTSYNC;
	cp->c_zftimeout = 0;
	blocksize = VTOVCB(vp)->blockSize;
	blks = leof / blocksize;
	if (((off_t)blks * (off_t)blocksize) != leof)
		blks++;
	/*
	 * Shrink the peof to the smallest size necessary to contain the leof.
	 */
	if (blks < fp->ff_blocks) {
		(void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
	}

	hfs_unlock(cp);
	(void) cluster_push(vp, cluster_flags);
	hfs_lock(cp, HFS_FORCE_LOCK);

	/*
	 * If the hfs_truncate didn't happen to flush the vnode's
	 * information out to disk, force it to be updated now that
	 * all invalid ranges have been zero-filled and validated:
	 */
	if (cp->c_flag & C_MODIFIED) {
		hfs_update(vp, 0);
	}
	return (0);
}


/*
 * Reclaim a cnode so that it can be used for other purposes.
 */
int
hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp = NULL;
	struct filefork *altfp = NULL;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t ctx = ap->a_context;
	int reclaim_cnode = 0;
	int err = 0;
	enum vtype v_type;

	v_type = vnode_vtype(vp);
	cp = VTOC(vp);

	/*
	 * We don't take the truncate lock since by the time reclaim comes along,
	 * all dirty pages have been synced and nobody should be competing
	 * with us for this thread.
	 */
	(void) hfs_lock (cp, HFS_FORCE_LOCK);

	/*
	 * Sync to disk any remaining data in the cnode/vnode.  This includes
	 * a call to hfs_update if the cnode has outbound data.
	 *
	 * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
	 * because the catalog entry for this cnode is already gone.
	 */
	if (!ISSET(cp->c_flag, C_NOEXISTS)) {
		err = hfs_cnode_teardown(vp, ctx, 1);
	}

	/*
	 * Keep track of an inactive hot file.
	 */
	if (!vnode_isdir(vp) &&
	    !vnode_issystem(vp) &&
	    !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) {
		(void) hfs_addhotfile(vp);
	}
	vnode_removefsref(vp);

	/*
	 * Find the file fork for this vnode (if any).
	 * Also check if another fork is active.
	 */
	if (cp->c_vp == vp) {
		fp = cp->c_datafork;
		altfp = cp->c_rsrcfork;

		cp->c_datafork = NULL;
		cp->c_vp = NULL;
	} else if (cp->c_rsrc_vp == vp) {
		fp = cp->c_rsrcfork;
		altfp = cp->c_datafork;

		cp->c_rsrcfork = NULL;
		cp->c_rsrc_vp = NULL;
	} else {
		panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
	}
	/*
	 * On the last fork, remove the cnode from its hash chain.
	 */
	if (altfp == NULL) {
		/* If we can't remove it then the cnode must persist! */
		if (hfs_chashremove(hfsmp, cp) == 0)
			reclaim_cnode = 1;
		/*
		 * Remove any directory hints
		 */
		if (vnode_isdir(vp)) {
			hfs_reldirhints(cp, 0);
		}

		if (cp->c_flag & C_HARDLINK) {
			hfs_relorigins(cp);
		}
	}
	/* Release the file fork and related data */
	if (fp) {
		/* Dump cached symlink data */
		if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
			FREE(fp->ff_symlinkptr, M_TEMP);
		}
		FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
	}

	/*
	 * If there was only one active fork then we can release the cnode.
	 */
	if (reclaim_cnode) {
		hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
		hfs_reclaim_cnode(cp);
	}
	else {
		/*
		 * cnode in use.  If it is a directory, it could have
		 * no live forks. Just release the lock.
		 */
		hfs_unlock(cp);
	}

	vnode_clearfsnode(vp);
	return (0);
}


extern int (**hfs_vnodeop_p) (void *);
extern int (**hfs_std_vnodeop_p) (void *);
extern int (**hfs_specop_p)  (void *);
#if FIFO
extern int (**hfs_fifoop_p)  (void *);
#endif

/*
 * hfs_getnewvnode - get new default vnode
 *
 * The vnode is returned with an iocount and the cnode locked
 */
int
hfs_getnewvnode(
	struct hfsmount *hfsmp,
	struct vnode *dvp,
	struct componentname *cnp,
	struct cat_desc *descp,
	int flags,
	struct cat_attr *attrp,
	struct cat_fork *forkp,
	struct vnode **vpp,
	int *out_flags)
{
	struct mount *mp = HFSTOVFS(hfsmp);
	struct vnode *vp = NULL;
	struct vnode **cvpp;
	struct vnode *tvp = NULLVP;
	struct cnode *cp = NULL;
	struct filefork *fp = NULL;
	int hfs_standard = 0;
	int retval;
	int issystemfile;
	int wantrsrc;
	int hflags = 0;
	struct vnode_fsparam vfsp;
	enum vtype vtype;
#if QUOTA
	int i;
#endif /* QUOTA */

	hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);

	if (attrp->ca_fileid == 0) {
		*vpp = NULL;
		return (ENOENT);
	}

#if !FIFO
	if (IFTOVT(attrp->ca_mode) == VFIFO) {
		*vpp = NULL;
		return (ENOTSUP);
	}
#endif /* !FIFO */
	vtype = IFTOVT(attrp->ca_mode);
	issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
	wantrsrc = flags & GNV_WANTRSRC;

	/* Sanity check the vtype and mode */
	if (vtype == VBAD) {
		/* Mark the FS as corrupt and bail out */
		hfs_mark_volume_inconsistent(hfsmp);
		return (EINVAL);
	}

	/* Zero out the out_flags */
	*out_flags = 0;

#ifdef HFS_CHECK_LOCK_ORDER
	/*
	 * The only case where it's permissible to hold the parent cnode
	 * lock is during a create operation (hfs_makenode) or when
	 * we don't need the cnode lock (GNV_SKIPLOCK).
	 */
	if ((dvp != NULL) &&
	    (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 &&
	    VTOC(dvp)->c_lockowner == current_thread()) {
		panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp));
	}
#endif /* HFS_CHECK_LOCK_ORDER */

	/*
	 * Get a cnode (new or existing)
	 */
	cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc,
							(flags & GNV_SKIPLOCK), out_flags, &hflags);

	/*
	 * If the id is no longer valid for lookups we'll get back a NULL cp.
	 */
	if (cp == NULL) {
		return (ENOENT);
	}

	/*
	 * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
	 * descriptor in the cnode as needed if the cnode represents a hardlink.
	 * We want the caller to get the most up-to-date copy of the descriptor
	 * possible. However, we only do anything here if there was a valid vnode.
	 * If there isn't a vnode, then the cnode is brand new and needs to be initialized
	 * as it doesn't have a descriptor or cat_attr yet.
	 *
	 * If we are about to replace the descriptor with the user-supplied one, then validate
	 * that the descriptor correctly acknowledges this item is a hardlink.  We could be
	 * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
	 * result but the file was not yet a hardlink. With sufficient delay between there
	 * and here, we might accidentally copy in the raw inode ID into the descriptor in the
	 * call below.  If the descriptor's CNID is the same as the fileID then it must
	 * not yet have been a hardlink when the lookup occurred.
	 */

	if (!(hfs_checkdeleted(cp))) {
		if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
			/* If the cnode is uninitialized, its c_attr will be zeroed out; CNIDs won't match. */
			if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
					(attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
				if ((flags & GNV_SKIPLOCK) == 0) {
					/*
					 * Then we took the lock. Drop it before calling
					 * vnode_put, which may invoke hfs_vnop_inactive and need to take
					 * the cnode lock again.
					 */
					hfs_unlock(cp);
				}

				/*
				 * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
				 * force a re-drive in the lookup routine.
				 * Drop the iocount on the vnode obtained from
				 * chash_getcnode if needed.
				 */
				if (*vpp != NULL) {
					vnode_put (*vpp);
					*vpp = NULL;
				}

				/*
				 * If we raced with VNOP_RECLAIM for this vnode, the hash code could
				 * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
				 * the hash code peeks at those fields without holding the cnode lock because
				 * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
				 * call above.  Since we're bailing out, unset whatever flags we just set, and
				 * wake up all waiters for this cnode.
				 */
				if (hflags) {
					hfs_chashwakeup(hfsmp, cp, hflags);
				}

				*out_flags = GNV_CAT_ATTRCHANGED;
				return ERECYCLE;
			}
			else {
				/*
				 * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
				 *
				 * Replacing the descriptor here is fine because we looked up the item without
				 * a vnode in hand before.  If a vnode existed, its identity must be attached to this
				 * item.  We are not susceptible to the lookup fastpath issue at this point.
				 */
				replace_desc(cp, descp);
			}
		}
	}

	/* Check if we found a matching vnode */
	if (*vpp != NULL) {
		return (0);
	}

	/*
	 * If this is a new cnode then initialize it.
	 */
	if (ISSET(cp->c_hflag, H_ALLOC)) {
		lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
#if HFS_COMPRESSION
		cp->c_decmp = NULL;
#endif

		/* Make sure it's still valid (i.e. it exists on disk). */
		if (!(flags & GNV_CREATE)) {
			int error = 0;
			if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
				hfs_chash_abort(hfsmp, cp);
				hfs_reclaim_cnode(cp);
				*vpp = NULL;
				/*
				 * If we hit this case, that means that the entry was there in the catalog when
				 * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
				 * that we checked the catalog and the time we went to get a vnode/cnode for it,
				 * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
				 * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
				 * an ENOENT.  To indicate to the caller that they should really double-check the
				 * entry (it could have been renamed over and gotten a new fileid), we mark a bit
				 * in the output flags.
				 */
				if (error == ENOENT) {
					*out_flags = GNV_CAT_DELETED;
					return ENOENT;
				}

				/*
				 * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
				 * this function as an argument because the catalog may have changed w.r.t hardlink
				 * link counts and the firstlink field.  If that validation check fails, then let
				 * lookup re-drive itself to get valid/consistent data with the same failure condition below.
				 */
				if (error == ERECYCLE) {
					*out_flags = GNV_CAT_ATTRCHANGED;
					return (ERECYCLE);
				}
			}
		}
		bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
		bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));

		/* The name was inherited so clear descriptor state... */
		descp->cd_namelen = 0;
		descp->cd_nameptr = NULL;
		descp->cd_flags &= ~CD_HASBUF;

		/* Tag hardlinks */
		if ((vtype == VREG || vtype == VDIR) &&
		    ((descp->cd_cnid != attrp->ca_fileid) ||
		     (attrp->ca_recflags & kHFSHasLinkChainMask))) {
			cp->c_flag |= C_HARDLINK;
		}
		/*
		 * Fix-up dir link counts.
		 *
		 * Earlier versions of Leopard used ca_linkcount for posix
		 * nlink support (effectively the sub-directory count + 2).
		 * That is now accomplished using the ca_dircount field with
		 * the corresponding kHFSHasFolderCountMask flag.
		 *
		 * For directories the ca_linkcount is the true link count,
		 * tracking the number of actual hardlinks to a directory.
		 *
		 * We only do this if the mount has HFS_FOLDERCOUNT set;
		 * at the moment, we only set that for HFSX volumes.
		 */
		if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) &&
		    (vtype == VDIR) &&
		    !(attrp->ca_recflags & kHFSHasFolderCountMask) &&
		    (cp->c_attr.ca_linkcount > 1)) {
			if (cp->c_attr.ca_entries == 0)
				cp->c_attr.ca_dircount = 0;
			else
				cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2;

			cp->c_attr.ca_linkcount = 1;
			cp->c_attr.ca_recflags |= kHFSHasFolderCountMask;
			if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) )
				cp->c_flag |= C_MODIFIED;
		}
#if QUOTA
		if (hfsmp->hfs_flags & HFS_QUOTAS) {
			for (i = 0; i < MAXQUOTAS; i++)
				cp->c_dquot[i] = NODQUOT;
		}
#endif /* QUOTA */
		/* Mark the output flag that we're vending a new cnode */
		*out_flags |= GNV_NEW_CNODE;
	}

	if (vtype == VDIR) {
		if (cp->c_vp != NULL)
			panic("hfs_getnewvnode: orphaned vnode (data)");
		cvpp = &cp->c_vp;
	} else {
		if (forkp && attrp->ca_blocks < forkp->cf_blocks)
			panic("hfs_getnewvnode: bad ca_blocks (too small)");
		/*
		 * Allocate and initialize a file fork...
		 */
		MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork),
			M_HFSFORK, M_WAITOK);
		fp->ff_cp = cp;
		if (forkp)
			bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork));
		else
			bzero(&fp->ff_data, sizeof(struct cat_fork));
		rl_init(&fp->ff_invalidranges);
		fp->ff_sysfileinfo = 0;

		if (wantrsrc) {
			if (cp->c_rsrcfork != NULL)
				panic("hfs_getnewvnode: orphaned rsrc fork");
			if (cp->c_rsrc_vp != NULL)
				panic("hfs_getnewvnode: orphaned vnode (rsrc)");
			cp->c_rsrcfork = fp;
			cvpp = &cp->c_rsrc_vp;
			if ( (tvp = cp->c_vp) != NULLVP )
				cp->c_flag |= C_NEED_DVNODE_PUT;
		} else {
			if (cp->c_datafork != NULL)
				panic("hfs_getnewvnode: orphaned data fork");
			if (cp->c_vp != NULL)
				panic("hfs_getnewvnode: orphaned vnode (data)");
			cp->c_datafork = fp;
			cvpp = &cp->c_vp;
			if ( (tvp = cp->c_rsrc_vp) != NULLVP)
				cp->c_flag |= C_NEED_RVNODE_PUT;
		}
	}
	if (tvp != NULLVP) {
		/*
		 * Grab an iocount on the vnode we weren't
		 * interested in (i.e. we want the resource fork
		 * but the cnode already has the data fork)
		 * to prevent it from being recycled by us
		 * when we call vnode_create, which would
		 * result in a deadlock when we try to take
		 * the cnode lock in hfs_vnop_fsync or
		 * hfs_vnop_reclaim.  vnode_get can be called here
		 * because we already hold the cnode lock, which will
		 * prevent the vnode from changing identity until
		 * we drop it.  vnode_get will not block waiting for
		 * a change of state; however, it will return an
		 * error if the current iocount == 0 and we've already
		 * started to terminate the vnode.  We don't need/want to
		 * grab an iocount in that case, since we can't cause
		 * the filesystem to be re-entered on this thread for this vp.
		 *
		 * The matching vnode_put will happen in hfs_unlock
		 * after we've dropped the cnode lock.
		 */
		if ( vnode_get(tvp) != 0)
			cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT);
	}
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "hfs";
	if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) {
		vfsp.vnfs_dvp = NULL;  /* no parent for me! */
		vfsp.vnfs_cnp = NULL;  /* no name for me! */
	} else {
		vfsp.vnfs_dvp = dvp;
		vfsp.vnfs_cnp = cnp;
	}
	vfsp.vnfs_fsnode = cp;

	/*
	 * Special-case the HFS Standard VNOPs from HFS+, since
	 * HFS Standard is read-only/deprecated as of 10.6
	 */

#if FIFO
	if (vtype == VFIFO )
		vfsp.vnfs_vops = hfs_fifoop_p;
	else
#endif
	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = hfs_specop_p;
	else if (hfs_standard)
		vfsp.vnfs_vops = hfs_std_vnodeop_p;
	else
		vfsp.vnfs_vops = hfs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = attrp->ca_rdev;
	else
		vfsp.vnfs_rdev = 0;

	if (forkp)
		vfsp.vnfs_filesize = forkp->cf_size;
	else
		vfsp.vnfs_filesize = 0;

	vfsp.vnfs_flags = VNFS_ADDFSREF;
	if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
		vfsp.vnfs_flags |= VNFS_NOCACHE;

	/* Tag system files */
	vfsp.vnfs_marksystem = issystemfile;

	/* Tag root directory */
	if (descp->cd_cnid == kHFSRootFolderID)
		vfsp.vnfs_markroot = 1;
	else
		vfsp.vnfs_markroot = 0;

	if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) {
		if (fp) {
			if (fp == cp->c_datafork)
				cp->c_datafork = NULL;
			else
				cp->c_rsrcfork = NULL;

			FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
		}
		/*
		 * If this is a newly created cnode or a vnode reclaim
		 * occurred during the attachment, then cleanup the cnode.
		 */
		if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
			hfs_chash_abort(hfsmp, cp);
			hfs_reclaim_cnode(cp);
		}
		else {
			hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
			if ((flags & GNV_SKIPLOCK) == 0){
				hfs_unlock(cp);
			}
		}
		*vpp = NULL;
		return (retval);
	}
	vp = *cvpp;
	vnode_settag(vp, VT_HFS);
	if (cp->c_flag & C_HARDLINK) {
		vnode_setmultipath(vp);
	}
	/*
	 * Tag resource fork vnodes as needing a VNOP_INACTIVE
	 * so that any deferred removes (open unlinked files)
	 * have the chance to process the resource fork.
	 */
	if (VNODE_IS_RSRC(vp)) {
		int err;
		KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0);

		/* Force VL_NEEDINACTIVE on this vnode */
		err = vnode_ref(vp);
		if (err == 0) {
			vnode_rele(vp);
		}
	}
	hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);

	/*
	 * Stop tracking an active hot file.
	 */
	if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
		(void) hfs_removehotfile(vp);
	}

#if CONFIG_PROTECT
	/* Initialize the cp data structures. The key should be in place now. */
	if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
		cp_entry_init(cp, mp);
	}
#endif

	*vpp = vp;
	return (0);
}
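
/*
 * Caller-side sketch (illustrative only, assuming GNV_SKIPLOCK is not set):
 * on success the vnode is returned with an iocount held and the cnode
 * locked, so the caller must eventually drop both:
 *
 *	struct vnode *new_vp = NULL;
 *	int gnv_flags = 0;
 *	error = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, 0, &attr, &fork,
 *				&new_vp, &gnv_flags);
 *	if (error == 0) {
 *		// ... use new_vp ...
 *		hfs_unlock(VTOC(new_vp));
 *		vnode_put(new_vp);
 *	}
 */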


static void
hfs_reclaim_cnode(struct cnode *cp)
{
#if QUOTA
	int i;

	for (i = 0; i < MAXQUOTAS; i++) {
		if (cp->c_dquot[i] != NODQUOT) {
			dqreclaim(cp->c_dquot[i]);
			cp->c_dquot[i] = NODQUOT;
		}
	}
#endif /* QUOTA */

	/*
	 * If the descriptor has a name then release it
	 */
	if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) {
		const char *nameptr;

		nameptr = (const char *) cp->c_desc.cd_nameptr;
		cp->c_desc.cd_nameptr = 0;
		cp->c_desc.cd_flags &= ~CD_HASBUF;
		cp->c_desc.cd_namelen = 0;
		vfs_removename(nameptr);
	}

	/*
	 * We only call this function if we are in hfs_vnop_reclaim and
	 * attempting to reclaim a cnode with only one live fork.  Because the vnode
	 * went through reclaim, any future attempts to use this item will have to
	 * go through lookup again, which will need to create a new vnode.  Thus,
	 * destroying the locks below (while they were still held during our parent
	 * function hfs_vnop_reclaim) is safe.
	 */

	lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
	lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
#if HFS_COMPRESSION
	if (cp->c_decmp) {
		decmpfs_cnode_destroy(cp->c_decmp);
		FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
	}
#endif
#if CONFIG_PROTECT
	cp_entry_destroy(&cp->c_cpentry);
#endif


	bzero(cp, sizeof(struct cnode));
	FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
}


/*
 * hfs_valid_cnode
 *
 * This function is used to validate data that is stored in-core against what is contained
 * in the catalog.  Common uses include validating that the parent-child relationship still
 * exists for a specific directory entry (guaranteeing it has not been renamed into a
 * different spot) at the point of the check.
 */
int
hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
		cnid_t cnid, struct cat_attr *cattr, int *error)
{
	struct cat_attr attr;
	struct cat_desc cndesc;
	int stillvalid = 0;
	int lockflags;

	/* System files are always valid */
	if (cnid < kHFSFirstUserCatalogNodeID) {
		*error = 0;
		return (1);
	}

	/* XXX optimization:  check write count in dvp */

	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

	if (dvp && cnp) {
		int lookup = 0;
		struct cat_fork fork;
		bzero(&cndesc, sizeof(cndesc));
		cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
		cndesc.cd_namelen = cnp->cn_namelen;
		cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
		cndesc.cd_hint = VTOC(dvp)->c_childhint;

		/*
		 * We have to be careful when calling cat_lookup.  The result argument
		 * 'attr' may get different results based on whether or not you ask
		 * for the filefork to be supplied as output.  This is because cat_lookupbykey
		 * will attempt to do basic validation/smoke tests against the resident
		 * extents if there are no overflow extent records, but it needs someplace
		 * in memory to store the on-disk fork structures.
		 *
		 * Since hfs_lookup calls cat_lookup with a filefork argument, we should
		 * do the same here, to verify that block count differences are not
		 * due to calling the function with different styles.  cat_lookupbykey
		 * will request the volume be fsck'd if there is true on-disk corruption
		 * where the number of blocks does not match the number generated by
		 * summing the number of blocks in the resident extents.
		 */

		lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);

		if ((lookup == 0) && (cnid == attr.ca_fileid)) {
			stillvalid = 1;
			*error = 0;
		}
		else {
			*error = ENOENT;
		}

		/*
		 * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode-creation
		 * race.  Specifically, if there is no vnode/cnode pair for the directory entry
		 * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
		 * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
		 * changing in between the time we do the cat_lookup there and the time we re-grab the
		 * catalog lock above to do another cat_lookup.
		 *
		 * However, we need to check more than just the CNID and parent-child name relationships above.
		 * Hardlinks can suffer the same race in the following scenario:  Suppose we do a
		 * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have
		 * the cat_attr in hand (passed in above).  But in between then and now, the vnode was
		 * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
		 * a chance to do anything.  This is possible if there are a lot of threads thrashing around
		 * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
		 * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
		 * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
		 * already exists, as it does in the case of rename and delete.
		 */
		if (stillvalid && cattr != NULL) {
			if (cattr->ca_linkcount != attr.ca_linkcount) {
				stillvalid = 0;
				*error = ERECYCLE;
				goto notvalid;
			}

			if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
				stillvalid = 0;
				*error = ERECYCLE;
				goto notvalid;
			}

			if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
				stillvalid = 0;
				*error = ERECYCLE;
				goto notvalid;
			}

			if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
				stillvalid = 0;
				*error = ERECYCLE;
				goto notvalid;
			}
		}
	} else {
		if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
			stillvalid = 1;
			*error = 0;
		}
		else {
			*error = ENOENT;
		}
	}
notvalid:
	hfs_systemfile_unlock(hfsmp, lockflags);

	return (stillvalid);
}


/*
 * Per HI and Finder requirements, HFS should add in the
 * date/time that a particular directory entry was added
 * to the containing directory.
 * This is stored in the extended Finder Info for the
 * item in question.
 *
 * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
 * We must ignore user attempts to set this part of the finderinfo, and
 * so we need to save a local copy of the date added, write in the user
 * finderinfo, then stuff the value back in.
 */
void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
	u_int8_t *finfo = NULL;

	/* overlay the FinderInfo to the correct pointer, and advance */
	finfo = (u_int8_t*)attrp->ca_finderinfo;
	finfo = finfo + 16;

	/*
	 * Make sure to write it out as big endian, since that's how
	 * finder info is defined.
	 *
	 * NOTE: This is a Unix-epoch timestamp, not an HFS/Traditional Mac timestamp.
	 */
	if (S_ISREG(attrp->ca_mode)) {
		struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
		extinfo->date_added = OSSwapHostToBigInt32(dateadded);
		attrp->ca_recflags |= kHFSHasDateAddedMask;
	}
	else if (S_ISDIR(attrp->ca_mode)) {
		struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
		extinfo->date_added = OSSwapHostToBigInt32(dateadded);
		attrp->ca_recflags |= kHFSHasDateAddedMask;
	}
	/* If it's neither a directory nor a regular file, write nothing. */
	return;
}
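
/*
 * Sketch of the save/restore dance described above (illustrative only;
 * the real logic lives in hfs_vnop_setxattr, and 'user_finfo' is a
 * hypothetical buffer name): preserve the kernel-owned date-added value
 * across a user write of the finderinfo.
 *
 *	u_int32_t dateadded = hfs_get_dateadded(cp);	// save a local copy
 *	bcopy(user_finfo, cp->c_finderinfo, 32);	// write in the user finderinfo
 *	hfs_write_dateadded(&cp->c_attr, dateadded);	// stuff the value back in
 */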


u_int32_t hfs_get_dateadded (struct cnode *cp) {
	u_int8_t *finfo = NULL;
	u_int32_t dateadded = 0;

	if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
		/* Date added was never set.  Return 0. */
		return dateadded;
	}

	/* overlay the FinderInfo to the correct pointer, and advance */
	finfo = (u_int8_t*)cp->c_finderinfo;
	finfo = finfo + 16;

	/*
	 * FinderInfo is written out in big endian... make sure to convert it to host
	 * native before we use it.
	 */
	if (S_ISREG(cp->c_attr.ca_mode)) {
		struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
		dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
	}
	else if (S_ISDIR(cp->c_attr.ca_mode)) {
		struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
		dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
	}

	return dateadded;
}
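
/*
 * Usage sketch (hypothetical caller): since the kHFSHasDateAddedMask check
 * above short-circuits before the FinderInfo is consulted, callers can
 * treat a return of 0 as "no add date recorded":
 *
 *	u_int32_t dateadded = hfs_get_dateadded(cp);
 *	if (dateadded != 0) {
 *		(report dateadded as a Unix-epoch timestamp)
 *	}
 */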

/*
 * Touch cnode times based on c_touch_xxx flags
 *
 * cnode must be locked exclusive
 *
 * This will also update the volume modify time
 */
void
hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
{
	vfs_context_t ctx;
	/* don't modify times if volume is read-only */
	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		cp->c_touch_acctime = FALSE;
		cp->c_touch_chgtime = FALSE;
		cp->c_touch_modtime = FALSE;
		return;
	}
	else if (hfsmp->hfs_flags & HFS_STANDARD) {
		/* HFS Standard doesn't support access times */
		cp->c_touch_acctime = FALSE;
	}

	ctx = vfs_context_current();
	/*
	 * Skip access time updates if:
	 *	. MNT_NOATIME is set
	 *	. a file system freeze is in progress
	 *	. a file system resize is in progress
	 *	. the vnode associated with this cnode is marked for rapid aging
	 */
	if (cp->c_touch_acctime) {
		if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
		    (hfsmp->hfs_freezing_proc != NULL) ||
		    (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
		    (cp->c_vp && (vnode_israge(cp->c_vp) || vfs_ctx_skipatime(ctx)))) {

			cp->c_touch_acctime = FALSE;
		}
	}
	if (cp->c_touch_acctime || cp->c_touch_chgtime ||
		cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
		struct timeval tv;
		int touchvol = 0;

		microtime(&tv);

		if (cp->c_touch_acctime) {
			cp->c_atime = tv.tv_sec;
			/*
			 * When the access time is the only thing changing,
			 * then make sure it's sufficiently newer before
			 * committing it to disk.
			 */
			if (((u_int32_t)cp->c_atime - (u_int32_t)cp->c_attr.ca_atimeondisk) >
			      ATIME_ONDISK_ACCURACY) {
				cp->c_flag |= C_MODIFIED;
			}
			cp->c_touch_acctime = FALSE;
		}
		if (cp->c_touch_modtime) {
			cp->c_mtime = tv.tv_sec;
			cp->c_touch_modtime = FALSE;
			cp->c_flag |= C_MODIFIED;
			touchvol = 1;
#if 1
			/*
			 * HFS dates that WE set must be adjusted for DST
			 */
			if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
				cp->c_mtime += 3600;
			}
#endif
		}
		if (cp->c_touch_chgtime) {
			cp->c_ctime = tv.tv_sec;
			cp->c_touch_chgtime = FALSE;
			cp->c_flag |= C_MODIFIED;
			touchvol = 1;
		}

		if (cp->c_flag & C_NEEDS_DATEADDED) {
			hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
			cp->c_flag |= C_MODIFIED;
			/* untwiddle the bit */
			cp->c_flag &= ~C_NEEDS_DATEADDED;
			touchvol = 1;
		}

		/* Touch the volume modtime if needed */
		if (touchvol) {
			MarkVCBDirty(hfsmp);
			HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
		}
	}
}
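
/*
 * Usage sketch (hypothetical caller): a VNOP that modifies file data
 * would typically set the touch flags under the exclusive cnode lock and
 * let hfs_touchtimes() fold them into c_mtime/c_ctime and the volume
 * modify time:
 *
 *	if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK) == 0) {
 *		cp->c_touch_modtime = TRUE;
 *		cp->c_touch_chgtime = TRUE;
 *		hfs_touchtimes(hfsmp, cp);
 *		hfs_unlock(cp);
 *	}
 */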

/*
 * Lock a cnode.
 */
int
hfs_lock(struct cnode *cp, enum hfslocktype locktype)
{
	void * thread = current_thread();

	if (cp->c_lockowner == thread) {
		/*
		 * Only the extents and bitmap files support lock recursion.
		 */
		if ((cp->c_fileid == kHFSExtentsFileID) ||
		    (cp->c_fileid == kHFSAllocationFileID)) {
			cp->c_syslockcount++;
		} else {
			panic("hfs_lock: locking against myself!");
		}
	} else if (locktype == HFS_SHARED_LOCK) {
		lck_rw_lock_shared(&cp->c_rwlock);
		cp->c_lockowner = HFS_SHARED_OWNER;

	} else /* HFS_EXCLUSIVE_LOCK */ {
		lck_rw_lock_exclusive(&cp->c_rwlock);
		cp->c_lockowner = thread;

		/*
		 * Only the extents and bitmap files support lock recursion.
		 */
		if ((cp->c_fileid == kHFSExtentsFileID) ||
		    (cp->c_fileid == kHFSAllocationFileID)) {
			cp->c_syslockcount = 1;
		}
	}

#ifdef HFS_CHECK_LOCK_ORDER
	/*
	 * Regular cnodes (non-system files) cannot be locked
	 * while holding the journal lock or a system file lock.
	 */
	if (!(cp->c_desc.cd_flags & CD_ISMETA) &&
	    ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) {
		vnode_t vp = NULLVP;

		/* Find corresponding vnode. */
		if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) {
			vp = cp->c_vp;
		} else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) {
			vp = cp->c_rsrc_vp;
		}
		if (vp != NULLVP) {
			struct hfsmount *hfsmp = VTOHFS(vp);

			if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) {
				/* This will eventually be a panic here. */
				printf("hfs_lock: bad lock order (cnode after journal)\n");
			}
			if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
				panic("hfs_lock: bad lock order (cnode after catalog)");
			}
			if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
				panic("hfs_lock: bad lock order (cnode after attribute)");
			}
			if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
				panic("hfs_lock: bad lock order (cnode after extents)");
			}
		}
	}
#endif /* HFS_CHECK_LOCK_ORDER */

	/*
	 * Skip cnodes that no longer exist (were deleted).
	 */
	if ((locktype != HFS_FORCE_LOCK) &&
	    ((cp->c_desc.cd_flags & CD_ISMETA) == 0) &&
	    (cp->c_flag & C_NOEXISTS)) {
		hfs_unlock(cp);
		return (ENOENT);
	}
	return (0);
}
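
/*
 * Usage sketch (hypothetical caller): hfs_lock() drops the lock internally
 * and returns ENOENT for a deleted cnode unless HFS_FORCE_LOCK is used, so
 * a nonzero return always means the lock is NOT held:
 *
 *	if ((error = hfs_lock(cp, HFS_SHARED_LOCK)) != 0)
 *		return (error);
 *	(read cnode fields)
 *	hfs_unlock(cp);
 */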

/*
 * Lock a pair of cnodes.
 */
int
hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype)
{
	struct cnode *first, *last;
	int error;

	/*
	 * If cnodes match then just lock one.
	 */
	if (cp1 == cp2) {
		return hfs_lock(cp1, locktype);
	}

	/*
	 * Lock in cnode address order.
	 */
	if (cp1 < cp2) {
		first = cp1;
		last = cp2;
	} else {
		first = cp2;
		last = cp1;
	}

	if ((error = hfs_lock(first, locktype))) {
		return (error);
	}
	if ((error = hfs_lock(last, locktype))) {
		hfs_unlock(first);
		return (error);
	}
	return (0);
}
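
/*
 * Usage sketch (hypothetical caller, e.g. a directory/file pair): both
 * locks are taken in one call and released with the matching helper,
 * which also handles the cp1 == cp2 case:
 *
 *	if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK)) != 0)
 *		return (error);
 *	(operate on both cnodes)
 *	hfs_unlockpair(dcp, cp);
 */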

/*
 * Check ordering of two cnodes.  Return true if they are in order.
 */
static int
hfs_isordered(struct cnode *cp1, struct cnode *cp2)
{
	if (cp1 == cp2)
		return (0);
	if (cp1 == NULL || cp2 == (struct cnode *)0xffffffff)
		return (1);
	if (cp2 == NULL || cp1 == (struct cnode *)0xffffffff)
		return (0);
	/*
	 * Locking order is cnode address order.
	 */
	return (cp1 < cp2);
}
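
/*
 * Note: NULL orders before any real cnode and the 0xffffffff sentinel
 * orders after any real cnode, for example:
 *
 *	hfs_isordered(NULL, cp)                        returns 1
 *	hfs_isordered(cp, (struct cnode *)0xffffffff)  returns 1
 *	hfs_isordered(cp, cp)                          returns 0 (duplicate)
 *
 * hfs_lockfour() below relies on this so its merge loop always consumes
 * the lesser list entry and terminates at the sentinels.
 */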

/*
 * Acquire 4 cnode locks.
 *   - locked in cnode address order (lesser address first).
 *   - all or none of the locks are taken
 *   - only one lock taken per cnode (dup cnodes are skipped)
 *   - some of the cnode pointers may be null
 */
int
hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
             struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode)
{
	struct cnode * a[3];
	struct cnode * b[3];
	struct cnode * list[4];
	struct cnode * tmp;
	int i, j, k;
	int error;
	if (error_cnode) {
		*error_cnode = NULL;
	}

	if (hfs_isordered(cp1, cp2)) {
		a[0] = cp1; a[1] = cp2;
	} else {
		a[0] = cp2; a[1] = cp1;
	}
	if (hfs_isordered(cp3, cp4)) {
		b[0] = cp3; b[1] = cp4;
	} else {
		b[0] = cp4; b[1] = cp3;
	}
	a[2] = (struct cnode *)0xffffffff;  /* sentinel value */
	b[2] = (struct cnode *)0xffffffff;  /* sentinel value */

	/*
	 * Build the lock list, skipping over duplicates
	 */
	for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) {
		tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++];
		if (k == 0 || tmp != list[k-1])
			list[k++] = tmp;
	}

	/*
	 * Now we can lock using list[0..k-1].
	 * Skip over NULL entries.
	 */
	for (i = 0; i < k; ++i) {
		if (list[i])
			if ((error = hfs_lock(list[i], locktype))) {
				/* Only stuff error_cnode if requested */
				if (error_cnode) {
					*error_cnode = list[i];
				}
				/* Drop any locks we acquired. */
				while (--i >= 0) {
					if (list[i])
						hfs_unlock(list[i]);
				}
				return (error);
			}
	}
	return (0);
}
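
/*
 * Usage sketch (hypothetical caller with rename-style arguments; the
 * names fdcp/fcp/tdcp/tcp are illustrative): some of the four cnodes may
 * be NULL or equal, and on failure no locks remain held:
 *
 *	struct cnode *bad = NULL;
 *
 *	error = hfs_lockfour(fdcp, fcp, tdcp, tcp, HFS_EXCLUSIVE_LOCK, &bad);
 *	if (error) {
 *		("bad" names the cnode whose lock attempt failed)
 *		return (error);
 *	}
 *	(do the multi-cnode work)
 *	hfs_unlockfour(fdcp, fcp, tdcp, tcp);
 */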


/*
 * Unlock a cnode.
 */
void
hfs_unlock(struct cnode *cp)
{
	vnode_t rvp = NULLVP;
	vnode_t vp = NULLVP;
	u_int32_t c_flag;
	void *lockowner;

	/*
	 * Only the extents and bitmap files support lock recursion.
	 */
	if ((cp->c_fileid == kHFSExtentsFileID) ||
	    (cp->c_fileid == kHFSAllocationFileID)) {
		if (--cp->c_syslockcount > 0) {
			return;
		}
	}
	c_flag = cp->c_flag;
	cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE);

	if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
		vp = cp->c_vp;
	}
	if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
		rvp = cp->c_rsrc_vp;
	}

	lockowner = cp->c_lockowner;
	if (lockowner == current_thread()) {
		cp->c_lockowner = NULL;
		lck_rw_unlock_exclusive(&cp->c_rwlock);
	} else {
		lck_rw_unlock_shared(&cp->c_rwlock);
	}

	/* Perform any vnode post processing after cnode lock is dropped. */
	if (vp) {
		if (c_flag & C_NEED_DATA_SETSIZE)
			ubc_setsize(vp, 0);
		if (c_flag & C_NEED_DVNODE_PUT)
			vnode_put(vp);
	}
	if (rvp) {
		if (c_flag & C_NEED_RSRC_SETSIZE)
			ubc_setsize(rvp, 0);
		if (c_flag & C_NEED_RVNODE_PUT)
			vnode_put(rvp);
	}
}

/*
 * Unlock a pair of cnodes.
 */
void
hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
{
	hfs_unlock(cp1);
	if (cp2 != cp1)
		hfs_unlock(cp2);
}

/*
 * Unlock a group of cnodes.
 */
void
hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
{
	struct cnode * list[4];
	int i, k = 0;

	if (cp1) {
		hfs_unlock(cp1);
		list[k++] = cp1;
	}
	if (cp2) {
		for (i = 0; i < k; ++i) {
			if (list[i] == cp2)
				goto skip1;
		}
		hfs_unlock(cp2);
		list[k++] = cp2;
	}
skip1:
	if (cp3) {
		for (i = 0; i < k; ++i) {
			if (list[i] == cp3)
				goto skip2;
		}
		hfs_unlock(cp3);
		list[k++] = cp3;
	}
skip2:
	if (cp4) {
		for (i = 0; i < k; ++i) {
			if (list[i] == cp4)
				return;
		}
		hfs_unlock(cp4);
	}
}


/*
 * Protect a cnode against a truncation.
 *
 * Used mainly by read/write since they don't hold the
 * cnode lock across calls to the cluster layer.
 *
 * The process doing a truncation must take the lock
 * exclusive. The read/write processes can take it
 * shared.  The locktype argument is the same as supplied to
 * hfs_lock.
 */
void
hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype)
{
	void * thread = current_thread();

	if (cp->c_truncatelockowner == thread) {
		/*
		 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
		 *
		 * This is needed on the hfs_vnop_pagein path where we need to ensure
		 * the file does not change sizes while we are paging in.  However,
		 * we may already hold the lock exclusive due to another
		 * VNOP from earlier in the call stack.  So if we already hold
		 * the truncate lock exclusive, allow it to proceed, but ONLY if
		 * it's in the recursive case.
		 */
		if (locktype != HFS_RECURSE_TRUNCLOCK) {
			panic("hfs_lock_truncate: cnode %p locked!", cp);
		}
	}
	/* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
	else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
		lck_rw_lock_shared(&cp->c_truncatelock);
		cp->c_truncatelockowner = HFS_SHARED_OWNER;
	}
	else { /* must be an HFS_EXCLUSIVE_LOCK */
		lck_rw_lock_exclusive(&cp->c_truncatelock);
		cp->c_truncatelockowner = thread;
	}
}
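
/*
 * Usage sketch (hypothetical read/write path): the truncate lock is held
 * shared across the cluster I/O so the file cannot change size underneath
 * it, even though the cnode lock itself is not held the whole time:
 *
 *	hfs_lock_truncate(cp, HFS_SHARED_LOCK);
 *	(issue cluster I/O; the size is pinned but the cnode lock may
 *	 be dropped and retaken around the transfer)
 *	hfs_unlock_truncate(cp, 0);
 */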


/*
 * Attempt to get the truncate lock.  If it cannot be acquired, error out.
 * This function is needed in the degenerate case of hfs_vnop_pagein during
 * force unmount.  To prevent deadlocks while a VM copy object is moving
 * pages, HFS vnop pagein will temporarily need to disable V2 semantics.
 */
int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
	void * thread = current_thread();
	boolean_t didlock = false;

	if (cp->c_truncatelockowner == thread) {
		/*
		 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
		 *
		 * This is needed on the hfs_vnop_pagein path where we need to ensure
		 * the file does not change sizes while we are paging in.  However,
		 * we may already hold the lock exclusive due to another
		 * VNOP from earlier in the call stack.  So if we already hold
		 * the truncate lock exclusive, allow it to proceed, but ONLY if
		 * it's in the recursive case.
		 */
		if (locktype != HFS_RECURSE_TRUNCLOCK) {
			panic("hfs_try_trunclock: cnode %p locked!", cp);
		}
	}
	/* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
	else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
		didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
		if (didlock) {
			cp->c_truncatelockowner = HFS_SHARED_OWNER;
		}
	}
	else { /* must be an HFS_EXCLUSIVE_LOCK */
		didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
		if (didlock) {
			cp->c_truncatelockowner = thread;
		}
	}

	return didlock;
}
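
/*
 * Usage sketch (hypothetical non-blocking caller such as the forced
 * unmount pagein path described above; the EBUSY fallback is
 * illustrative):
 *
 *	if (hfs_try_trunclock(cp, HFS_SHARED_LOCK) == 0) {
 *		(lock unavailable: bail out rather than sleep)
 *		return (EBUSY);
 *	}
 *	(do the pagein work)
 *	hfs_unlock_truncate(cp, 0);
 */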


/*
 * Unlock the truncate lock, which protects against size changes.
 *
 * The been_recursed argument is used when we may need to return
 * from this function without actually unlocking the truncate lock.
 */
void
hfs_unlock_truncate(struct cnode *cp, int been_recursed)
{
	void *thread = current_thread();

	/*
	 * If been_recursed is nonzero AND the current thread owns the
	 * truncate lock, then we must have taken the lock recursively
	 * earlier on.  The recursive acquisition was a no-op, so the
	 * matching unlock must be a no-op too.  (Had the lock not already
	 * been held, HFS_RECURSE_TRUNCLOCK would have taken it shared and
	 * we would fall through to the shared case below.)
	 *
	 * If been_recursed is zero (most of the time) then we check the
	 * lockowner field to infer whether the lock was taken exclusively or
	 * shared in order to know what underlying lock routine to call.
	 */
	if (been_recursed) {
		if (cp->c_truncatelockowner == thread) {
			return;
		}
	}

	/* taken with HFS_EXCLUSIVE_LOCK */
	if (thread == cp->c_truncatelockowner) {
		cp->c_truncatelockowner = NULL;
		lck_rw_unlock_exclusive(&cp->c_truncatelock);
	}
	/* taken with HFS_SHARED_LOCK (or recursively) */
	else {
		lck_rw_unlock_shared(&cp->c_truncatelock);
	}
}
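
/*
 * Usage sketch (hypothetical): a recursive acquisition must be paired
 * with an unlock that passes been_recursed = 1, so an exclusive lock
 * already held further up the call stack is not dropped here:
 *
 *	hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
 *	(pagein work)
 *	hfs_unlock_truncate(cp, 1);
 */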