/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_node.c	8.6 (Berkeley) 5/22/95
 * FreeBSD-Id: nfs_node.c,v 1.22 1997/10/28 14:06:20 bde Exp $
 */


#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/vnode.h>
#include <sys/ubc.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/time.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>

#define	NFSNOHASH(fhsum) \
	(&nfsnodehashtbl[(fhsum) & nfsnodehash])
static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl;
static u_long nfsnodehash;

static lck_grp_t *nfs_node_hash_lck_grp;
static lck_grp_t *nfs_node_lck_grp;
static lck_grp_t *nfs_data_lck_grp;
lck_mtx_t *nfs_node_hash_mutex;

#define NFS_NODE_DBG(...) NFS_DBG(NFS_FAC_NODE, 7, ## __VA_ARGS__)

/*
 * Initialize the lock groups and hash-chain mutex
 * used for nfsnodes.
 */
void
nfs_nhinit(void)
{
	nfs_node_hash_lck_grp = lck_grp_alloc_init("nfs_node_hash", LCK_GRP_ATTR_NULL);
	nfs_node_hash_mutex = lck_mtx_alloc_init(nfs_node_hash_lck_grp, LCK_ATTR_NULL);
	nfs_node_lck_grp = lck_grp_alloc_init("nfs_node", LCK_GRP_ATTR_NULL);
	nfs_data_lck_grp = lck_grp_alloc_init("nfs_data", LCK_GRP_ATTR_NULL);
}

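/*
 * Finish hash table initialization: allocate the nfsnode hash table
 * (sized from desiredvnodes) the first time this is called.
 */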
void
nfs_nhinit_finish(void)
{
	lck_mtx_lock(nfs_node_hash_mutex);
	if (!nfsnodehashtbl)
		nfsnodehashtbl = hashinit(desiredvnodes, M_NFSNODE, &nfsnodehash);
	lck_mtx_unlock(nfs_node_hash_mutex);
}

/*
 * Compute the hash used to find an entry in the NFS node hash table
 */
u_long
nfs_hash(u_char *fhp, int fhsize)
{
	u_long fhsum;
	int i;

	fhsum = 0;
	for (i = 0; i < fhsize; i++)
		fhsum += *fhp++;
	return (fhsum);
}


int nfs_case_insensitive(mount_t);

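/*
 * Return nonzero if the given NFS mount appears to be case insensitive,
 * based on the pathconf/fsattr information cached for the mount.
 * NFSv2 has no way to tell, so it is always treated as case sensitive.
 */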
int
nfs_case_insensitive(mount_t mp)
{
	struct nfsmount *nmp = VFSTONFS(mp);
	int answer = 0;
	int skip = 0;

	if (nmp == NULL) {
		return (0);
	}

	if (nmp->nm_vers == NFS_VER2) {
		/* V2 has no way to know */
		return (0);
	}

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_vers == NFS_VER3) {
		if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) {
			/* We're holding the mount's lock, so just return
			 * with the answer as case sensitive. It is very rare
			 * for file systems not to be homogeneous w.r.t. pathconf.
			 */
			skip = 1;
		}
	} else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
		/* no pathconf info cached */
		skip = 1;
	}

	if (!skip && NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE))
		answer = 1;

	lck_mtx_unlock(&nmp->nm_lock);

	return (answer);
}


/*
 * Look up a vnode/nfsnode by file handle.
 * Callers must check for mount points!!
 * On success, a referenced vnode/nfsnode is returned in *npp.
 */
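/*
 * The NG_* flags modify the lookup: NG_NOCREATE fails with ENOENT rather
 * than creating a new node, NG_MAKEENTRY adds the result to the name cache,
 * and NG_MARKROOT marks the new vnode as the root of the mount.
 */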
int
nfs_nget(
	mount_t mp,
	nfsnode_t dnp,
	struct componentname *cnp,
	u_char *fhp,
	int fhsize,
	struct nfs_vattr *nvap,
	u_int64_t *xidp,
	uint32_t auth,
	int flags,
	nfsnode_t *npp)
{
	nfsnode_t np;
	struct nfsnodehashhead *nhpp;
	vnode_t vp;
	int error, nfsvers;
	mount_t mp2;
	struct vnode_fsparam vfsp;
	uint32_t vid;

	FSDBG_TOP(263, mp, dnp, flags, npp);

	/* Check for unmount in progress */
	if (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)) {
		*npp = NULL;
		error = ENXIO;
		FSDBG_BOT(263, mp, dnp, 0xd1e, error);
		return (error);
	}
	nfsvers = VFSTONFS(mp)->nm_vers;

	nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
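	/*
	 * Search the hash chain for an existing node with the same mount
	 * and file handle; if one is found we reuse it, otherwise we fall
	 * through and create a new nfsnode below.
	 */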
loop:
	lck_mtx_lock(nfs_node_hash_mutex);
	for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) {
		mp2 = (np->n_hflag & NHINIT) ? np->n_mount : NFSTOMP(np);
		if (mp != mp2 || np->n_fhsize != fhsize ||
		    bcmp(fhp, np->n_fhp, fhsize))
			continue;
		if (nvap && (nvap->nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) &&
		    cnp && (cnp->cn_namelen > (fhsize - (int)sizeof(dnp)))) {
			/* The name was too long to fit in the file handle.  Check it against the node's name. */
			int namecmp = 0;
			const char *vname = vnode_getname(NFSTOV(np));
			if (vname) {
				if (cnp->cn_namelen != (int)strlen(vname))
					namecmp = 1;
				else
					namecmp = strncmp(vname, cnp->cn_nameptr, cnp->cn_namelen);
				vnode_putname(vname);
			}
			if (namecmp)  /* full name didn't match */
				continue;
		}
		FSDBG(263, dnp, np, np->n_flag, 0xcace0000);
		/* if the node is locked, sleep on it */
		if ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE)) {
			np->n_hflag |= NHLOCKWANT;
			FSDBG(263, dnp, np, np->n_flag, 0xcace2222);
			msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL);
			FSDBG(263, dnp, np, np->n_flag, 0xcace3333);
			goto loop;
		}
		vp = NFSTOV(np);
		vid = vnode_vid(vp);
		lck_mtx_unlock(nfs_node_hash_mutex);
		if ((error = vnode_getwithvid(vp, vid))) {
			/*
			 * If vnode is being reclaimed or has already
			 * changed identity, no need to wait.
			 */
			FSDBG_BOT(263, dnp, *npp, 0xcace0d1e, error);
			return (error);
		}
		if ((error = nfs_node_lock(np))) {
			/* this only fails if the node is now unhashed */
			/* so let's see if we can find/create it again */
			FSDBG(263, dnp, *npp, 0xcaced1e2, error);
			vnode_put(vp);
			if (flags & NG_NOCREATE) {
				*npp = 0;
				FSDBG_BOT(263, dnp, *npp, 0xcaced1e0, ENOENT);
				return (ENOENT);
			}
			goto loop;
		}
		/* update attributes */
		if (nvap)
			error = nfs_loadattrcache(np, nvap, xidp, 0);
		if (error) {
			nfs_node_unlock(np);
			vnode_put(vp);
		} else {
			if (dnp && cnp && (flags & NG_MAKEENTRY))
				cache_enter(NFSTOV(dnp), vp, cnp);
			/*
			 * Update the vnode if the name and/or the parent has
			 * changed. We need to do this so that if getattrlist is
			 * called asking for ATTR_CMN_NAME, the "most" correct
			 * name is returned. In addition, for monitored vnodes
			 * we need to kick the vnode out of the name cache. We
			 * do this so that if there are hard links in the same
			 * directory the link will not be found and a lookup
			 * will get us here to return the name of the current
			 * link. In addition, by removing the name from the name
			 * cache the old name will not be found after a rename
			 * done on another client or on the server.  The
			 * principal reason to do this is that Finder is asking
			 * for notifications on a directory. The directory
			 * changes, Finder gets notified, reads the directory
			 * (which we have purged) and for each entry returned
			 * calls getattrlist with the name returned from
			 * readdir. getattrlist has to call namei/lookup to
			 * resolve the name, and because it's not in the cache
			 * we end up here. We need to update the name so Finder
			 * will get the name it called us with.
			 *
			 * We had an imperfect solution with respect to case
			 * sensitivity.  There is a test that is run in
			 * FileBuster that does renames from some name to
			 * another name differing only in case. It then reads
			 * the directory looking for the new name, and after it
			 * finds that new name, it asks getattrlist to verify
			 * that the name is the new name.  Usually that works,
			 * but renames generate fsevents and fseventsd will do a
			 * lookup on the name via lstat. Since that test renames
			 * the old name to the new name back and forth, there is
			 * a race where an fsevent will be behind and will
			 * access the file by the old name; on a case
			 * insensitive file system that will work. The problem
			 * is that if we do a case sensitive compare, we're
			 * going to change the name, and the test's getattrlist
			 * verification step is going to fail. So we check the
			 * case sensitivity of the file system and do the
			 * appropriate compare. In the rare instance of a file
			 * system that is not homogeneous w.r.t. pathconf, we
			 * will use case sensitive compares. That could break if
			 * the file system is actually case insensitive.
			 *
			 * Note that V2 does not know the case, so we just
			 * assume case sensitivity.
			 *
			 * This is clearly not perfect due to races, but this is
			 * as good as it's going to get. You can defeat the
			 * handling of hard links simply by doing:
			 *
			 *	while :; do ls -l > /dev/null; done
			 *
			 * in a terminal window. Even a single ls -l can cause a
			 * race.
			 *
			 * <rant>What we really need is for the caller, who
			 * knows the name being used is valid since it got it
			 * from a readdir, to use that name and not ask for
			 * ATTR_CMN_NAME.</rant>
			 */
			if (dnp && cnp && (vp != NFSTOV(dnp))) {
				int update_flags = (vnode_ismonitored((NFSTOV(dnp)))) ? VNODE_UPDATE_CACHE : 0;
				int (*cmp)(const char *s1, const char *s2, size_t n);

				cmp = nfs_case_insensitive(mp) ? strncasecmp : strncmp;

				if (vp->v_name && cnp->cn_namelen && (*cmp)(cnp->cn_nameptr, vp->v_name, cnp->cn_namelen))
					update_flags |= VNODE_UPDATE_NAME;
				if ((vp->v_name == NULL && cnp->cn_namelen != 0) || (vp->v_name != NULL && cnp->cn_namelen == 0))
					update_flags |= VNODE_UPDATE_NAME;
				if (vnode_parent(vp) != NFSTOV(dnp))
					update_flags |= VNODE_UPDATE_PARENT;
				if (update_flags) {
					NFS_NODE_DBG("vnode_update_identity old name %s new name %*s\n",
						     vp->v_name, cnp->cn_namelen, cnp->cn_nameptr ? cnp->cn_nameptr : "");
					vnode_update_identity(vp, NFSTOV(dnp), cnp->cn_nameptr, cnp->cn_namelen, 0, update_flags);
				}
			}

			*npp = np;
		}
		FSDBG_BOT(263, dnp, *npp, 0xcace0000, error);
		return(error);
	}

	FSDBG(263, mp, dnp, npp, 0xaaaaaaaa);

	if (flags & NG_NOCREATE) {
		lck_mtx_unlock(nfs_node_hash_mutex);
		*npp = 0;
		FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOENT);
		return (ENOENT);
	}

	/*
	 * allocate and initialize nfsnode and stick it in the hash
	 * before calling vnode_create().  Anyone finding it in the
	 * hash before initialization is complete will wait for it.
	 */
	MALLOC_ZONE(np, nfsnode_t, sizeof *np, M_NFSNODE, M_WAITOK);
	if (!np) {
		lck_mtx_unlock(nfs_node_hash_mutex);
		*npp = 0;
		FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOMEM);
		return (ENOMEM);
	}
	bzero(np, sizeof *np);
	np->n_hflag |= (NHINIT | NHLOCKED);
	np->n_mount = mp;
	np->n_auth = auth;
	TAILQ_INIT(&np->n_opens);
	TAILQ_INIT(&np->n_lock_owners);
	TAILQ_INIT(&np->n_locks);
	np->n_dlink.tqe_next = NFSNOLIST;
	np->n_dreturn.tqe_next = NFSNOLIST;
	np->n_monlink.le_next = NFSNOLIST;

	/* ugh... need to keep track of ".zfs" directories to work around server bugs */
	if ((nvap->nva_type == VDIR) && cnp && (cnp->cn_namelen == 4) &&
	    (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == 'z') &&
	    (cnp->cn_nameptr[2] == 'f') && (cnp->cn_nameptr[3] == 's'))
		np->n_flag |= NISDOTZFS;
	if (dnp && (dnp->n_flag & NISDOTZFS))
		np->n_flag |= NISDOTZFSCHILD;

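	/*
	 * Take a persistent reference (vnode_ref) on the parent directory,
	 * except for "..", and remember it in n_parent.
	 */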
	if (dnp && cnp && ((cnp->cn_namelen != 2) ||
	    (cnp->cn_nameptr[0] != '.') || (cnp->cn_nameptr[1] != '.'))) {
		vnode_t dvp = NFSTOV(dnp);
		if (!vnode_get(dvp)) {
			if (!vnode_ref(dvp))
				np->n_parent = dvp;
			vnode_put(dvp);
		}
	}

	/* setup node's file handle */
	if (fhsize > NFS_SMALLFH) {
		MALLOC_ZONE(np->n_fhp, u_char *,
				fhsize, M_NFSBIGFH, M_WAITOK);
		if (!np->n_fhp) {
			lck_mtx_unlock(nfs_node_hash_mutex);
			FREE_ZONE(np, sizeof *np, M_NFSNODE);
			*npp = 0;
			FSDBG_BOT(263, dnp, *npp, 0x80000002, ENOMEM);
			return (ENOMEM);
		}
	} else {
		np->n_fhp = &np->n_fh[0];
	}
	bcopy(fhp, np->n_fhp, fhsize);
	np->n_fhsize = fhsize;

	/* Insert the nfsnode in the hash queue for its new file handle */
	LIST_INSERT_HEAD(nhpp, np, n_hash);
	np->n_hflag |= NHHASHED;
	FSDBG(266, 0, np, np->n_flag, np->n_hflag);

	/* lock the new nfsnode */
	lck_mtx_init(&np->n_lock, nfs_node_lck_grp, LCK_ATTR_NULL);
	lck_rw_init(&np->n_datalock, nfs_data_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init(&np->n_openlock, nfs_open_grp, LCK_ATTR_NULL);
	lck_mtx_lock(&np->n_lock);

	/* release lock on hash table */
	lck_mtx_unlock(nfs_node_hash_mutex);

	/* do initial loading of attributes */
	NACLINVALIDATE(np);
	NACCESSINVALIDATE(np);
	error = nfs_loadattrcache(np, nvap, xidp, 1);
	if (error) {
		FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
		nfs_node_unlock(np);
		lck_mtx_lock(nfs_node_hash_mutex);
		LIST_REMOVE(np, n_hash);
		np->n_hflag &= ~(NHHASHED|NHINIT|NHLOCKED);
		if (np->n_hflag & NHLOCKWANT) {
			np->n_hflag &= ~NHLOCKWANT;
			wakeup(np);
		}
		lck_mtx_unlock(nfs_node_hash_mutex);
		if (np->n_parent) {
			if (!vnode_get(np->n_parent)) {
				vnode_rele(np->n_parent);
				vnode_put(np->n_parent);
			}
			np->n_parent = NULL;
		}
		lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp);
		lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp);
		lck_mtx_destroy(&np->n_openlock, nfs_open_grp);
		if (np->n_fhsize > NFS_SMALLFH)
			FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH);
		FREE_ZONE(np, sizeof *np, M_NFSNODE);
		*npp = 0;
		FSDBG_BOT(263, dnp, *npp, 0x80000003, error);
		return (error);
	}
	NFS_CHANGED_UPDATE(nfsvers, np, nvap);
	if (nvap->nva_type == VDIR)
		NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);

	/* now, attempt to get a new vnode */
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = nvap->nva_type;
	vfsp.vnfs_str = "nfs";
	vfsp.vnfs_dvp = dnp ? NFSTOV(dnp) : NULL;
	vfsp.vnfs_fsnode = np;
	if (nfsvers == NFS_VER4) {
#if FIFO
		if (nvap->nva_type == VFIFO)
			vfsp.vnfs_vops = fifo_nfsv4nodeop_p;
		else
#endif /* FIFO */
		if (nvap->nva_type == VBLK || nvap->nva_type == VCHR)
			vfsp.vnfs_vops = spec_nfsv4nodeop_p;
		else
			vfsp.vnfs_vops = nfsv4_vnodeop_p;
	} else {
#if FIFO
		if (nvap->nva_type == VFIFO)
			vfsp.vnfs_vops = fifo_nfsv2nodeop_p;
		else
#endif /* FIFO */
		if (nvap->nva_type == VBLK || nvap->nva_type == VCHR)
			vfsp.vnfs_vops = spec_nfsv2nodeop_p;
		else
			vfsp.vnfs_vops = nfsv2_vnodeop_p;
	}
	vfsp.vnfs_markroot = (flags & NG_MARKROOT) ? 1 : 0;
	vfsp.vnfs_marksystem = 0;
	vfsp.vnfs_rdev = 0;
	vfsp.vnfs_filesize = nvap->nva_size;
	vfsp.vnfs_cnp = cnp;
	vfsp.vnfs_flags = VNFS_ADDFSREF;
	if (!dnp || !cnp || !(flags & NG_MAKEENTRY))
		vfsp.vnfs_flags |= VNFS_NOCACHE;

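	/*
	 * NFSv4 directories flagged as triggers (mirror mounts/referrals) are
	 * created as trigger vnodes with resolve/unresolve/rearm callbacks.
	 */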
#if CONFIG_TRIGGERS
	if ((nfsvers >= NFS_VER4) && (nvap->nva_type == VDIR) && (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
		struct vnode_trigger_param vtp;
		bzero(&vtp, sizeof(vtp));
		bcopy(&vfsp, &vtp.vnt_params, sizeof(vfsp));
		vtp.vnt_resolve_func = nfs_mirror_mount_trigger_resolve;
		vtp.vnt_unresolve_func = nfs_mirror_mount_trigger_unresolve;
		vtp.vnt_rearm_func = nfs_mirror_mount_trigger_rearm;
		vtp.vnt_flags = VNT_AUTO_REARM;
		error = vnode_create(VNCREATE_TRIGGER, VNCREATE_TRIGGER_SIZE, &vtp, &np->n_vnode);
	} else
#endif
	{
		error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode);
	}
	if (error) {
		FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
		nfs_node_unlock(np);
		lck_mtx_lock(nfs_node_hash_mutex);
		LIST_REMOVE(np, n_hash);
		np->n_hflag &= ~(NHHASHED|NHINIT|NHLOCKED);
		if (np->n_hflag & NHLOCKWANT) {
			np->n_hflag &= ~NHLOCKWANT;
			wakeup(np);
		}
		lck_mtx_unlock(nfs_node_hash_mutex);
		if (np->n_parent) {
			if (!vnode_get(np->n_parent)) {
				vnode_rele(np->n_parent);
				vnode_put(np->n_parent);
			}
			np->n_parent = NULL;
		}
		lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp);
		lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp);
		lck_mtx_destroy(&np->n_openlock, nfs_open_grp);
		if (np->n_fhsize > NFS_SMALLFH)
			FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH);
		FREE_ZONE(np, sizeof *np, M_NFSNODE);
		*npp = 0;
		FSDBG_BOT(263, dnp, *npp, 0x80000004, error);
		return (error);
	}
	vp = np->n_vnode;
	vnode_settag(vp, VT_NFS);
	/* node is now initialized */

	/* check if anyone's waiting on this node */
	lck_mtx_lock(nfs_node_hash_mutex);
	np->n_hflag &= ~(NHINIT|NHLOCKED);
	if (np->n_hflag & NHLOCKWANT) {
		np->n_hflag &= ~NHLOCKWANT;
		wakeup(np);
	}
	lck_mtx_unlock(nfs_node_hash_mutex);

	*npp = np;

	FSDBG_BOT(263, dnp, vp, *npp, error);
	return (error);
}


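/*
 * Vnode inactive operation:
 * clean up any open state left over from an NFS create and remove any
 * file that was sillyrenamed earlier, now that the node is no longer in use.
 */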
int
nfs_vnop_inactive(ap)
	struct vnop_inactive_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		vfs_context_t a_context;
	} */ *ap;
{
	vnode_t vp = ap->a_vp;
	vfs_context_t ctx = ap->a_context;
	nfsnode_t np;
	struct nfs_sillyrename *nsp;
	struct nfs_vattr nvattr;
	int unhash, attrerr, busyerror, error, inuse, busied, force;
	struct nfs_open_file *nofp;
	struct componentname cn;
	struct nfsmount *nmp;
	mount_t mp;

	if (vp == NULL)
		panic("nfs_vnop_inactive: vp == NULL");
	np = VTONFS(vp);
	if (np == NULL)
		panic("nfs_vnop_inactive: np == NULL");

	nmp = NFSTONMP(np);
	mp = vnode_mount(vp);

restart:
	force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));
	error = 0;
	inuse = (nfs_mount_state_in_use_start(nmp, NULL) == 0);

	/* There shouldn't be any open or lock state at this point */
	lck_mtx_lock(&np->n_openlock);
	if (np->n_openrefcnt && !force) {
		/*
		 * vnode_rele and vnode_put drop the vnode lock before
		 * calling VNOP_INACTIVE, so there is a race where the
		 * vnode could become active again. Perhaps there are
		 * other places where this can happen, so if we've gotten
		 * here we need to get out.
		 */
#ifdef NFS_NODE_DEBUG
		NP(np, "nfs_vnop_inactive: still open: %d", np->n_openrefcnt);
#endif
		lck_mtx_unlock(&np->n_openlock);
		return 0;
	}

	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
		lck_mtx_lock(&nofp->nof_lock);
		if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) {
			if (!force)
				NP(np, "nfs_vnop_inactive: open file busy");
			busied = 0;
		} else {
			nofp->nof_flags |= NFS_OPEN_FILE_BUSY;
			busied = 1;
		}
		lck_mtx_unlock(&nofp->nof_lock);
		if ((np->n_flag & NREVOKE) || (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			if (busied)
				nfs_open_file_clear_busy(nofp);
			continue;
		}
		/*
		 * If we just created the file, we already had it open in
		 * anticipation of getting a subsequent open call.  If the
		 * node has gone inactive without being open, we need to
		 * clean up (close) the open done in the create.
		 */
		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && nofp->nof_creator && !force) {
			if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
				lck_mtx_unlock(&np->n_openlock);
				if (busied)
					nfs_open_file_clear_busy(nofp);
				if (inuse)
					nfs_mount_state_in_use_end(nmp, 0);
				if (!nfs4_reopen(nofp, NULL))
					goto restart;
			}
			nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
			lck_mtx_unlock(&np->n_openlock);
			error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
			if (error) {
				NP(np, "nfs_vnop_inactive: create close error: %d", error);
				nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
			}
			if (busied)
				nfs_open_file_clear_busy(nofp);
			if (inuse)
				nfs_mount_state_in_use_end(nmp, error);
			goto restart;
		}
		if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) {
			/*
			 * If the file is marked as needing reopen, but this was the only
			 * open on the file, just drop the open.
			 */
			nofp->nof_flags &= ~NFS_OPEN_FILE_NEEDCLOSE;
			if ((nofp->nof_flags & NFS_OPEN_FILE_REOPEN) && (nofp->nof_opencnt == 1)) {
				nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
				nofp->nof_r--;
				nofp->nof_opencnt--;
				nofp->nof_access = 0;
			} else if (!force) {
				lck_mtx_unlock(&np->n_openlock);
				if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
					if (busied)
						nfs_open_file_clear_busy(nofp);
					if (inuse)
						nfs_mount_state_in_use_end(nmp, 0);
					if (!nfs4_reopen(nofp, NULL))
						goto restart;
				}
				error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
				if (error) {
					NP(np, "nfs_vnop_inactive: need close error: %d", error);
					nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
				}
				if (busied)
					nfs_open_file_clear_busy(nofp);
				if (inuse)
					nfs_mount_state_in_use_end(nmp, error);
				goto restart;
			}
		}
		if (nofp->nof_opencnt && !force)
			NP(np, "nfs_vnop_inactive: file still open: %d", nofp->nof_opencnt);
		if (!force && (nofp->nof_access || nofp->nof_deny ||
		    nofp->nof_mmap_access || nofp->nof_mmap_deny ||
		    nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
		    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
		    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw ||
		    nofp->nof_d_r || nofp->nof_d_w || nofp->nof_d_rw ||
		    nofp->nof_d_r_dw || nofp->nof_d_w_dw || nofp->nof_d_rw_dw ||
		    nofp->nof_d_r_drw || nofp->nof_d_w_drw || nofp->nof_d_rw_drw)) {
			NP(np, "nfs_vnop_inactive: non-zero access: %d %d %d %d # %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u",
				nofp->nof_access, nofp->nof_deny,
				nofp->nof_mmap_access, nofp->nof_mmap_deny,
				nofp->nof_r, nofp->nof_d_r,
				nofp->nof_w, nofp->nof_d_w,
				nofp->nof_rw, nofp->nof_d_rw,
				nofp->nof_r_dw, nofp->nof_d_r_dw,
				nofp->nof_w_dw, nofp->nof_d_w_dw,
				nofp->nof_rw_dw, nofp->nof_d_rw_dw,
				nofp->nof_r_drw, nofp->nof_d_r_drw,
				nofp->nof_w_drw, nofp->nof_d_w_drw,
				nofp->nof_rw_drw, nofp->nof_d_rw_drw);
		}
		if (busied)
			nfs_open_file_clear_busy(nofp);
	}
	lck_mtx_unlock(&np->n_openlock);

	if (inuse && nfs_mount_state_in_use_end(nmp, error))
		goto restart;

	nfs_node_lock_force(np);

	if (vnode_vtype(vp) != VDIR) {
		nsp = np->n_sillyrename;
		np->n_sillyrename = NULL;
	} else {
		nsp = NULL;
	}

	FSDBG_TOP(264, vp, np, np->n_flag, nsp);

	if (!nsp) {
		/* no silly file to clean up... */
		/* clear all flags other than these */
		np->n_flag &= (NMODIFIED);
		nfs_node_unlock(np);
		FSDBG_BOT(264, vp, np, np->n_flag, 0);
		return (0);
	}
	nfs_node_unlock(np);

	/* Remove the silly file that was rename'd earlier */

	/* flush all the buffers */
	nfs_vinvalbuf2(vp, V_SAVE, vfs_context_thread(ctx), nsp->nsr_cred, 1);

	/* try to get the latest attributes */
	attrerr = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED);

	/* Check if we should remove it from the node hash. */
	/* Leave it if inuse or it has multiple hard links. */
	if (vnode_isinuse(vp, 0) || (!attrerr && (nvattr.nva_nlink > 1))) {
		unhash = 0;
	} else {
		unhash = 1;
		ubc_setsize(vp, 0);
	}

	/* mark this node and the directory busy while we do the remove */
	busyerror = nfs_node_set_busy2(nsp->nsr_dnp, np, vfs_context_thread(ctx));

	/* lock the node while we remove the silly file */
	lck_mtx_lock(nfs_node_hash_mutex);
	while (np->n_hflag & NHLOCKED) {
		np->n_hflag |= NHLOCKWANT;
		msleep(np, nfs_node_hash_mutex, PINOD, "nfs_inactive", NULL);
	}
	np->n_hflag |= NHLOCKED;
	lck_mtx_unlock(nfs_node_hash_mutex);

	/* purge the name cache to deter others from finding it */
	bzero(&cn, sizeof(cn));
	cn.cn_nameptr = nsp->nsr_name;
	cn.cn_namelen = nsp->nsr_namlen;
	nfs_name_cache_purge(nsp->nsr_dnp, np, &cn, ctx);

	FSDBG(264, np, np->n_size, np->n_vattr.nva_size, 0xf00d00f1);

	/* now remove the silly file */
	nfs_removeit(nsp);

	/* clear all flags other than these */
	nfs_node_lock_force(np);
	np->n_flag &= (NMODIFIED);
	nfs_node_unlock(np);

	if (!busyerror)
		nfs_node_clear_busy2(nsp->nsr_dnp, np);

	if (unhash && vnode_isinuse(vp, 0)) {
		/* vnode now inuse after silly remove? */
		unhash = 0;
		ubc_setsize(vp, np->n_size);
	}

	lck_mtx_lock(nfs_node_hash_mutex);
	if (unhash) {
		/*
		 * remove nfsnode from hash now so we can't accidentally find it
		 * again if another object gets created with the same filehandle
		 * before this vnode gets reclaimed
		 */
		if (np->n_hflag & NHHASHED) {
			LIST_REMOVE(np, n_hash);
			np->n_hflag &= ~NHHASHED;
			FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
		}
		vnode_recycle(vp);
	}
	/* unlock the node */
	np->n_hflag &= ~NHLOCKED;
	if (np->n_hflag & NHLOCKWANT) {
		np->n_hflag &= ~NHLOCKWANT;
		wakeup(np);
	}
	lck_mtx_unlock(nfs_node_hash_mutex);

	/* cleanup sillyrename info */
	if (nsp->nsr_cred != NOCRED)
		kauth_cred_unref(&nsp->nsr_cred);
	vnode_rele(NFSTOV(nsp->nsr_dnp));
	FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ);

	FSDBG_BOT(264, vp, np, np->n_flag, 0);
	return (0);
}

/*
 * Reclaim an nfsnode so that it can be used for other purposes.
 */
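/*
 * This drops any remaining NFSv4 delegation, file lock, lock owner, and
 * open file state, pulls the node off the monitored-node and hash lists,
 * invalidates its buffers, and frees the nfsnode and its file handle.
 */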
int
nfs_vnop_reclaim(ap)
	struct vnop_reclaim_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		vfs_context_t a_context;
	} */ *ap;
{
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	vfs_context_t ctx = ap->a_context;
	struct nfs_open_file *nofp, *nextnofp;
	struct nfs_file_lock *nflp, *nextnflp;
	struct nfs_lock_owner *nlop, *nextnlop;
	struct nfsmount *nmp = np->n_mount ? VFSTONFS(np->n_mount) : NFSTONMP(np);
	mount_t mp = vnode_mount(vp);
	int force;

	FSDBG_TOP(265, vp, np, np->n_flag, 0);
	force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));

	/* There shouldn't be any open or lock state at this point */
	lck_mtx_lock(&np->n_openlock);

	if (nmp && (nmp->nm_vers >= NFS_VER4)) {
		/* need to drop a delegation */
		if (np->n_dreturn.tqe_next != NFSNOLIST) {
			/* remove this node from the delegation return list */
			lck_mtx_lock(&nmp->nm_lock);
			if (np->n_dreturn.tqe_next != NFSNOLIST) {
				TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
				np->n_dreturn.tqe_next = NFSNOLIST;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if (np->n_dlink.tqe_next != NFSNOLIST) {
			/* remove this node from the delegation list */
			lck_mtx_lock(&nmp->nm_lock);
			if (np->n_dlink.tqe_next != NFSNOLIST) {
				TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
				np->n_dlink.tqe_next = NFSNOLIST;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if ((np->n_openflags & N_DELEG_MASK) && !force) {
			/* try to return the delegation */
			np->n_openflags &= ~N_DELEG_MASK;
			nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid,
				R_RECOVER, vfs_context_thread(ctx), vfs_context_ucred(ctx));
		}
		if (np->n_attrdirfh) {
			FREE(np->n_attrdirfh, M_TEMP);
			np->n_attrdirfh = NULL;
		}
	}

	/* clean up file locks */
	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
		if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !force) {
			NP(np, "nfs_vnop_reclaim: lock 0x%llx 0x%llx 0x%x (bc %d)",
				nflp->nfl_start, nflp->nfl_end, nflp->nfl_flags, nflp->nfl_blockcnt);
		}
		if (!(nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))) {
			/* try sending an unlock RPC if it wasn't delegated */
			if (!(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED) && !force)
				nmp->nm_funcs->nf_unlock_rpc(np, nflp->nfl_owner, F_WRLCK, nflp->nfl_start, nflp->nfl_end, R_RECOVER,
					NULL, nflp->nfl_owner->nlo_open_owner->noo_cred);
			lck_mtx_lock(&nflp->nfl_owner->nlo_lock);
			TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink);
			lck_mtx_unlock(&nflp->nfl_owner->nlo_lock);
		}
		TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
		nfs_file_lock_destroy(nflp);
	}
	/* clean up lock owners */
	TAILQ_FOREACH_SAFE(nlop, &np->n_lock_owners, nlo_link, nextnlop) {
		if (!TAILQ_EMPTY(&nlop->nlo_locks) && !force)
			NP(np, "nfs_vnop_reclaim: lock owner with locks");
		TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link);
		nfs_lock_owner_destroy(nlop);
	}
	/* clean up open state */
	if (np->n_openrefcnt && !force)
		NP(np, "nfs_vnop_reclaim: still open: %d", np->n_openrefcnt);
	TAILQ_FOREACH_SAFE(nofp, &np->n_opens, nof_link, nextnofp) {
		if (nofp->nof_flags & NFS_OPEN_FILE_BUSY)
			NP(np, "nfs_vnop_reclaim: open file busy");
		if (!(np->n_flag & NREVOKE) && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			if (nofp->nof_opencnt && !force)
				NP(np, "nfs_vnop_reclaim: file still open: %d", nofp->nof_opencnt);
			if (!force && (nofp->nof_access || nofp->nof_deny ||
			    nofp->nof_mmap_access || nofp->nof_mmap_deny ||
			    nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
			    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
			    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw ||
			    nofp->nof_d_r || nofp->nof_d_w || nofp->nof_d_rw ||
			    nofp->nof_d_r_dw || nofp->nof_d_w_dw || nofp->nof_d_rw_dw ||
			    nofp->nof_d_r_drw || nofp->nof_d_w_drw || nofp->nof_d_rw_drw)) {
				NP(np, "nfs_vnop_reclaim: non-zero access: %d %d %d %d # %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u",
					nofp->nof_access, nofp->nof_deny,
					nofp->nof_mmap_access, nofp->nof_mmap_deny,
					nofp->nof_r, nofp->nof_d_r,
					nofp->nof_w, nofp->nof_d_w,
					nofp->nof_rw, nofp->nof_d_rw,
					nofp->nof_r_dw, nofp->nof_d_r_dw,
					nofp->nof_w_dw, nofp->nof_d_w_dw,
					nofp->nof_rw_dw, nofp->nof_d_rw_dw,
					nofp->nof_r_drw, nofp->nof_d_r_drw,
					nofp->nof_w_drw, nofp->nof_d_w_drw,
					nofp->nof_rw_drw, nofp->nof_d_rw_drw);
				/* try sending a close RPC if it wasn't delegated */
				if (nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
				    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
				    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw)
					nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER);
			}
		}
		TAILQ_REMOVE(&np->n_opens, nofp, nof_link);
		nfs_open_file_destroy(nofp);
	}
	lck_mtx_unlock(&np->n_openlock);

	if (np->n_monlink.le_next != NFSNOLIST) {
		/* Wait for any in-progress getattr to complete, */
		/* then remove this node from the monitored node list. */
		lck_mtx_lock(&nmp->nm_lock);
		while (np->n_mflag & NMMONSCANINPROG) {
			struct timespec ts = { 1, 0 };
			np->n_mflag |= NMMONSCANWANT;
			msleep(&np->n_mflag, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
		}
		if (np->n_monlink.le_next != NFSNOLIST) {
			LIST_REMOVE(np, n_monlink);
			np->n_monlink.le_next = NFSNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	lck_mtx_lock(nfs_buf_mutex);
	if (!force && (!LIST_EMPTY(&np->n_dirtyblkhd) || !LIST_EMPTY(&np->n_cleanblkhd)))
		NP(np, "nfs_reclaim: dropping %s buffers", (!LIST_EMPTY(&np->n_dirtyblkhd) ? "dirty" : "clean"));
	lck_mtx_unlock(nfs_buf_mutex);
	nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ap->a_context, 0);

	lck_mtx_lock(nfs_node_hash_mutex);

	if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) {
		if (!force)
			NP(np, "nfs_reclaim: leaving unlinked file %s", np->n_sillyrename->nsr_name);
		if (np->n_sillyrename->nsr_cred != NOCRED)
			kauth_cred_unref(&np->n_sillyrename->nsr_cred);
		vnode_rele(NFSTOV(np->n_sillyrename->nsr_dnp));
		FREE_ZONE(np->n_sillyrename, sizeof(*np->n_sillyrename), M_NFSREQ);
	}

	vnode_removefsref(vp);

	if (np->n_hflag & NHHASHED) {
		LIST_REMOVE(np, n_hash);
		np->n_hflag &= ~NHHASHED;
		FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
	}
	lck_mtx_unlock(nfs_node_hash_mutex);

	/*
	 * Free up any directory cookie structures and large file handle
	 * structures that might be associated with this nfs node.
	 */
	nfs_node_lock_force(np);
	if ((vnode_vtype(vp) == VDIR) && np->n_cookiecache)
		FREE_ZONE(np->n_cookiecache, sizeof(struct nfsdmap), M_NFSDIROFF);
	if (np->n_fhsize > NFS_SMALLFH)
		FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH);
	if (np->n_vattr.nva_acl)
		kauth_acl_free(np->n_vattr.nva_acl);
	nfs_node_unlock(np);
	vnode_clearfsnode(vp);

	if (np->n_parent) {
		if (!vnode_get(np->n_parent)) {
			vnode_rele(np->n_parent);
			vnode_put(np->n_parent);
		}
		np->n_parent = NULL;
	}

	lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp);
	lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp);
	lck_mtx_destroy(&np->n_openlock, nfs_open_grp);

	FSDBG_BOT(265, vp, np, np->n_flag, 0xd1ed1e);
	FREE_ZONE(np, sizeof(struct nfsnode), M_NFSNODE);
	return (0);
}

/*
 * Acquire an NFS node lock
 */

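/*
 * The internal version takes a "force" argument: when not forced, the lock
 * attempt fails with ENOENT if the node has already been unhashed.
 */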
int
nfs_node_lock_internal(nfsnode_t np, int force)
{
	FSDBG_TOP(268, np, force, 0, 0);
	lck_mtx_lock(&np->n_lock);
	if (!force && !(np->n_hflag & NHHASHED)) {
		FSDBG_BOT(268, np, 0xdead, 0, 0);
		lck_mtx_unlock(&np->n_lock);
		return (ENOENT);
	}
	FSDBG_BOT(268, np, force, 0, 0);
	return (0);
}

int
nfs_node_lock(nfsnode_t np)
{
	return nfs_node_lock_internal(np, 0);
}

void
nfs_node_lock_force(nfsnode_t np)
{
	nfs_node_lock_internal(np, 1);
}

/*
 * Release an NFS node lock
 */
void
nfs_node_unlock(nfsnode_t np)
{
	FSDBG(269, np, current_thread(), 0, 0);
	lck_mtx_unlock(&np->n_lock);
}

/*
 * Acquire 2 NFS node locks
 *   - locks taken in reverse address order
 *   - both or neither of the locks are taken
 *   - only one lock taken per node (dup nodes are skipped)
 */
int
nfs_node_lock2(nfsnode_t np1, nfsnode_t np2)
{
	nfsnode_t first, second;
	int error;

	first = (np1 > np2) ? np1 : np2;
	second = (np1 > np2) ? np2 : np1;
	if ((error = nfs_node_lock(first)))
		return (error);
	if (np1 == np2)
		return (error);
	if ((error = nfs_node_lock(second)))
		nfs_node_unlock(first);
	return (error);
}

void
nfs_node_unlock2(nfsnode_t np1, nfsnode_t np2)
{
	nfs_node_unlock(np1);
	if (np1 != np2)
		nfs_node_unlock(np2);
}

/*
 * Manage NFS node busy state.
 * (Similar to NFS node locks above)
 */
int
nfs_node_set_busy(nfsnode_t np, thread_t thd)
{
	struct timespec ts = { 2, 0 };
	int error;

	if ((error = nfs_node_lock(np)))
		return (error);
	while (ISSET(np->n_flag, NBUSY)) {
		SET(np->n_flag, NBUSYWANT);
		msleep(np, &np->n_lock, PZERO-1, "nfsbusywant", &ts);
		if ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0)))
			break;
	}
	if (!error)
		SET(np->n_flag, NBUSY);
	nfs_node_unlock(np);
	return (error);
}

void
nfs_node_clear_busy(nfsnode_t np)
{
	int wanted;

	nfs_node_lock_force(np);
	wanted = ISSET(np->n_flag, NBUSYWANT);
	CLR(np->n_flag, NBUSY|NBUSYWANT);
	nfs_node_unlock(np);
	if (wanted)
		wakeup(np);
}

int
nfs_node_set_busy2(nfsnode_t np1, nfsnode_t np2, thread_t thd)
{
	nfsnode_t first, second;
	int error;

	first = (np1 > np2) ? np1 : np2;
	second = (np1 > np2) ? np2 : np1;
	if ((error = nfs_node_set_busy(first, thd)))
		return (error);
	if (np1 == np2)
		return (error);
	if ((error = nfs_node_set_busy(second, thd)))
		nfs_node_clear_busy(first);
	return (error);
}

void
nfs_node_clear_busy2(nfsnode_t np1, nfsnode_t np2)
{
	nfs_node_clear_busy(np1);
	if (np1 != np2)
		nfs_node_clear_busy(np2);
}

/* helper function to sort four nodes in reverse address order (no dupes) */
static void
nfs_node_sort4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4, nfsnode_t *list, int *lcntp)
{
	nfsnode_t na[2], nb[2];
	int a, b, i, lcnt;

	/* sort pairs then merge */
	na[0] = (np1 > np2) ? np1 : np2;
	na[1] = (np1 > np2) ? np2 : np1;
	nb[0] = (np3 > np4) ? np3 : np4;
	nb[1] = (np3 > np4) ? np4 : np3;
	for (a = b = i = lcnt = 0; i < 4; i++) {
		if (a >= 2)
			list[lcnt] = nb[b++];
		else if ((b >= 2) || (na[a] >= nb[b]))
			list[lcnt] = na[a++];
		else
			list[lcnt] = nb[b++];
		if ((lcnt <= 0) || (list[lcnt] != list[lcnt-1]))
			lcnt++; /* omit dups */
	}
	if (list[lcnt-1] == NULL)
		lcnt--;
	*lcntp = lcnt;
}

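/*
 * Mark up to four nodes busy (duplicates are dropped by nfs_node_sort4);
 * if any node can't be busied, clear the ones already taken.
 */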
int
nfs_node_set_busy4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4, thread_t thd)
{
	nfsnode_t list[4];
	int i, lcnt, error;

	nfs_node_sort4(np1, np2, np3, np4, list, &lcnt);

	/* Now we can lock using list[0] through list[lcnt-1] */
	for (i = 0; i < lcnt; ++i)
		if ((error = nfs_node_set_busy(list[i], thd))) {
			/* Drop any locks we acquired. */
			while (--i >= 0)
				nfs_node_clear_busy(list[i]);
			return (error);
		}
	return (0);
}

void
nfs_node_clear_busy4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4)
{
	nfsnode_t list[4];
	int lcnt;

	nfs_node_sort4(np1, np2, np3, np4, list, &lcnt);
	while (--lcnt >= 0)
		nfs_node_clear_busy(list[lcnt]);
}

/*
 * Acquire an NFS node data lock
 */
void
nfs_data_lock(nfsnode_t np, int locktype)
{
	nfs_data_lock_internal(np, locktype, 1);
}
void
nfs_data_lock_noupdate(nfsnode_t np, int locktype)
{
	nfs_data_lock_internal(np, locktype, 0);
}
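/*
 * Shared locks are used by readers; an exclusive lock records the owning
 * thread and, unless the "noupdate" variant is used, applies any pending
 * file size update (NUPDATESIZE) while the lock is taken or dropped.
 */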
void
nfs_data_lock_internal(nfsnode_t np, int locktype, int updatesize)
{
	FSDBG_TOP(270, np, locktype, np->n_datalockowner, 0);
	if (locktype == NFS_DATA_LOCK_SHARED) {
		if (updatesize && ISSET(np->n_flag, NUPDATESIZE))
			nfs_data_update_size(np, 0);
		lck_rw_lock_shared(&np->n_datalock);
	} else {
		lck_rw_lock_exclusive(&np->n_datalock);
		np->n_datalockowner = current_thread();
		if (updatesize && ISSET(np->n_flag, NUPDATESIZE))
			nfs_data_update_size(np, 1);
	}
	FSDBG_BOT(270, np, locktype, np->n_datalockowner, 0);
}

/*
 * Release an NFS node data lock
 */
void
nfs_data_unlock(nfsnode_t np)
{
	nfs_data_unlock_internal(np, 1);
}
void
nfs_data_unlock_noupdate(nfsnode_t np)
{
	nfs_data_unlock_internal(np, 0);
}
void
nfs_data_unlock_internal(nfsnode_t np, int updatesize)
{
	int mine = (np->n_datalockowner == current_thread());
	FSDBG_TOP(271, np, np->n_datalockowner, current_thread(), 0);
	if (updatesize && mine && ISSET(np->n_flag, NUPDATESIZE))
		nfs_data_update_size(np, 1);
	np->n_datalockowner = NULL;
	lck_rw_done(&np->n_datalock);
	if (updatesize && !mine && ISSET(np->n_flag, NUPDATESIZE))
		nfs_data_update_size(np, 0);
	FSDBG_BOT(271, np, np->n_datalockowner, current_thread(), 0);
}


/*
 * update an NFS node's size
 */
void
nfs_data_update_size(nfsnode_t np, int datalocked)
{
	int error;

	FSDBG_TOP(272, np, np->n_flag, np->n_size, np->n_newsize);
	if (!datalocked) {
		nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
		/* grabbing data lock will automatically update size */
		nfs_data_unlock(np);
		FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize);
		return;
	}
	error = nfs_node_lock(np);
	if (error || !ISSET(np->n_flag, NUPDATESIZE)) {
		if (!error)
			nfs_node_unlock(np);
		FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize);
		return;
	}
	CLR(np->n_flag, NUPDATESIZE);
	np->n_size = np->n_newsize;
	/* make sure we invalidate buffers the next chance we get */
	SET(np->n_flag, NNEEDINVALIDATE);
	nfs_node_unlock(np);
	ubc_setsize(NFSTOV(np), (off_t)np->n_size); /* XXX error? */
	FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize);
}

#define DODEBUG 1

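/*
 * Scan the node hash table for any nfsnode belonging to the given mount
 * that still has dirty buffers; returns nonzero if one is found.
 */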
int
nfs_mount_is_dirty(mount_t mp)
{
	u_long i;
	nfsnode_t np;
#ifdef DODEBUG
	struct timeval now, then, diff;
	u_long ncnt = 0;
	microuptime(&now);
#endif
	lck_mtx_lock(nfs_node_hash_mutex);
	for (i = 0; i <= nfsnodehash; i++) {
		LIST_FOREACH(np, &nfsnodehashtbl[i], n_hash) {
#ifdef DODEBUG
			ncnt++;
#endif
			if (np->n_mount == mp && !LIST_EMPTY(&np->n_dirtyblkhd))
				goto out;
		}
	}
out:
	lck_mtx_unlock(nfs_node_hash_mutex);
#ifdef DODEBUG
	microuptime(&then);
	timersub(&then, &now, &diff);

	NFS_DBG(NFS_FAC_SOCK, 7, "mount_is_dirty for %s took %lld mics for %ld slots and %ld nodes return %d\n",
		vfs_statfs(mp)->f_mntfromname, (uint64_t)diff.tv_sec * 1000000LL + diff.tv_usec, i, ncnt, (i <= nfsnodehash));
#endif

	return (i <= nfsnodehash);
}
