1/*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993, 1995
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgement:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)nfs_vfsops.c	8.12 (Berkeley) 5/20/95
65 * FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections.  This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/conf.h>
77#include <sys/ioctl.h>
78#include <sys/signal.h>
79#include <sys/proc_internal.h> /* for fs rooting to update rootdir in fdp */
80#include <sys/kauth.h>
81#include <sys/vnode_internal.h>
82#include <sys/malloc.h>
83#include <sys/kernel.h>
84#include <sys/sysctl.h>
85#include <sys/mount_internal.h>
86#include <sys/kpi_mbuf.h>
87#include <sys/socket.h>
88#include <sys/socketvar.h>
89#include <sys/fcntl.h>
90#include <sys/quota.h>
91#include <sys/priv.h>
92#include <libkern/OSAtomic.h>
93
94#include <sys/vm.h>
95#include <sys/vmparam.h>
96
97#if !defined(NO_MOUNT_PRIVATE)
98#include <sys/filedesc.h>
99#endif /* NO_MOUNT_PRIVATE */
100
101#include <net/if.h>
102#include <net/route.h>
103#include <netinet/in.h>
104
105#include <nfs/rpcv2.h>
106#include <nfs/krpc.h>
107#include <nfs/nfsproto.h>
108#include <nfs/nfs.h>
109#include <nfs/nfsnode.h>
110#include <nfs/nfs_gss.h>
111#include <nfs/nfsmount.h>
112#include <nfs/xdr_subs.h>
113#include <nfs/nfsm_subs.h>
114#include <nfs/nfsdiskless.h>
115#include <nfs/nfs_lock.h>
116#if CONFIG_MACF
117#include <security/mac_framework.h>
118#endif
119
120#include <pexpert/pexpert.h>
121
122/*
123 * NFS client globals
124 */
125
/* NFS timer tick length in hz ticks; computed in nfs_vfs_init() from NFS_TICKINTVL, never less than 1 */
int nfs_ticks;
/* lock groups allocated in nfs_vfs_init(); nfs_global_grp is the group nfs_global_mutex is allocated from */
static lck_grp_t *nfs_global_grp, *nfs_mount_grp;
lck_mtx_t *nfs_global_mutex;
/* NFSv4 attribute bitmaps, filled in by nfs_vfs_init() */
uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN];
uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN];
/* default GETATTR attributes, masked in nfs_vfs_init() by nfs_object_attr_bitmap */
uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
/* tail queue, initialized empty in nfs_vfs_init() */
struct nfsclientidlist nfsclientids;

/* NFS requests: queue, its lock group/mutex, and the request timer thread call (all set up in nfs_vfs_init()) */
struct nfs_reqqhead nfs_reqq;
lck_grp_t *nfs_request_grp;
lck_mtx_t *nfs_request_mutex;
thread_call_t nfs_request_timer_call;
int nfs_request_timer_on;
u_int32_t nfs_xid = 0;
u_int32_t nfs_xidwrap = 0;		/* to build a (non-wrapping) 64 bit xid */

/* thread call for nfs_buf_timer(), allocated in nfs_vfs_init() */
thread_call_t nfs_buf_timer_call;

/* NFSv4: open-state lock group, owner sequence counters, callback timer thread call */
lck_grp_t *nfs_open_grp;
uint32_t nfs_open_owner_seqnum = 0;
uint32_t nfs_lock_owner_seqnum = 0;
thread_call_t nfs4_callback_timer_call;
int nfs4_callback_timer_on = 0;

/* nfsiod: async I/O thread pool state (queues and mutex are initialized in nfs_vfs_init()) */
lck_grp_t *nfsiod_lck_grp;
lck_mtx_t *nfsiod_mutex;
struct nfsiodlist nfsiodfree, nfsiodwork;
struct nfsiodmountlist nfsiodmounts;
int nfsiod_thread_count = 0;
int nfsiod_thread_max = NFS_DEFASYNCTHREAD;
int nfs_max_async_writes = NFS_DEFMAXASYNCWRITES;

/* tunables (NOTE(review): presumably exported via sysctl elsewhere - confirm) */
int nfs_iosize = NFS_IOSIZE;	/* reported as f_iosize by nfs_vfs_getattr() */
int nfs_access_cache_timeout = NFS_MAXATTRTIMO;
int nfs_access_delete = 1; /* too many servers get this wrong - workaround on by default */
int nfs_access_dotzfs = 1;
int nfs_access_for_getattr = 0;
int nfs_allow_async = 0;
/* rate limit applied to statfs RPCs in nfs_vfs_getattr(); values < 1 or >= 1000000 mean "no limit" */
int nfs_statfs_rate_limit = NFS_DEFSTATFSRATELIMIT;
int nfs_lockd_mounts = 0;
int nfs_lockd_request_sent = 0;
int nfs_idmap_ctrl = NFS_IDMAP_CTRL_USE_IDMAP_SERVICE;
int nfs_callback_port = 0;

int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
175
176
/*
 * Forward declarations for the mount helpers.  Their definitions are not
 * all visible alongside these prototypes; the diskless variants are called
 * from the NFS root mount path, and the "private" variant only exists when
 * NO_MOUNT_PRIVATE is not defined.
 */
int		mountnfs(char *, mount_t, vfs_context_t, vnode_t *);
static int	nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
#if !defined(NO_MOUNT_PRIVATE)
static int	nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
#endif /* NO_MOUNT_PRIVATE */
int		nfs_mount_connect(struct nfsmount *);
void		nfs_mount_cleanup(struct nfsmount *);
int		nfs_mountinfo_assemble(struct nfsmount *, struct xdrbuf *);
int		nfs4_mount_update_path_with_symlink(struct nfsmount *, struct nfs_fs_path *, uint32_t, fhandle_t *, int *, fhandle_t *, vfs_context_t);
186
187/*
188 * NFS VFS operations.
189 */
/* prototypes for the VFS entry points collected in nfs_vfsops below */
int	nfs_vfs_mount(mount_t, vnode_t, user_addr_t, vfs_context_t);
int	nfs_vfs_start(mount_t, int, vfs_context_t);
int	nfs_vfs_unmount(mount_t, int, vfs_context_t);
int	nfs_vfs_root(mount_t, vnode_t *, vfs_context_t);
int	nfs_vfs_quotactl(mount_t, int, uid_t, caddr_t, vfs_context_t);
int	nfs_vfs_getattr(mount_t, struct vfs_attr *, vfs_context_t);
int	nfs_vfs_sync(mount_t, int, vfs_context_t);
int	nfs_vfs_vget(mount_t, ino64_t, vnode_t *, vfs_context_t);
int	nfs_vfs_vptofh(vnode_t, int *, unsigned char *, vfs_context_t);
int	nfs_vfs_fhtovp(mount_t, int, unsigned char *, vnode_t *, vfs_context_t);
int	nfs_vfs_init(struct vfsconf *);
int	nfs_vfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t);

/*
 * The NFS VFS operations table.
 *
 * This is a positional initializer, so the entries must stay in the order
 * of the members of struct vfsops (declared outside this file).  Note that
 * fhtovp comes before vptofh here, the reverse of the prototype order
 * above - NOTE(review): presumably matching struct vfsops; confirm against
 * its declaration before reordering anything.
 */
struct vfsops nfs_vfsops = {
	nfs_vfs_mount,
	nfs_vfs_start,
	nfs_vfs_unmount,
	nfs_vfs_root,
	nfs_vfs_quotactl,
	nfs_vfs_getattr,
	nfs_vfs_sync,
	nfs_vfs_vget,
	nfs_vfs_fhtovp,
	nfs_vfs_vptofh,
	nfs_vfs_init,
	nfs_vfs_sysctl,
	NULL,		/* setattr */
	{ NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL }	/* reserved */
};
225
226
227/*
228 * version-specific NFS functions
229 */
/* mount, fsinfo and statfs helpers referenced by the per-version function tables */
int nfs3_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
int nfs4_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
int nfs3_fsinfo(struct nfsmount *, nfsnode_t, vfs_context_t);
int nfs3_update_statfs(struct nfsmount *, vfs_context_t);
int nfs4_update_statfs(struct nfsmount *, vfs_context_t);
/*
 * Without QUOTA support the getquota names expand to NULL, so the
 * corresponding slots in nfs3_funcs/nfs4_funcs are NULL pointers.
 */
#if !QUOTA
#define nfs3_getquota	NULL
#define nfs4_getquota	NULL
#else
int nfs3_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
int nfs4_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
#endif
242
/*
 * Function table for NFSv3 mounts (NOTE(review): nfs3_update_statfs also
 * contains an NFSv2 reply path, so this table is presumably shared with
 * NFSv2 - confirm).
 *
 * Positional initializer: the entries must stay in the member order of
 * struct nfs_funcs (declared outside this file).  The getquota entry is
 * NULL when QUOTA is not configured.
 */
struct nfs_funcs nfs3_funcs = {
	nfs3_mount,
	nfs3_update_statfs,
	nfs3_getquota,
	nfs3_access_rpc,
	nfs3_getattr_rpc,
	nfs3_setattr_rpc,
	nfs3_read_rpc_async,
	nfs3_read_rpc_async_finish,
	nfs3_readlink_rpc,
	nfs3_write_rpc_async,
	nfs3_write_rpc_async_finish,
	nfs3_commit_rpc,
	nfs3_lookup_rpc_async,
	nfs3_lookup_rpc_async_finish,
	nfs3_remove_rpc,
	nfs3_rename_rpc,
	nfs3_setlock_rpc,
	nfs3_unlock_rpc,
	nfs3_getlock_rpc
	};
/*
 * Function table for NFSv4 mounts.
 *
 * Positional initializer: the entries must stay in the member order of
 * struct nfs_funcs (declared outside this file) and parallel nfs3_funcs
 * slot for slot.  The getquota entry is NULL when QUOTA is not configured.
 */
struct nfs_funcs nfs4_funcs = {
	nfs4_mount,
	nfs4_update_statfs,
	nfs4_getquota,
	nfs4_access_rpc,
	nfs4_getattr_rpc,
	nfs4_setattr_rpc,
	nfs4_read_rpc_async,
	nfs4_read_rpc_async_finish,
	nfs4_readlink_rpc,
	nfs4_write_rpc_async,
	nfs4_write_rpc_async_finish,
	nfs4_commit_rpc,
	nfs4_lookup_rpc_async,
	nfs4_lookup_rpc_async_finish,
	nfs4_remove_rpc,
	nfs4_rename_rpc,
	nfs4_setlock_rpc,
	nfs4_unlock_rpc,
	nfs4_getlock_rpc
	};
285
286/*
287 * Called once to initialize data structures...
288 */
289int
290nfs_vfs_init(__unused struct vfsconf *vfsp)
291{
292	int i;
293
294	/*
295	 * Check to see if major data structures haven't bloated.
296	 */
297	if (sizeof (struct nfsnode) > NFS_NODEALLOC) {
298		printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC);
299		printf("Try reducing NFS_SMALLFH\n");
300	}
301	if (sizeof (struct nfsmount) > NFS_MNTALLOC)
302		printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC);
303
304	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
305	if (nfs_ticks < 1)
306		nfs_ticks = 1;
307
308	/* init async I/O thread pool state */
309	TAILQ_INIT(&nfsiodfree);
310	TAILQ_INIT(&nfsiodwork);
311	TAILQ_INIT(&nfsiodmounts);
312	nfsiod_lck_grp = lck_grp_alloc_init("nfsiod", LCK_GRP_ATTR_NULL);
313	nfsiod_mutex = lck_mtx_alloc_init(nfsiod_lck_grp, LCK_ATTR_NULL);
314
315	/* init lock groups, etc. */
316	nfs_mount_grp = lck_grp_alloc_init("nfs_mount", LCK_GRP_ATTR_NULL);
317	nfs_open_grp = lck_grp_alloc_init("nfs_open", LCK_GRP_ATTR_NULL);
318	nfs_global_grp = lck_grp_alloc_init("nfs_global", LCK_GRP_ATTR_NULL);
319
320	nfs_global_mutex = lck_mtx_alloc_init(nfs_global_grp, LCK_ATTR_NULL);
321
322	/* init request list mutex */
323	nfs_request_grp = lck_grp_alloc_init("nfs_request", LCK_GRP_ATTR_NULL);
324	nfs_request_mutex = lck_mtx_alloc_init(nfs_request_grp, LCK_ATTR_NULL);
325
326	/* initialize NFS request list */
327	TAILQ_INIT(&nfs_reqq);
328
329	nfs_nbinit();			/* Init the nfsbuf table */
330	nfs_nhinit();			/* Init the nfsnode table */
331	nfs_lockinit();			/* Init the nfs lock state */
332	nfs_gss_init();			/* Init RPCSEC_GSS security */
333
334	/* NFSv4 stuff */
335	NFS4_PER_FS_ATTRIBUTES(nfs_fs_attr_bitmap);
336	NFS4_PER_OBJECT_ATTRIBUTES(nfs_object_attr_bitmap);
337	NFS4_DEFAULT_ATTRIBUTES(nfs_getattr_bitmap);
338	for (i=0; i < NFS_ATTR_BITMAP_LEN; i++)
339		nfs_getattr_bitmap[i] &= nfs_object_attr_bitmap[i];
340	TAILQ_INIT(&nfsclientids);
341
342	/* initialize NFS timer callouts */
343	nfs_request_timer_call = thread_call_allocate(nfs_request_timer, NULL);
344	nfs_buf_timer_call = thread_call_allocate(nfs_buf_timer, NULL);
345	nfs4_callback_timer_call = thread_call_allocate(nfs4_callback_timer, NULL);
346
347	return (0);
348}
349
350/*
351 * nfs statfs call
352 */
353int
354nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
355{
356	nfsnode_t np;
357	int error = 0, lockerror, status, nfsvers;
358	u_int64_t xid;
359	struct nfsm_chain nmreq, nmrep;
360	uint32_t val = 0;
361
362	nfsvers = nmp->nm_vers;
363	np = nmp->nm_dnp;
364	if (!np)
365		return (ENXIO);
366	if ((error = vnode_get(NFSTOV(np))))
367		return (error);
368
369	nfsm_chain_null(&nmreq);
370	nfsm_chain_null(&nmrep);
371
372	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
373	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
374	nfsm_chain_build_done(error, &nmreq);
375	nfsmout_if(error);
376	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSSTAT, ctx, NULL, &nmrep, &xid, &status);
377	if ((lockerror = nfs_node_lock(np)))
378		error = lockerror;
379	if (nfsvers == NFS_VER3)
380		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
381	if (!lockerror)
382		nfs_node_unlock(np);
383	if (!error)
384		error = status;
385	nfsm_assert(error, NFSTONMP(np), ENXIO);
386	nfsmout_if(error);
387	lck_mtx_lock(&nmp->nm_lock);
388	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL);
389	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE);
390	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_AVAIL);
391	if (nfsvers == NFS_VER3) {
392		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_AVAIL);
393		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_TOTAL);
394		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_FREE);
395		nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
396		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_total);
397		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_free);
398		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_avail);
399		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_total);
400		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_free);
401		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_avail);
402		// skip invarsec
403	} else {
404		nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip tsize?
405		nfsm_chain_get_32(error, &nmrep, nmp->nm_fsattr.nfsa_bsize);
406		nfsm_chain_get_32(error, &nmrep, val);
407		nfsmout_if(error);
408		if (nmp->nm_fsattr.nfsa_bsize <= 0)
409			nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
410		nmp->nm_fsattr.nfsa_space_total = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
411		nfsm_chain_get_32(error, &nmrep, val);
412		nfsmout_if(error);
413		nmp->nm_fsattr.nfsa_space_free = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
414		nfsm_chain_get_32(error, &nmrep, val);
415		nfsmout_if(error);
416		nmp->nm_fsattr.nfsa_space_avail = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
417	}
418	lck_mtx_unlock(&nmp->nm_lock);
419nfsmout:
420	nfsm_chain_cleanup(&nmreq);
421	nfsm_chain_cleanup(&nmrep);
422	vnode_put(NFSTOV(np));
423	return (error);
424}
425
426int
427nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
428{
429	nfsnode_t np;
430	int error = 0, lockerror, status, nfsvers, numops;
431	u_int64_t xid;
432	struct nfsm_chain nmreq, nmrep;
433	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
434	struct nfs_vattr nvattr;
435	struct nfsreq_secinfo_args si;
436
437	nfsvers = nmp->nm_vers;
438	np = nmp->nm_dnp;
439	if (!np)
440		return (ENXIO);
441	if ((error = vnode_get(NFSTOV(np))))
442		return (error);
443
444	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
445	NVATTR_INIT(&nvattr);
446	nfsm_chain_null(&nmreq);
447	nfsm_chain_null(&nmrep);
448
449	// PUTFH + GETATTR
450	numops = 2;
451	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
452	nfsm_chain_add_compound_header(error, &nmreq, "statfs", numops);
453	numops--;
454	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
455	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
456	numops--;
457	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
458	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
459	NFS4_STATFS_ATTRIBUTES(bitmap);
460	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
461	nfsm_chain_build_done(error, &nmreq);
462	nfsm_assert(error, (numops == 0), EPROTO);
463	nfsmout_if(error);
464	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
465	nfsm_chain_skip_tag(error, &nmrep);
466	nfsm_chain_get_32(error, &nmrep, numops);
467	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
468	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
469	nfsm_assert(error, NFSTONMP(np), ENXIO);
470	nfsmout_if(error);
471	lck_mtx_lock(&nmp->nm_lock);
472	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
473	lck_mtx_unlock(&nmp->nm_lock);
474	nfsmout_if(error);
475	if ((lockerror = nfs_node_lock(np)))
476		error = lockerror;
477	if (!error)
478		nfs_loadattrcache(np, &nvattr, &xid, 0);
479	if (!lockerror)
480		nfs_node_unlock(np);
481	nfsm_assert(error, NFSTONMP(np), ENXIO);
482	nfsmout_if(error);
483	nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
484nfsmout:
485	NVATTR_CLEANUP(&nvattr);
486	nfsm_chain_cleanup(&nmreq);
487	nfsm_chain_cleanup(&nmrep);
488	vnode_put(NFSTOV(np));
489	return (error);
490}
491
492
493/*
494 * The NFS VFS_GETATTR function: "statfs"-type information is retrieved
495 * using the nf_update_statfs() function, and other attributes are cobbled
496 * together from whatever sources we can (getattr, fsinfo, pathconf).
497 */
498int
499nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
500{
501	struct nfsmount *nmp;
502	uint32_t bsize;
503	int error = 0, nfsvers;
504
505	if (!(nmp = VFSTONFS(mp)))
506		return (ENXIO);
507	nfsvers = nmp->nm_vers;
508
509	if (VFSATTR_IS_ACTIVE(fsap, f_bsize)  ||
510	    VFSATTR_IS_ACTIVE(fsap, f_iosize) ||
511	    VFSATTR_IS_ACTIVE(fsap, f_blocks) ||
512	    VFSATTR_IS_ACTIVE(fsap, f_bfree)  ||
513	    VFSATTR_IS_ACTIVE(fsap, f_bavail) ||
514	    VFSATTR_IS_ACTIVE(fsap, f_bused)  ||
515	    VFSATTR_IS_ACTIVE(fsap, f_files)  ||
516	    VFSATTR_IS_ACTIVE(fsap, f_ffree)) {
517		int statfsrate = nfs_statfs_rate_limit;
518		int refresh = 1;
519
520		/*
521		 * Are we rate-limiting statfs RPCs?
522		 * (Treat values less than 1 or greater than 1,000,000 as no limit.)
523		 */
524		if ((statfsrate > 0) && (statfsrate < 1000000)) {
525			struct timeval now;
526			uint32_t stamp;
527
528			microuptime(&now);
529			lck_mtx_lock(&nmp->nm_lock);
530			stamp = (now.tv_sec * statfsrate) + (now.tv_usec / (1000000/statfsrate));
531			if (stamp != nmp->nm_fsattrstamp) {
532				refresh = 1;
533				nmp->nm_fsattrstamp = stamp;
534			} else {
535				refresh = 0;
536			}
537			lck_mtx_unlock(&nmp->nm_lock);
538		}
539
540		if (refresh)
541			error = nmp->nm_funcs->nf_update_statfs(nmp, ctx);
542		if ((error == ESTALE) || (error == ETIMEDOUT))
543			error = 0;
544		if (error)
545			return (error);
546
547		lck_mtx_lock(&nmp->nm_lock);
548		VFSATTR_RETURN(fsap, f_iosize, nfs_iosize);
549		VFSATTR_RETURN(fsap, f_bsize, nmp->nm_fsattr.nfsa_bsize);
550		bsize = nmp->nm_fsattr.nfsa_bsize;
551		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL))
552			VFSATTR_RETURN(fsap, f_blocks, nmp->nm_fsattr.nfsa_space_total / bsize);
553		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE))
554			VFSATTR_RETURN(fsap, f_bfree, nmp->nm_fsattr.nfsa_space_free / bsize);
555		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_AVAIL))
556			VFSATTR_RETURN(fsap, f_bavail, nmp->nm_fsattr.nfsa_space_avail / bsize);
557		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL) &&
558		    NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE))
559			VFSATTR_RETURN(fsap, f_bused,
560				(nmp->nm_fsattr.nfsa_space_total / bsize) -
561				(nmp->nm_fsattr.nfsa_space_free / bsize));
562		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_TOTAL))
563			VFSATTR_RETURN(fsap, f_files, nmp->nm_fsattr.nfsa_files_total);
564		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_FREE))
565			VFSATTR_RETURN(fsap, f_ffree, nmp->nm_fsattr.nfsa_files_free);
566		lck_mtx_unlock(&nmp->nm_lock);
567	}
568
569	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
570		u_int32_t caps, valid;
571		nfsnode_t np = nmp->nm_dnp;
572
573		nfsm_assert(error, VFSTONFS(mp) && np, ENXIO);
574		if (error)
575			return (error);
576		lck_mtx_lock(&nmp->nm_lock);
577
578		/*
579		 * The capabilities[] array defines what this volume supports.
580		 *
581		 * The valid[] array defines which bits this code understands
582		 * the meaning of (whether the volume has that capability or not).
583		 * Any zero bits here means "I don't know what you're asking about"
584		 * and the caller cannot tell whether that capability is
585		 * present or not.
586		 */
587		caps = valid = 0;
588		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SYMLINK_SUPPORT)) {
589			valid |= VOL_CAP_FMT_SYMBOLICLINKS;
590			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_SYMLINK)
591				caps |= VOL_CAP_FMT_SYMBOLICLINKS;
592		}
593		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_LINK_SUPPORT)) {
594			valid |= VOL_CAP_FMT_HARDLINKS;
595			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_LINK)
596				caps |= VOL_CAP_FMT_HARDLINKS;
597		}
598		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) {
599			valid |= VOL_CAP_FMT_CASE_SENSITIVE;
600			if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE))
601				caps |= VOL_CAP_FMT_CASE_SENSITIVE;
602		}
603		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING)) {
604			valid |= VOL_CAP_FMT_CASE_PRESERVING;
605			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING)
606				caps |= VOL_CAP_FMT_CASE_PRESERVING;
607		}
608		/* Note: VOL_CAP_FMT_2TB_FILESIZE is actually used to test for "large file support" */
609		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
610			/* Is server's max file size at least 4GB? */
611			if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x100000000ULL)
612				caps |= VOL_CAP_FMT_2TB_FILESIZE;
613		} else if (nfsvers >= NFS_VER3) {
614			/*
615			 * NFSv3 and up supports 64 bits of file size.
616			 * So, we'll just assume maxfilesize >= 4GB
617			 */
618			caps |= VOL_CAP_FMT_2TB_FILESIZE;
619		}
620		if (nfsvers >= NFS_VER4) {
621			caps |= VOL_CAP_FMT_HIDDEN_FILES;
622			valid |= VOL_CAP_FMT_HIDDEN_FILES;
623			// VOL_CAP_FMT_OPENDENYMODES
624//			caps |= VOL_CAP_FMT_OPENDENYMODES;
625//			valid |= VOL_CAP_FMT_OPENDENYMODES;
626		}
627		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
628			// VOL_CAP_FMT_PERSISTENTOBJECTIDS |
629			// VOL_CAP_FMT_SYMBOLICLINKS |
630			// VOL_CAP_FMT_HARDLINKS |
631			// VOL_CAP_FMT_JOURNAL |
632			// VOL_CAP_FMT_JOURNAL_ACTIVE |
633			// VOL_CAP_FMT_NO_ROOT_TIMES |
634			// VOL_CAP_FMT_SPARSE_FILES |
635			// VOL_CAP_FMT_ZERO_RUNS |
636			// VOL_CAP_FMT_CASE_SENSITIVE |
637			// VOL_CAP_FMT_CASE_PRESERVING |
638			// VOL_CAP_FMT_FAST_STATFS |
639			// VOL_CAP_FMT_2TB_FILESIZE |
640			// VOL_CAP_FMT_OPENDENYMODES |
641			// VOL_CAP_FMT_HIDDEN_FILES |
642			caps;
643		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
644			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
645			// VOL_CAP_FMT_SYMBOLICLINKS |
646			// VOL_CAP_FMT_HARDLINKS |
647			// VOL_CAP_FMT_JOURNAL |
648			// VOL_CAP_FMT_JOURNAL_ACTIVE |
649			// VOL_CAP_FMT_NO_ROOT_TIMES |
650			// VOL_CAP_FMT_SPARSE_FILES |
651			// VOL_CAP_FMT_ZERO_RUNS |
652			// VOL_CAP_FMT_CASE_SENSITIVE |
653			// VOL_CAP_FMT_CASE_PRESERVING |
654			VOL_CAP_FMT_FAST_STATFS |
655			VOL_CAP_FMT_2TB_FILESIZE |
656			// VOL_CAP_FMT_OPENDENYMODES |
657			// VOL_CAP_FMT_HIDDEN_FILES |
658			valid;
659
660		/*
661		 * We don't support most of the interfaces.
662		 *
663		 * We MAY support locking, but we don't have any easy way of probing.
664		 * We can tell if there's no lockd running or if locks have been
665		 * disabled for a mount, so we can definitely answer NO in that case.
666		 * Any attempt to send a request to lockd to test for locking support
667		 * may cause the lazily-launched locking daemons to be started
668		 * unnecessarily.  So we avoid that.  However, we do record if we ever
669		 * successfully perform a lock operation on a mount point, so if it
670		 * looks like lock ops have worked, we do report that we support them.
671		 */
672		caps = valid = 0;
673		if (nfsvers >= NFS_VER4) {
674			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
675			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
676			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
677				caps |= VOL_CAP_INT_EXTENDED_SECURITY;
678			valid |= VOL_CAP_INT_EXTENDED_SECURITY;
679			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
680				caps |= VOL_CAP_INT_EXTENDED_ATTR;
681			valid |= VOL_CAP_INT_EXTENDED_ATTR;
682#if NAMEDSTREAMS
683			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
684				caps |= VOL_CAP_INT_NAMEDSTREAMS;
685			valid |= VOL_CAP_INT_NAMEDSTREAMS;
686#endif
687		} else if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
688			/* locks disabled on this mount, so they definitely won't work */
689			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
690		} else if (nmp->nm_state & NFSSTA_LOCKSWORK) {
691			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
692			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
693		}
694		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] =
695			// VOL_CAP_INT_SEARCHFS |
696			// VOL_CAP_INT_ATTRLIST |
697			// VOL_CAP_INT_NFSEXPORT |
698			// VOL_CAP_INT_READDIRATTR |
699			// VOL_CAP_INT_EXCHANGEDATA |
700			// VOL_CAP_INT_COPYFILE |
701			// VOL_CAP_INT_ALLOCATE |
702			// VOL_CAP_INT_VOL_RENAME |
703			// VOL_CAP_INT_ADVLOCK |
704			// VOL_CAP_INT_FLOCK |
705			// VOL_CAP_INT_EXTENDED_SECURITY |
706			// VOL_CAP_INT_USERACCESS |
707			// VOL_CAP_INT_MANLOCK |
708			// VOL_CAP_INT_NAMEDSTREAMS |
709			// VOL_CAP_INT_EXTENDED_ATTR |
710			VOL_CAP_INT_REMOTE_EVENT |
711			caps;
712		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
713			VOL_CAP_INT_SEARCHFS |
714			VOL_CAP_INT_ATTRLIST |
715			VOL_CAP_INT_NFSEXPORT |
716			VOL_CAP_INT_READDIRATTR |
717			VOL_CAP_INT_EXCHANGEDATA |
718			VOL_CAP_INT_COPYFILE |
719			VOL_CAP_INT_ALLOCATE |
720			VOL_CAP_INT_VOL_RENAME |
721			// VOL_CAP_INT_ADVLOCK |
722			// VOL_CAP_INT_FLOCK |
723			// VOL_CAP_INT_EXTENDED_SECURITY |
724			// VOL_CAP_INT_USERACCESS |
725			// VOL_CAP_INT_MANLOCK |
726			// VOL_CAP_INT_NAMEDSTREAMS |
727			// VOL_CAP_INT_EXTENDED_ATTR |
728			VOL_CAP_INT_REMOTE_EVENT |
729			valid;
730
731		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
732		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
733
734		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
735		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;
736
737		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
738		lck_mtx_unlock(&nmp->nm_lock);
739	}
740
741	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
742		fsap->f_attributes.validattr.commonattr = 0;
743		fsap->f_attributes.validattr.volattr =
744			ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
745		fsap->f_attributes.validattr.dirattr = 0;
746		fsap->f_attributes.validattr.fileattr = 0;
747		fsap->f_attributes.validattr.forkattr = 0;
748
749		fsap->f_attributes.nativeattr.commonattr = 0;
750		fsap->f_attributes.nativeattr.volattr =
751			ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
752		fsap->f_attributes.nativeattr.dirattr = 0;
753		fsap->f_attributes.nativeattr.fileattr = 0;
754		fsap->f_attributes.nativeattr.forkattr = 0;
755
756		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
757	}
758
759	return (error);
760}
761
762/*
763 * nfs version 3 fsinfo rpc call
764 */
765int
766nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
767{
768	int error = 0, lockerror, status, nmlocked = 0;
769	u_int64_t xid;
770	uint32_t val, prefsize, maxsize;
771	struct nfsm_chain nmreq, nmrep;
772
773	nfsm_chain_null(&nmreq);
774	nfsm_chain_null(&nmrep);
775
776	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nmp->nm_vers));
777	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
778	nfsm_chain_build_done(error, &nmreq);
779	nfsmout_if(error);
780	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSINFO, ctx, NULL, &nmrep, &xid, &status);
781	if ((lockerror = nfs_node_lock(np)))
782		error = lockerror;
783	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
784	if (!lockerror)
785		nfs_node_unlock(np);
786	if (!error)
787		error = status;
788	nfsmout_if(error);
789
790	lck_mtx_lock(&nmp->nm_lock);
791	nmlocked = 1;
792
793	nfsm_chain_get_32(error, &nmrep, maxsize);
794	nfsm_chain_get_32(error, &nmrep, prefsize);
795	nfsmout_if(error);
796	nmp->nm_fsattr.nfsa_maxread = maxsize;
797	if (prefsize < nmp->nm_rsize)
798		nmp->nm_rsize = (prefsize + NFS_FABLKSIZE - 1) &
799			~(NFS_FABLKSIZE - 1);
800	if ((maxsize > 0) && (maxsize < nmp->nm_rsize)) {
801		nmp->nm_rsize = maxsize & ~(NFS_FABLKSIZE - 1);
802		if (nmp->nm_rsize == 0)
803			nmp->nm_rsize = maxsize;
804	}
805	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip rtmult
806
807	nfsm_chain_get_32(error, &nmrep, maxsize);
808	nfsm_chain_get_32(error, &nmrep, prefsize);
809	nfsmout_if(error);
810	nmp->nm_fsattr.nfsa_maxwrite = maxsize;
811	if (prefsize < nmp->nm_wsize)
812		nmp->nm_wsize = (prefsize + NFS_FABLKSIZE - 1) &
813			~(NFS_FABLKSIZE - 1);
814	if ((maxsize > 0) && (maxsize < nmp->nm_wsize)) {
815		nmp->nm_wsize = maxsize & ~(NFS_FABLKSIZE - 1);
816		if (nmp->nm_wsize == 0)
817			nmp->nm_wsize = maxsize;
818	}
819	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip wtmult
820
821	nfsm_chain_get_32(error, &nmrep, prefsize);
822	nfsmout_if(error);
823	if ((prefsize > 0) && (prefsize < nmp->nm_readdirsize))
824		nmp->nm_readdirsize = prefsize;
825	if ((nmp->nm_fsattr.nfsa_maxread > 0) &&
826	    (nmp->nm_fsattr.nfsa_maxread < nmp->nm_readdirsize))
827		nmp->nm_readdirsize = nmp->nm_fsattr.nfsa_maxread;
828
829	nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_maxfilesize);
830
831	nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); // skip time_delta
832
833	/* convert FS properties to our own flags */
834	nfsm_chain_get_32(error, &nmrep, val);
835	nfsmout_if(error);
836	if (val & NFSV3FSINFO_LINK)
837		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_LINK;
838	if (val & NFSV3FSINFO_SYMLINK)
839		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_SYMLINK;
840	if (val & NFSV3FSINFO_HOMOGENEOUS)
841		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_HOMOGENEOUS;
842	if (val & NFSV3FSINFO_CANSETTIME)
843		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_SET_TIME;
844	nmp->nm_state |= NFSSTA_GOTFSINFO;
845	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD);
846	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE);
847	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE);
848	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_LINK_SUPPORT);
849	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SYMLINK_SUPPORT);
850	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_HOMOGENEOUS);
851	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CANSETTIME);
852nfsmout:
853	if (nmlocked)
854		lck_mtx_unlock(&nmp->nm_lock);
855	nfsm_chain_cleanup(&nmreq);
856	nfsm_chain_cleanup(&nmrep);
857	return (error);
858}
859
860/*
 * Mount a remote root fs via NFS. This depends on the info in the
 * nfs_diskless structure having been filled in properly by some primary
 * bootstrap.
864 * It goes something like this:
865 * - do enough of "ifconfig" by calling ifioctl() so that the system
866 *   can talk to the server
867 * - If nfs_diskless.mygateway is filled in, use that address as
868 *   a default gateway.
869 * - hand craft the swap nfs vnode hanging off a fake mount point
870 *	if swdevt[0].sw_dev == NODEV
871 * - build the rootfs mount point and call mountnfs() to do the rest.
872 */
873int
874nfs_mountroot(void)
875{
876	struct nfs_diskless nd;
877	mount_t mp = NULL;
878	vnode_t vp = NULL;
879	vfs_context_t ctx;
880	int error;
881#if !defined(NO_MOUNT_PRIVATE)
882	mount_t mppriv = NULL;
883	vnode_t vppriv = NULL;
884#endif /* NO_MOUNT_PRIVATE */
885	int v3, sotype;
886
887	/*
888	 * Call nfs_boot_init() to fill in the nfs_diskless struct.
889	 * Note: networking must already have been configured before
890	 * we're called.
891	 */
892	bzero((caddr_t) &nd, sizeof(nd));
893	error = nfs_boot_init(&nd);
894	if (error)
895		panic("nfs_boot_init: unable to initialize NFS root system information, "
896		      "error %d, check configuration: %s\n", error, PE_boot_args());
897
898	/*
899	 * Try NFSv3 first, then fallback to NFSv2.
900	 * Likewise, try TCP first, then fall back to UDP.
901	 */
902	v3 = 1;
903	sotype = SOCK_STREAM;
904
905tryagain:
906	error = nfs_boot_getfh(&nd, v3, sotype);
907	if (error) {
908		if (error == EHOSTDOWN || error == EHOSTUNREACH) {
909			if (nd.nd_root.ndm_mntfrom)
910				FREE_ZONE(nd.nd_root.ndm_mntfrom,
911					  MAXPATHLEN, M_NAMEI);
912			if (nd.nd_root.ndm_path)
913				FREE_ZONE(nd.nd_root.ndm_path,
914					  MAXPATHLEN, M_NAMEI);
915			if (nd.nd_private.ndm_mntfrom)
916				FREE_ZONE(nd.nd_private.ndm_mntfrom,
917					  MAXPATHLEN, M_NAMEI);
918			if (nd.nd_private.ndm_path)
919				FREE_ZONE(nd.nd_private.ndm_path,
920					  MAXPATHLEN, M_NAMEI);
921			return (error);
922		}
923		if (v3) {
924			if (sotype == SOCK_STREAM) {
925				printf("NFS mount (v3,TCP) failed with error %d, trying UDP...\n", error);
926				sotype = SOCK_DGRAM;
927				goto tryagain;
928			}
929			printf("NFS mount (v3,UDP) failed with error %d, trying v2...\n", error);
930			v3 = 0;
931			sotype = SOCK_STREAM;
932			goto tryagain;
933		} else if (sotype == SOCK_STREAM) {
934			printf("NFS mount (v2,TCP) failed with error %d, trying UDP...\n", error);
935			sotype = SOCK_DGRAM;
936			goto tryagain;
937		} else {
938			printf("NFS mount (v2,UDP) failed with error %d, giving up...\n", error);
939		}
940		switch(error) {
941		case EPROGUNAVAIL:
942			panic("NFS mount failed: NFS server mountd not responding, check server configuration: %s", PE_boot_args());
943		case EACCES:
944		case EPERM:
945			panic("NFS mount failed: NFS server refused mount, check server configuration: %s", PE_boot_args());
946		default:
947			panic("NFS mount failed with error %d, check configuration: %s", error, PE_boot_args());
948		}
949	}
950
951	ctx = vfs_context_kernel();
952
953	/*
954	 * Create the root mount point.
955	 */
956#if !defined(NO_MOUNT_PRIVATE)
957	{
958		//PWC hack until we have a real "mount" tool to remount root rw
959		int rw_root=0;
960		int flags = MNT_ROOTFS|MNT_RDONLY;
961		PE_parse_boot_argn("-rwroot_hack", &rw_root, sizeof (rw_root));
962		if(rw_root)
963		{
964			flags = MNT_ROOTFS;
965			kprintf("-rwroot_hack in effect: mounting root fs read/write\n");
966		}
967
968	if ((error = nfs_mount_diskless(&nd.nd_root, "/", flags, &vp, &mp, ctx)))
969#else
970	if ((error = nfs_mount_diskless(&nd.nd_root, "/", MNT_ROOTFS, &vp, &mp, ctx)))
971#endif /* NO_MOUNT_PRIVATE */
972	{
973		if (v3) {
974			if (sotype == SOCK_STREAM) {
975				printf("NFS root mount (v3,TCP) failed with %d, trying UDP...\n", error);
976				sotype = SOCK_DGRAM;
977				goto tryagain;
978			}
979			printf("NFS root mount (v3,UDP) failed with %d, trying v2...\n", error);
980			v3 = 0;
981			sotype = SOCK_STREAM;
982			goto tryagain;
983		} else if (sotype == SOCK_STREAM) {
984			printf("NFS root mount (v2,TCP) failed with %d, trying UDP...\n", error);
985			sotype = SOCK_DGRAM;
986			goto tryagain;
987		} else {
988			printf("NFS root mount (v2,UDP) failed with error %d, giving up...\n", error);
989		}
990		panic("NFS root mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
991	}
992	}
993	printf("root on %s\n", nd.nd_root.ndm_mntfrom);
994
995	vfs_unbusy(mp);
996	mount_list_add(mp);
997	rootvp = vp;
998
999#if !defined(NO_MOUNT_PRIVATE)
1000	if (nd.nd_private.ndm_saddr.sin_addr.s_addr) {
1001	    error = nfs_mount_diskless_private(&nd.nd_private, "/private",
1002					       0, &vppriv, &mppriv, ctx);
1003	    if (error)
1004		panic("NFS /private mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
1005	    printf("private on %s\n", nd.nd_private.ndm_mntfrom);
1006
1007	    vfs_unbusy(mppriv);
1008	    mount_list_add(mppriv);
1009	}
1010
1011#endif /* NO_MOUNT_PRIVATE */
1012
1013	if (nd.nd_root.ndm_mntfrom)
1014		FREE_ZONE(nd.nd_root.ndm_mntfrom, MAXPATHLEN, M_NAMEI);
1015	if (nd.nd_root.ndm_path)
1016		FREE_ZONE(nd.nd_root.ndm_path, MAXPATHLEN, M_NAMEI);
1017	if (nd.nd_private.ndm_mntfrom)
1018		FREE_ZONE(nd.nd_private.ndm_mntfrom, MAXPATHLEN, M_NAMEI);
1019	if (nd.nd_private.ndm_path)
1020		FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI);
1021
1022	/* Get root attributes (for the time). */
1023	error = nfs_getattr(VTONFS(vp), NULL, ctx, NGA_UNCACHED);
1024	if (error)
1025		panic("NFS mount: failed to get attributes for root directory, error %d, check server", error);
1026	return (0);
1027}
1028
1029/*
1030 * Internal version of mount system call for diskless setup.
1031 */
1032static int
1033nfs_mount_diskless(
1034	struct nfs_dlmount *ndmntp,
1035	const char *mntname,
1036	int mntflag,
1037	vnode_t *vpp,
1038	mount_t *mpp,
1039	vfs_context_t ctx)
1040{
1041	mount_t mp;
1042	int error, numcomps;
1043	char *xdrbuf, *p, *cp, *frompath, *endserverp;
1044	char uaddr[MAX_IPv4_STR_LEN];
1045	struct xdrbuf xb;
1046	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1047	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
1048	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
1049	uint32_t argslength_offset, attrslength_offset, end_offset;
1050
1051	if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_mntfrom, &mp))) {
1052		printf("nfs_mount_diskless: NFS not configured\n");
1053		return (error);
1054	}
1055
1056	mp->mnt_flag |= mntflag;
1057	if (!(mntflag & MNT_RDONLY))
1058		mp->mnt_flag &= ~MNT_RDONLY;
1059
1060	/* find the server-side path being mounted */
1061	frompath = ndmntp->ndm_mntfrom;
1062	if (*frompath == '[') {  /* skip IPv6 literal address */
1063		while (*frompath && (*frompath != ']'))
1064			frompath++;
1065		if (*frompath == ']')
1066			frompath++;
1067	}
1068	while (*frompath && (*frompath != ':'))
1069		frompath++;
1070	endserverp = frompath;
1071	while (*frompath && (*frompath == ':'))
1072		frompath++;
1073	/* count fs location path components */
1074	p = frompath;
1075	while (*p && (*p == '/'))
1076		p++;
1077	numcomps = 0;
1078	while (*p) {
1079		numcomps++;
1080		while (*p && (*p != '/'))
1081			p++;
1082		while (*p && (*p == '/'))
1083			p++;
1084	}
1085
1086	/* convert address to universal address string */
1087	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
1088		printf("nfs_mount_diskless: bad address\n");
1089		return (EINVAL);
1090	}
1091
1092	/* prepare mount attributes */
1093	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1094	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1095	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1096	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1097	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1098	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1099	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1100
1101	/* prepare mount flags */
1102	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1103	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1104	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1105	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1106
1107	/* build xdr buffer */
1108	xb_init_buffer(&xb, NULL, 0);
1109	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
1110	argslength_offset = xb_offset(&xb);
1111	xb_add_32(error, &xb, 0); // args length
1112	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1113	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1114	attrslength_offset = xb_offset(&xb);
1115	xb_add_32(error, &xb, 0); // attrs length
1116	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
1117	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1118	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
1119	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
1120	/* fs location */
1121	xb_add_32(error, &xb, 1); /* fs location count */
1122	xb_add_32(error, &xb, 1); /* server count */
1123	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
1124	xb_add_32(error, &xb, 1); /* address count */
1125	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1126	xb_add_32(error, &xb, 0); /* empty server info */
1127	xb_add_32(error, &xb, numcomps); /* pathname component count */
1128	p = frompath;
1129	while (*p && (*p == '/'))
1130		p++;
1131	while (*p) {
1132		cp = p;
1133		while (*p && (*p != '/'))
1134			p++;
1135		xb_add_string(error, &xb, cp, (p - cp)); /* component */
1136		if (error)
1137			break;
1138		while (*p && (*p == '/'))
1139			p++;
1140	}
1141	xb_add_32(error, &xb, 0); /* empty fsl info */
1142	xb_add_32(error, &xb, mntflag); /* MNT flags */
1143	xb_build_done(error, &xb);
1144
1145	/* update opaque counts */
1146	end_offset = xb_offset(&xb);
1147	if (!error) {
1148		error = xb_seek(&xb, argslength_offset);
1149		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1150	}
1151	if (!error) {
1152		error = xb_seek(&xb, attrslength_offset);
1153		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1154	}
1155	if (error) {
1156		printf("nfs_mount_diskless: error %d assembling mount args\n", error);
1157		xb_cleanup(&xb);
1158		return (error);
1159	}
1160	/* grab the assembled buffer */
1161	xdrbuf = xb_buffer_base(&xb);
1162	xb.xb_flags &= ~XB_CLEANUP;
1163
1164	/* do the mount */
1165	if ((error = mountnfs(xdrbuf, mp, ctx, vpp))) {
1166		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
1167		// XXX vfs_rootmountfailed(mp);
1168		mount_list_lock();
1169		mp->mnt_vtable->vfc_refcount--;
1170		mount_list_unlock();
1171		vfs_unbusy(mp);
1172		mount_lock_destroy(mp);
1173#if CONFIG_MACF
1174		mac_mount_label_destroy(mp);
1175#endif
1176		FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1177	} else {
1178		*mpp = mp;
1179	}
1180	xb_cleanup(&xb);
1181	return (error);
1182}
1183
1184#if !defined(NO_MOUNT_PRIVATE)
1185/*
1186 * Internal version of mount system call to mount "/private"
1187 * separately in diskless setup
1188 */
1189static int
1190nfs_mount_diskless_private(
1191	struct nfs_dlmount *ndmntp,
1192	const char *mntname,
1193	int mntflag,
1194	vnode_t *vpp,
1195	mount_t *mpp,
1196	vfs_context_t ctx)
1197{
1198	mount_t mp;
1199	int error, numcomps;
1200	proc_t procp;
1201	struct vfstable *vfsp;
1202	struct nameidata nd;
1203	vnode_t vp;
1204	char *xdrbuf = NULL, *p, *cp, *frompath, *endserverp;
1205	char uaddr[MAX_IPv4_STR_LEN];
1206	struct xdrbuf xb;
1207	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1208	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
1209	uint32_t argslength_offset, attrslength_offset, end_offset;
1210
1211	procp = current_proc(); /* XXX */
1212	xb_init(&xb, 0);
1213
1214	{
1215	/*
1216	 * mimic main()!. Temporarily set up rootvnode and other stuff so
1217	 * that namei works. Need to undo this because main() does it, too
1218	 */
1219		struct filedesc *fdp;	/* pointer to file descriptor state */
1220		fdp = procp->p_fd;
1221		mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
1222
1223		/* Get the vnode for '/'. Set fdp->fd_cdir to reference it. */
1224		if (VFS_ROOT(mountlist.tqh_first, &rootvnode, NULL))
1225			panic("cannot find root vnode");
1226		error = vnode_ref(rootvnode);
1227		if (error) {
1228			printf("nfs_mountroot: vnode_ref() failed on root vnode!\n");
1229			goto out;
1230		}
1231		fdp->fd_cdir = rootvnode;
1232		fdp->fd_rdir = NULL;
1233	}
1234
1235	/*
1236	 * Get vnode to be covered
1237	 */
1238	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1239	    CAST_USER_ADDR_T(mntname), ctx);
1240	if ((error = namei(&nd))) {
1241		printf("nfs_mountroot: private namei failed!\n");
1242		goto out;
1243	}
1244	{
1245		/* undo vnode_ref() in mimic main()! */
1246		vnode_rele(rootvnode);
1247	}
1248	nameidone(&nd);
1249	vp = nd.ni_vp;
1250
1251	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) ||
1252	    (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
1253		vnode_put(vp);
1254		goto out;
1255	}
1256	if (vnode_vtype(vp) != VDIR) {
1257		vnode_put(vp);
1258		error = ENOTDIR;
1259		goto out;
1260	}
1261	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1262		if (!strncmp(vfsp->vfc_name, "nfs", sizeof(vfsp->vfc_name)))
1263			break;
1264	if (vfsp == NULL) {
1265		printf("nfs_mountroot: private NFS not configured\n");
1266		vnode_put(vp);
1267		error = ENODEV;
1268		goto out;
1269	}
1270	if (vnode_mountedhere(vp) != NULL) {
1271		vnode_put(vp);
1272		error = EBUSY;
1273		goto out;
1274	}
1275
1276	/*
1277	 * Allocate and initialize the filesystem.
1278	 */
1279	mp = _MALLOC_ZONE((u_int32_t)sizeof(struct mount), M_MOUNT, M_WAITOK);
1280	if (!mp) {
1281		printf("nfs_mountroot: unable to allocate mount structure\n");
1282		vnode_put(vp);
1283		error = ENOMEM;
1284		goto out;
1285	}
1286	bzero((char *)mp, sizeof(struct mount));
1287
1288	/* Initialize the default IO constraints */
1289	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
1290	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
1291	mp->mnt_ioflags = 0;
1292	mp->mnt_realrootvp = NULLVP;
1293	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
1294
1295	mount_lock_init(mp);
1296	TAILQ_INIT(&mp->mnt_vnodelist);
1297	TAILQ_INIT(&mp->mnt_workerqueue);
1298	TAILQ_INIT(&mp->mnt_newvnodes);
1299	(void)vfs_busy(mp, LK_NOWAIT);
1300	TAILQ_INIT(&mp->mnt_vnodelist);
1301	mount_list_lock();
1302	vfsp->vfc_refcount++;
1303	mount_list_unlock();
1304	mp->mnt_vtable = vfsp;
1305	mp->mnt_op = vfsp->vfc_vfsops;
1306	// mp->mnt_stat.f_type = vfsp->vfc_typenum;
1307	mp->mnt_flag = mntflag;
1308	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1309	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSNAMELEN-1);
1310	vp->v_mountedhere = mp;
1311	mp->mnt_vnodecovered = vp;
1312	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
1313	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
1314	(void) copystr(ndmntp->ndm_mntfrom, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0);
1315#if CONFIG_MACF
1316	mac_mount_label_init(mp);
1317	mac_mount_label_associate(ctx, mp);
1318#endif
1319
1320	/* find the server-side path being mounted */
1321	frompath = ndmntp->ndm_mntfrom;
1322	if (*frompath == '[') {  /* skip IPv6 literal address */
1323		while (*frompath && (*frompath != ']'))
1324			frompath++;
1325		if (*frompath == ']')
1326			frompath++;
1327	}
1328	while (*frompath && (*frompath != ':'))
1329		frompath++;
1330	endserverp = frompath;
1331	while (*frompath && (*frompath == ':'))
1332		frompath++;
1333	/* count fs location path components */
1334	p = frompath;
1335	while (*p && (*p == '/'))
1336		p++;
1337	numcomps = 0;
1338	while (*p) {
1339		numcomps++;
1340		while (*p && (*p != '/'))
1341			p++;
1342		while (*p && (*p == '/'))
1343			p++;
1344	}
1345
1346	/* convert address to universal address string */
1347	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
1348		printf("nfs_mountroot: bad address\n");
1349		error = EINVAL;
1350		goto out;
1351	}
1352
1353	/* prepare mount attributes */
1354	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1355	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1356	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1357	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1358	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1359	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1360	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1361
1362	/* prepare mount flags */
1363	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1364	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1365	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1366	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1367
1368	/* build xdr buffer */
1369	xb_init_buffer(&xb, NULL, 0);
1370	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
1371	argslength_offset = xb_offset(&xb);
1372	xb_add_32(error, &xb, 0); // args length
1373	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1374	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1375	attrslength_offset = xb_offset(&xb);
1376	xb_add_32(error, &xb, 0); // attrs length
1377	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
1378	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1379	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
1380	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
1381	/* fs location */
1382	xb_add_32(error, &xb, 1); /* fs location count */
1383	xb_add_32(error, &xb, 1); /* server count */
1384	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
1385	xb_add_32(error, &xb, 1); /* address count */
1386	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1387	xb_add_32(error, &xb, 0); /* empty server info */
1388	xb_add_32(error, &xb, numcomps); /* pathname component count */
1389	p = frompath;
1390	while (*p && (*p == '/'))
1391		p++;
1392	while (*p) {
1393		cp = p;
1394		while (*p && (*p != '/'))
1395			p++;
1396		xb_add_string(error, &xb, cp, (p - cp)); /* component */
1397		if (error)
1398			break;
1399		while (*p && (*p == '/'))
1400			p++;
1401	}
1402	xb_add_32(error, &xb, 0); /* empty fsl info */
1403	xb_add_32(error, &xb, mntflag); /* MNT flags */
1404	xb_build_done(error, &xb);
1405
1406	/* update opaque counts */
1407	end_offset = xb_offset(&xb);
1408	if (!error) {
1409		error = xb_seek(&xb, argslength_offset);
1410		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1411	}
1412	if (!error) {
1413		error = xb_seek(&xb, attrslength_offset);
1414		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1415	}
1416	if (error) {
1417		printf("nfs_mountroot: error %d assembling mount args\n", error);
1418		goto out;
1419	}
1420	/* grab the assembled buffer */
1421	xdrbuf = xb_buffer_base(&xb);
1422	xb.xb_flags &= ~XB_CLEANUP;
1423
1424	/* do the mount */
1425	if ((error = mountnfs(xdrbuf, mp, ctx, &vp))) {
1426		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
1427		mount_list_lock();
1428		vfsp->vfc_refcount--;
1429		mount_list_unlock();
1430		vfs_unbusy(mp);
1431		mount_lock_destroy(mp);
1432#if CONFIG_MACF
1433		mac_mount_label_destroy(mp);
1434#endif
1435		FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
1436		goto out;
1437	}
1438
1439	*mpp = mp;
1440	*vpp = vp;
1441out:
1442	xb_cleanup(&xb);
1443	return (error);
1444}
1445#endif /* NO_MOUNT_PRIVATE */
1446
1447/*
1448 * Convert old style NFS mount args to XDR.
1449 */
1450static int
1451nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int argsversion, int inkernel, char **xdrbufp)
1452{
1453	int error = 0, args64bit, argsize, numcomps;
1454	struct user_nfs_args args;
1455	struct nfs_args tempargs;
1456	caddr_t argsp;
1457	size_t len;
1458	u_char nfh[NFS4_FHSIZE];
1459	char *mntfrom, *endserverp, *frompath, *p, *cp;
1460	struct sockaddr_storage ss;
1461	void *sinaddr;
1462	char uaddr[MAX_IPv6_STR_LEN];
1463	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1464	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
1465	uint32_t nfsvers, nfslockmode = 0, argslength_offset, attrslength_offset, end_offset;
1466	struct xdrbuf xb;
1467
1468	*xdrbufp = NULL;
1469
1470	/* allocate a temporary buffer for mntfrom */
1471	MALLOC_ZONE(mntfrom, char*, MAXPATHLEN, M_NAMEI, M_WAITOK);
1472	if (!mntfrom)
1473		return (ENOMEM);
1474
1475	args64bit = (inkernel || vfs_context_is64bit(ctx));
1476	argsp = args64bit ? (void*)&args : (void*)&tempargs;
1477
1478	argsize = args64bit ? sizeof(args) : sizeof(tempargs);
1479	switch (argsversion) {
1480	case 3:
1481		argsize -= NFS_ARGSVERSION4_INCSIZE;
1482	case 4:
1483		argsize -= NFS_ARGSVERSION5_INCSIZE;
1484	case 5:
1485		argsize -= NFS_ARGSVERSION6_INCSIZE;
1486	case 6:
1487		break;
1488	default:
1489		error = EPROGMISMATCH;
1490		goto nfsmout;
1491	}
1492
1493	/* read in the structure */
1494	if (inkernel)
1495		bcopy(CAST_DOWN(void *, data), argsp, argsize);
1496	else
1497		error = copyin(data, argsp, argsize);
1498	nfsmout_if(error);
1499
1500	if (!args64bit) {
1501		args.addrlen = tempargs.addrlen;
1502		args.sotype = tempargs.sotype;
1503		args.proto = tempargs.proto;
1504		args.fhsize = tempargs.fhsize;
1505		args.flags = tempargs.flags;
1506		args.wsize = tempargs.wsize;
1507		args.rsize = tempargs.rsize;
1508		args.readdirsize = tempargs.readdirsize;
1509		args.timeo = tempargs.timeo;
1510		args.retrans = tempargs.retrans;
1511		args.maxgrouplist = tempargs.maxgrouplist;
1512		args.readahead = tempargs.readahead;
1513		args.leaseterm = tempargs.leaseterm;
1514		args.deadthresh = tempargs.deadthresh;
1515		args.addr = CAST_USER_ADDR_T(tempargs.addr);
1516		args.fh = CAST_USER_ADDR_T(tempargs.fh);
1517		args.hostname = CAST_USER_ADDR_T(tempargs.hostname);
1518		if (args.version >= 4) {
1519			args.acregmin = tempargs.acregmin;
1520			args.acregmax = tempargs.acregmax;
1521			args.acdirmin = tempargs.acdirmin;
1522			args.acdirmax = tempargs.acdirmax;
1523		}
1524		if (args.version >= 5)
1525			args.auth = tempargs.auth;
1526		if (args.version >= 6)
1527			args.deadtimeout = tempargs.deadtimeout;
1528	}
1529
1530	if ((args.fhsize < 0) || (args.fhsize > NFS4_FHSIZE)) {
1531		error = EINVAL;
1532		goto nfsmout;
1533	}
1534	if (args.fhsize > 0) {
1535		if (inkernel)
1536			bcopy(CAST_DOWN(void *, args.fh), (caddr_t)nfh, args.fhsize);
1537		else
1538			error = copyin(args.fh, (caddr_t)nfh, args.fhsize);
1539		nfsmout_if(error);
1540	}
1541
1542	if (inkernel)
1543		error = copystr(CAST_DOWN(void *, args.hostname), mntfrom, MAXPATHLEN-1, &len);
1544	else
1545		error = copyinstr(args.hostname, mntfrom, MAXPATHLEN-1, &len);
1546	nfsmout_if(error);
1547	bzero(&mntfrom[len], MAXPATHLEN - len);
1548
1549	/* find the server-side path being mounted */
1550	frompath = mntfrom;
1551	if (*frompath == '[') {  /* skip IPv6 literal address */
1552		while (*frompath && (*frompath != ']'))
1553			frompath++;
1554		if (*frompath == ']')
1555			frompath++;
1556	}
1557	while (*frompath && (*frompath != ':'))
1558		frompath++;
1559	endserverp = frompath;
1560	while (*frompath && (*frompath == ':'))
1561		frompath++;
1562	/* count fs location path components */
1563	p = frompath;
1564	while (*p && (*p == '/'))
1565		p++;
1566	numcomps = 0;
1567	while (*p) {
1568		numcomps++;
1569		while (*p && (*p != '/'))
1570			p++;
1571		while (*p && (*p == '/'))
1572			p++;
1573	}
1574
1575	/* copy socket address */
1576	if (inkernel)
1577		bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen);
1578	else {
1579		if ((size_t)args.addrlen > sizeof (struct sockaddr_storage))
1580			error = EINVAL;
1581		else
1582			error = copyin(args.addr, &ss, args.addrlen);
1583	}
1584	nfsmout_if(error);
1585	ss.ss_len = args.addrlen;
1586
1587	/* convert address to universal address string */
1588	if (ss.ss_family == AF_INET)
1589		sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
1590	else if (ss.ss_family == AF_INET6)
1591		sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
1592	else
1593		sinaddr = NULL;
1594	if (!sinaddr || (inet_ntop(ss.ss_family, sinaddr, uaddr, sizeof(uaddr)) != uaddr)) {
1595		error = EINVAL;
1596		goto nfsmout;
1597	}
1598
1599	/* prepare mount flags */
1600	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1601	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1602	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
1603	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
1604	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1605	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
1606	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
1607	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
1608	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
1609	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
1610	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
1611	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
1612	if (args.flags & NFSMNT_SOFT)
1613		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
1614	if (args.flags & NFSMNT_INT)
1615		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
1616	if (args.flags & NFSMNT_RESVPORT)
1617		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1618	if (args.flags & NFSMNT_NOCONN)
1619		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
1620	if (args.flags & NFSMNT_DUMBTIMR)
1621		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
1622	if (args.flags & NFSMNT_CALLUMNT)
1623		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
1624	if (args.flags & NFSMNT_RDIRPLUS)
1625		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
1626	if (args.flags & NFSMNT_NONEGNAMECACHE)
1627		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
1628	if (args.flags & NFSMNT_MUTEJUKEBOX)
1629		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
1630	if (args.flags & NFSMNT_NOQUOTA)
1631		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
1632
1633	/* prepare mount attributes */
1634	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1635	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
1636	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1637	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1638	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1639	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1640	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1641	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1642	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
1643	if (args.flags & NFSMNT_NFSV4)
1644		nfsvers = 4;
1645	else if (args.flags & NFSMNT_NFSV3)
1646		nfsvers = 3;
1647	else
1648		nfsvers = 2;
1649	if ((args.flags & NFSMNT_RSIZE) && (args.rsize > 0))
1650		NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
1651	if ((args.flags & NFSMNT_WSIZE) && (args.wsize > 0))
1652		NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
1653	if ((args.flags & NFSMNT_TIMEO) && (args.timeo > 0))
1654		NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
1655	if ((args.flags & NFSMNT_RETRANS) && (args.retrans > 0))
1656		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
1657	if ((args.flags & NFSMNT_MAXGRPS) && (args.maxgrouplist > 0))
1658		NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
1659	if ((args.flags & NFSMNT_READAHEAD) && (args.readahead > 0))
1660		NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
1661	if ((args.flags & NFSMNT_READDIRSIZE) && (args.readdirsize > 0))
1662		NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
1663	if ((args.flags & NFSMNT_NOLOCKS) ||
1664	    (args.flags & NFSMNT_LOCALLOCKS)) {
1665		NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
1666		if (args.flags & NFSMNT_NOLOCKS)
1667			nfslockmode = NFS_LOCK_MODE_DISABLED;
1668		else if (args.flags & NFSMNT_LOCALLOCKS)
1669			nfslockmode = NFS_LOCK_MODE_LOCAL;
1670		else
1671			nfslockmode = NFS_LOCK_MODE_ENABLED;
1672	}
1673	if (args.version >= 4) {
1674		if ((args.flags & NFSMNT_ACREGMIN) && (args.acregmin > 0))
1675			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
1676		if ((args.flags & NFSMNT_ACREGMAX) && (args.acregmax > 0))
1677			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
1678		if ((args.flags & NFSMNT_ACDIRMIN) && (args.acdirmin > 0))
1679			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
1680		if ((args.flags & NFSMNT_ACDIRMAX) && (args.acdirmax > 0))
1681			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
1682	}
1683	if (args.version >= 5) {
1684		if ((args.flags & NFSMNT_SECFLAVOR) || (args.flags & NFSMNT_SECSYSOK))
1685			NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
1686	}
1687	if (args.version >= 6) {
1688		if ((args.flags & NFSMNT_DEADTIMEOUT) && (args.deadtimeout > 0))
1689			NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
1690	}
1691
1692	/* build xdr buffer */
1693	xb_init_buffer(&xb, NULL, 0);
1694	xb_add_32(error, &xb, args.version);
1695	argslength_offset = xb_offset(&xb);
1696	xb_add_32(error, &xb, 0); // args length
1697	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1698	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1699	attrslength_offset = xb_offset(&xb);
1700	xb_add_32(error, &xb, 0); // attrs length
1701	xb_add_bitmap(error, &xb, mflags_mask, NFS_MFLAG_BITMAP_LEN); /* mask */
1702	xb_add_bitmap(error, &xb, mflags, NFS_MFLAG_BITMAP_LEN); /* value */
1703	xb_add_32(error, &xb, nfsvers);
1704	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
1705		xb_add_32(error, &xb, args.rsize);
1706	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
1707		xb_add_32(error, &xb, args.wsize);
1708	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
1709		xb_add_32(error, &xb, args.readdirsize);
1710	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
1711		xb_add_32(error, &xb, args.readahead);
1712	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
1713		xb_add_32(error, &xb, args.acregmin);
1714		xb_add_32(error, &xb, 0);
1715	}
1716	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
1717		xb_add_32(error, &xb, args.acregmax);
1718		xb_add_32(error, &xb, 0);
1719	}
1720	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
1721		xb_add_32(error, &xb, args.acdirmin);
1722		xb_add_32(error, &xb, 0);
1723	}
1724	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
1725		xb_add_32(error, &xb, args.acdirmax);
1726		xb_add_32(error, &xb, 0);
1727	}
1728	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
1729		xb_add_32(error, &xb, nfslockmode);
1730	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
1731		uint32_t flavors[2], i=0;
1732		if (args.flags & NFSMNT_SECFLAVOR)
1733			flavors[i++] = args.auth;
1734		if ((args.flags & NFSMNT_SECSYSOK) && ((i == 0) || (flavors[0] != RPCAUTH_SYS)))
1735			flavors[i++] = RPCAUTH_SYS;
1736		xb_add_word_array(error, &xb, flavors, i);
1737	}
1738	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
1739		xb_add_32(error, &xb, args.maxgrouplist);
1740	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
1741		xb_add_string(error, &xb, ((args.sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1742	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
1743		xb_add_32(error, &xb, ((ss.ss_family == AF_INET) ?
1744			ntohs(((struct sockaddr_in*)&ss)->sin_port) :
1745			ntohs(((struct sockaddr_in6*)&ss)->sin6_port)));
1746	/* NFS_MATTR_MOUNT_PORT (not available in old args) */
1747	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
1748		/* convert from .1s increments to time */
1749		xb_add_32(error, &xb, args.timeo/10);
1750		xb_add_32(error, &xb, (args.timeo%10)*100000000);
1751	}
1752	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
1753		xb_add_32(error, &xb, args.retrans);
1754	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
1755		xb_add_32(error, &xb, args.deadtimeout);
1756		xb_add_32(error, &xb, 0);
1757	}
1758	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH))
1759		xb_add_fh(error, &xb, &nfh[0], args.fhsize);
1760	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
1761		xb_add_32(error, &xb, 1); /* fs location count */
1762		xb_add_32(error, &xb, 1); /* server count */
1763		xb_add_string(error, &xb, mntfrom, (endserverp - mntfrom)); /* server name */
1764		xb_add_32(error, &xb, 1); /* address count */
1765		xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1766		xb_add_32(error, &xb, 0); /* empty server info */
1767		xb_add_32(error, &xb, numcomps); /* pathname component count */
1768		nfsmout_if(error);
1769		p = frompath;
1770		while (*p && (*p == '/'))
1771			p++;
1772		while (*p) {
1773			cp = p;
1774			while (*p && (*p != '/'))
1775				p++;
1776			xb_add_string(error, &xb, cp, (p - cp)); /* component */
1777			nfsmout_if(error);
1778			while (*p && (*p == '/'))
1779				p++;
1780		}
1781		xb_add_32(error, &xb, 0); /* empty fsl info */
1782	}
1783	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
1784		xb_add_32(error, &xb, (vfs_flags(mp) & MNT_VISFLAGMASK)); /* VFS MNT_* flags */
1785	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM))
1786		xb_add_string(error, &xb, mntfrom, strlen(mntfrom)); /* fixed f_mntfromname */
1787	xb_build_done(error, &xb);
1788
1789	/* update opaque counts */
1790	end_offset = xb_offset(&xb);
1791	error = xb_seek(&xb, argslength_offset);
1792	xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1793	nfsmout_if(error);
1794	error = xb_seek(&xb, attrslength_offset);
1795	xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1796
1797	if (!error) {
1798		/* grab the assembled buffer */
1799		*xdrbufp = xb_buffer_base(&xb);
1800		xb.xb_flags &= ~XB_CLEANUP;
1801	}
1802nfsmout:
1803	xb_cleanup(&xb);
1804	FREE_ZONE(mntfrom, MAXPATHLEN, M_NAMEI);
1805	return (error);
1806}
1807
1808/*
1809 * VFS Operations.
1810 *
1811 * mount system call
1812 */
1813int
1814nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
1815{
1816	int error = 0, inkernel = vfs_iskernelmount(mp);
1817	uint32_t argsversion, argslength;
1818	char *xdrbuf = NULL;
1819
1820	/* read in version */
1821	if (inkernel)
1822		bcopy(CAST_DOWN(void *, data), &argsversion, sizeof(argsversion));
1823	else if ((error = copyin(data, &argsversion, sizeof(argsversion))))
1824		return (error);
1825
1826	/* If we have XDR args, then all values in the buffer are in network order */
1827	if (argsversion == htonl(NFS_ARGSVERSION_XDR))
1828		argsversion = NFS_ARGSVERSION_XDR;
1829
1830	switch (argsversion) {
1831	case 3:
1832	case 4:
1833	case 5:
1834	case 6:
1835		/* convert old-style args to xdr */
1836		error = nfs_convert_old_nfs_args(mp, data, ctx, argsversion, inkernel, &xdrbuf);
1837		break;
1838	case NFS_ARGSVERSION_XDR:
1839		/* copy in xdr buffer */
1840		if (inkernel)
1841			bcopy(CAST_DOWN(void *, (data + XDRWORD)), &argslength, XDRWORD);
1842		else
1843			error = copyin((data + XDRWORD), &argslength, XDRWORD);
1844		if (error)
1845			break;
1846		argslength = ntohl(argslength);
1847		/* put a reasonable limit on the size of the XDR args */
1848		if (argslength > 16*1024) {
1849			error = E2BIG;
1850			break;
1851		}
1852		/* allocate xdr buffer */
1853		xdrbuf = xb_malloc(xdr_rndup(argslength));
1854		if (!xdrbuf) {
1855			error = ENOMEM;
1856			break;
1857		}
1858		if (inkernel)
1859			bcopy(CAST_DOWN(void *, data), xdrbuf, argslength);
1860		else
1861			error = copyin(data, xdrbuf, argslength);
1862		break;
1863	default:
1864		error = EPROGMISMATCH;
1865	}
1866
1867	if (error) {
1868		if (xdrbuf)
1869			xb_free(xdrbuf);
1870		return (error);
1871	}
1872	error = mountnfs(xdrbuf, mp, ctx, &vp);
1873	return (error);
1874}
1875
1876/*
1877 * Common code for mount and mountroot
1878 */
1879
1880/* Set up an NFSv2/v3 mount */
1881int
1882nfs3_mount(
1883	struct nfsmount *nmp,
1884	vfs_context_t ctx,
1885	nfsnode_t *npp)
1886{
1887	int error = 0;
1888	struct nfs_vattr nvattr;
1889	u_int64_t xid;
1890
1891	*npp = NULL;
1892
1893	if (!nmp->nm_fh)
1894		return (EINVAL);
1895
1896	/*
1897	 * Get file attributes for the mountpoint.  These are needed
1898	 * in order to properly create the root vnode.
1899	 */
1900	error = nfs3_getattr_rpc(NULL, nmp->nm_mountp, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len, 0,
1901			ctx, &nvattr, &xid);
1902	if (error)
1903		goto out;
1904
1905	error = nfs_nget(nmp->nm_mountp, NULL, NULL, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len,
1906			&nvattr, &xid, RPCAUTH_UNKNOWN, NG_MARKROOT, npp);
1907	if (*npp)
1908		nfs_node_unlock(*npp);
1909	if (error)
1910		goto out;
1911
1912	/*
1913	 * Try to make sure we have all the general info from the server.
1914	 */
1915	if (nmp->nm_vers == NFS_VER2) {
1916		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
1917		nmp->nm_fsattr.nfsa_maxname = NFS_MAXNAMLEN;
1918	} else if (nmp->nm_vers == NFS_VER3) {
1919		/* get the NFSv3 FSINFO */
1920		error = nfs3_fsinfo(nmp, *npp, ctx);
1921		if (error)
1922			goto out;
1923		/* If the server indicates all pathconf info is */
1924		/* the same, grab a copy of that info now */
1925		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_HOMOGENEOUS) &&
1926		    (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
1927			struct nfs_fsattr nfsa;
1928			if (!nfs3_pathconf_rpc(*npp, &nfsa, ctx)) {
1929				/* cache a copy of the results */
1930				lck_mtx_lock(&nmp->nm_lock);
1931				nfs3_pathconf_cache(nmp, &nfsa);
1932				lck_mtx_unlock(&nmp->nm_lock);
1933			}
1934		}
1935	}
1936out:
1937	if (*npp && error) {
1938		vnode_put(NFSTOV(*npp));
1939		vnode_recycle(NFSTOV(*npp));
1940		*npp = NULL;
1941	}
1942	return (error);
1943}
1944
1945/*
1946 * Update an NFSv4 mount path with the contents of the symlink.
1947 *
1948 * Read the link for the given file handle.
1949 * Insert the link's components into the path.
1950 */
1951int
1952nfs4_mount_update_path_with_symlink(struct nfsmount *nmp, struct nfs_fs_path *nfsp, uint32_t curcomp, fhandle_t *dirfhp, int *depthp, fhandle_t *fhp, vfs_context_t ctx)
1953{
1954	int error = 0, status, numops;
1955	uint32_t len = 0, comp, newcomp, linkcompcount;
1956	u_int64_t xid;
1957	struct nfsm_chain nmreq, nmrep;
1958	struct nfsreq rq, *req = &rq;
1959	struct nfsreq_secinfo_args si;
1960	char *link = NULL, *p, *q, ch;
1961	struct nfs_fs_path nfsp2;
1962
1963	bzero(&nfsp2, sizeof(nfsp2));
1964	if (dirfhp->fh_len)
1965		NFSREQ_SECINFO_SET(&si, NULL, dirfhp->fh_data, dirfhp->fh_len, nfsp->np_components[curcomp], 0);
1966	else
1967		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, nfsp->np_components[curcomp], 0);
1968	nfsm_chain_null(&nmreq);
1969	nfsm_chain_null(&nmrep);
1970
1971	MALLOC_ZONE(link, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1972	if (!link)
1973		error = ENOMEM;
1974
1975	// PUTFH, READLINK
1976	numops = 2;
1977	nfsm_chain_build_alloc_init(error, &nmreq, 12 * NFSX_UNSIGNED);
1978	nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
1979	numops--;
1980	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
1981	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp->fh_data, fhp->fh_len);
1982	numops--;
1983	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
1984	nfsm_chain_build_done(error, &nmreq);
1985	nfsm_assert(error, (numops == 0), EPROTO);
1986	nfsmout_if(error);
1987
1988	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
1989			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
1990	if (!error)
1991		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
1992
1993	nfsm_chain_skip_tag(error, &nmrep);
1994	nfsm_chain_get_32(error, &nmrep, numops);
1995	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
1996	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
1997	nfsm_chain_get_32(error, &nmrep, len);
1998	nfsmout_if(error);
1999	if (len == 0)
2000		error = ENOENT;
2001	else if (len >= MAXPATHLEN)
2002		len = MAXPATHLEN - 1;
2003	nfsm_chain_get_opaque(error, &nmrep, len, link);
2004	nfsmout_if(error);
2005	/* make sure link string is terminated properly */
2006	link[len] = '\0';
2007
2008	/* count the number of components in link */
2009	p = link;
2010	while (*p && (*p == '/'))
2011		p++;
2012	linkcompcount = 0;
2013	while (*p) {
2014		linkcompcount++;
2015		while (*p && (*p != '/'))
2016			p++;
2017		while (*p && (*p == '/'))
2018			p++;
2019	}
2020
2021	/* free up used components */
2022	for (comp=0; comp <= curcomp; comp++) {
2023		if (nfsp->np_components[comp]) {
2024			FREE(nfsp->np_components[comp], M_TEMP);
2025			nfsp->np_components[comp] = NULL;
2026		}
2027	}
2028
2029	/* set up new path */
2030	nfsp2.np_compcount = nfsp->np_compcount - curcomp - 1 + linkcompcount;
2031	MALLOC(nfsp2.np_components, char **, nfsp2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2032	if (!nfsp2.np_components) {
2033		error = ENOMEM;
2034		goto nfsmout;
2035	}
2036
2037	/* add link components */
2038	p = link;
2039	while (*p && (*p == '/'))
2040		p++;
2041	for (newcomp=0; newcomp < linkcompcount; newcomp++) {
2042		/* find end of component */
2043		q = p;
2044		while (*q && (*q != '/'))
2045			q++;
2046		MALLOC(nfsp2.np_components[newcomp], char *, q-p+1, M_TEMP, M_WAITOK|M_ZERO);
2047		if (!nfsp2.np_components[newcomp]) {
2048			error = ENOMEM;
2049			break;
2050		}
2051		ch = *q;
2052		*q = '\0';
2053		strlcpy(nfsp2.np_components[newcomp], p, q-p+1);
2054		*q = ch;
2055		p = q;
2056		while (*p && (*p == '/'))
2057			p++;
2058	}
2059	nfsmout_if(error);
2060
2061	/* add remaining components */
2062	for(comp = curcomp + 1; comp < nfsp->np_compcount; comp++,newcomp++) {
2063		nfsp2.np_components[newcomp] = nfsp->np_components[comp];
2064		nfsp->np_components[comp] = NULL;
2065	}
2066
2067	/* move new path into place */
2068	FREE(nfsp->np_components, M_TEMP);
2069	nfsp->np_components = nfsp2.np_components;
2070	nfsp->np_compcount = nfsp2.np_compcount;
2071	nfsp2.np_components = NULL;
2072
2073	/* for absolute link, let the caller now that the next dirfh is root */
2074	if (link[0] == '/') {
2075		dirfhp->fh_len = 0;
2076		*depthp = 0;
2077	}
2078nfsmout:
2079	if (link)
2080		FREE_ZONE(link, MAXPATHLEN, M_NAMEI);
2081	if (nfsp2.np_components) {
2082		for (comp=0; comp < nfsp2.np_compcount; comp++)
2083			if (nfsp2.np_components[comp])
2084				FREE(nfsp2.np_components[comp], M_TEMP);
2085		FREE(nfsp2.np_components, M_TEMP);
2086	}
2087	nfsm_chain_cleanup(&nmreq);
2088	nfsm_chain_cleanup(&nmrep);
2089	return (error);
2090}
2091
2092/* Set up an NFSv4 mount */
2093int
2094nfs4_mount(
2095	struct nfsmount *nmp,
2096	vfs_context_t ctx,
2097	nfsnode_t *npp)
2098{
2099	struct nfsm_chain nmreq, nmrep;
2100	int error = 0, numops, status, interval, isdotdot, loopcnt = 0, depth = 0;
2101	struct nfs_fs_path fspath, *nfsp, fspath2;
2102	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], comp, comp2;
2103	fhandle_t fh, dirfh;
2104	struct nfs_vattr nvattr;
2105	u_int64_t xid;
2106	struct nfsreq rq, *req = &rq;
2107	struct nfsreq_secinfo_args si;
2108	struct nfs_sec sec;
2109	struct nfs_fs_locations nfsls;
2110
2111	*npp = NULL;
2112	fh.fh_len = dirfh.fh_len = 0;
2113	TAILQ_INIT(&nmp->nm_open_owners);
2114	TAILQ_INIT(&nmp->nm_delegations);
2115	TAILQ_INIT(&nmp->nm_dreturnq);
2116	nmp->nm_stategenid = 1;
2117	NVATTR_INIT(&nvattr);
2118	bzero(&nfsls, sizeof(nfsls));
2119	nfsm_chain_null(&nmreq);
2120	nfsm_chain_null(&nmrep);
2121
2122	/*
2123	 * If no security flavors were specified we'll want to default to the server's
2124	 * preferred flavor.  For NFSv4.0 we need a file handle and name to get that via
2125	 * SECINFO, so we'll do that on the last component of the server path we are
2126	 * mounting.  If we are mounting the server's root, we'll need to defer the
2127	 * SECINFO call to the first successful LOOKUP request.
2128	 */
2129	if (!nmp->nm_sec.count)
2130		nmp->nm_state |= NFSSTA_NEEDSECINFO;
2131
2132	/* make a copy of the current location's path */
2133	nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
2134	bzero(&fspath, sizeof(fspath));
2135	fspath.np_compcount = nfsp->np_compcount;
2136	if (fspath.np_compcount > 0) {
2137		MALLOC(fspath.np_components, char **, fspath.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2138		if (!fspath.np_components) {
2139			error = ENOMEM;
2140			goto nfsmout;
2141		}
2142		for (comp=0; comp < nfsp->np_compcount; comp++) {
2143			int slen = strlen(nfsp->np_components[comp]);
2144			MALLOC(fspath.np_components[comp], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
2145			if (!fspath.np_components[comp]) {
2146				error = ENOMEM;
2147				break;
2148			}
2149			strlcpy(fspath.np_components[comp], nfsp->np_components[comp], slen+1);
2150		}
2151		if (error)
2152			goto nfsmout;
2153	}
2154
2155	/* for mirror mounts, we can just use the file handle passed in */
2156	if (nmp->nm_fh) {
2157		dirfh.fh_len = nmp->nm_fh->fh_len;
2158		bcopy(nmp->nm_fh->fh_data, dirfh.fh_data, dirfh.fh_len);
2159		NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, NULL, 0);
2160		goto gotfh;
2161	}
2162
2163	/* otherwise, we need to get the fh for the directory we are mounting */
2164
2165	/* if no components, just get root */
2166	if (fspath.np_compcount == 0) {
2167nocomponents:
2168		// PUTROOTFH + GETATTR(FH)
2169		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, NULL, 0);
2170		numops = 2;
2171		nfsm_chain_build_alloc_init(error, &nmreq, 9 * NFSX_UNSIGNED);
2172		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2173		numops--;
2174		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
2175		numops--;
2176		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2177		NFS_CLEAR_ATTRIBUTES(bitmap);
2178		NFS4_DEFAULT_ATTRIBUTES(bitmap);
2179		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
2180		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2181		nfsm_chain_build_done(error, &nmreq);
2182		nfsm_assert(error, (numops == 0), EPROTO);
2183		nfsmout_if(error);
2184		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2185				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2186		if (!error)
2187			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2188		nfsm_chain_skip_tag(error, &nmrep);
2189		nfsm_chain_get_32(error, &nmrep, numops);
2190		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTROOTFH);
2191		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2192		nfsmout_if(error);
2193		NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2194		error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, &dirfh, NULL, NULL);
2195		if (!error && !NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
2196			printf("nfs: mount didn't return filehandle?\n");
2197			error = EBADRPC;
2198		}
2199		nfsmout_if(error);
2200		nfsm_chain_cleanup(&nmrep);
2201		nfsm_chain_null(&nmreq);
2202		NVATTR_CLEANUP(&nvattr);
2203		goto gotfh;
2204	}
2205
2206	/* look up each path component */
2207	for (comp=0; comp < fspath.np_compcount; ) {
2208		isdotdot = 0;
2209		if (fspath.np_components[comp][0] == '.') {
2210			if (fspath.np_components[comp][1] == '\0') {
2211				/* skip "." */
2212				comp++;
2213				continue;
2214			}
2215			/* treat ".." specially */
2216			if ((fspath.np_components[comp][1] == '.') &&
2217			    (fspath.np_components[comp][2] == '\0'))
2218			    	isdotdot = 1;
2219			if (isdotdot && (dirfh.fh_len == 0)) {
2220				/* ".." in root directory is same as "." */
2221				comp++;
2222				continue;
2223			}
2224		}
2225		// PUT(ROOT)FH + LOOKUP(P) + GETFH + GETATTR
2226		if (dirfh.fh_len == 0)
2227			NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
2228		else
2229			NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
2230		numops = 4;
2231		nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
2232		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2233		numops--;
2234		if (dirfh.fh_len) {
2235			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
2236			nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
2237		} else {
2238			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
2239		}
2240		numops--;
2241		if (isdotdot) {
2242			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
2243		} else {
2244			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
2245			nfsm_chain_add_name(error, &nmreq,
2246				fspath.np_components[comp], strlen(fspath.np_components[comp]), nmp);
2247		}
2248		numops--;
2249		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
2250		numops--;
2251		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2252		NFS_CLEAR_ATTRIBUTES(bitmap);
2253		NFS4_DEFAULT_ATTRIBUTES(bitmap);
2254		/* if no namedattr support or component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
2255		if (NMFLAG(nmp, NONAMEDATTR) || !strcmp(fspath.np_components[comp], ".zfs"))
2256			NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
2257		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2258		nfsm_chain_build_done(error, &nmreq);
2259		nfsm_assert(error, (numops == 0), EPROTO);
2260		nfsmout_if(error);
2261		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2262				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2263		if (!error)
2264			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2265		nfsm_chain_skip_tag(error, &nmrep);
2266		nfsm_chain_get_32(error, &nmrep, numops);
2267		nfsm_chain_op_check(error, &nmrep, dirfh.fh_len ? NFS_OP_PUTFH : NFS_OP_PUTROOTFH);
2268		nfsm_chain_op_check(error, &nmrep, isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP);
2269		nfsmout_if(error);
2270		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
2271		nfsm_chain_get_32(error, &nmrep, fh.fh_len);
2272		nfsm_chain_get_opaque(error, &nmrep, fh.fh_len, fh.fh_data);
2273		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2274		if (!error) {
2275			NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2276			error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, &nfsls);
2277		}
2278		nfsm_chain_cleanup(&nmrep);
2279		nfsm_chain_null(&nmreq);
2280		if (error) {
2281			/* LOOKUP succeeded but GETATTR failed?  This could be a referral. */
2282			/* Try the lookup again with a getattr for fs_locations. */
2283			nfs_fs_locations_cleanup(&nfsls);
2284			error = nfs4_get_fs_locations(nmp, NULL, dirfh.fh_data, dirfh.fh_len, fspath.np_components[comp], ctx, &nfsls);
2285			if (!error && (nfsls.nl_numlocs < 1))
2286				error = ENOENT;
2287			nfsmout_if(error);
2288			if (++loopcnt > MAXSYMLINKS) {
2289				/* too many symlink/referral redirections */
2290				error = ELOOP;
2291				goto nfsmout;
2292			}
2293			/* tear down the current connection */
2294			nfs_disconnect(nmp);
2295			/* replace fs locations */
2296			nfs_fs_locations_cleanup(&nmp->nm_locations);
2297			nmp->nm_locations = nfsls;
2298			bzero(&nfsls, sizeof(nfsls));
2299			/* initiate a connection using the new fs locations */
2300			error = nfs_mount_connect(nmp);
2301			if (!error && !(nmp->nm_locations.nl_current.nli_flags & NLI_VALID))
2302				error = EIO;
2303			nfsmout_if(error);
2304			/* add new server's remote path to beginning of our path and continue */
2305			nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
2306			bzero(&fspath2, sizeof(fspath2));
2307			fspath2.np_compcount = (fspath.np_compcount - comp - 1) + nfsp->np_compcount;
2308			if (fspath2.np_compcount > 0) {
2309				MALLOC(fspath2.np_components, char **, fspath2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2310				if (!fspath2.np_components) {
2311					error = ENOMEM;
2312					goto nfsmout;
2313				}
2314				for (comp2=0; comp2 < nfsp->np_compcount; comp2++) {
2315					int slen = strlen(nfsp->np_components[comp2]);
2316					MALLOC(fspath2.np_components[comp2], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
2317					if (!fspath2.np_components[comp2]) {
2318						/* clean up fspath2, then error out */
2319						while (comp2 > 0) {
2320							comp2--;
2321							FREE(fspath2.np_components[comp2], M_TEMP);
2322						}
2323						FREE(fspath2.np_components, M_TEMP);
2324						error = ENOMEM;
2325						goto nfsmout;
2326					}
2327					strlcpy(fspath2.np_components[comp2], nfsp->np_components[comp2], slen+1);
2328				}
2329				if ((fspath.np_compcount - comp - 1) > 0)
2330					bcopy(&fspath.np_components[comp+1], &fspath2.np_components[nfsp->np_compcount], (fspath.np_compcount - comp - 1)*sizeof(char*));
2331				/* free up unused parts of old path (prior components and component array) */
2332				do {
2333					FREE(fspath.np_components[comp], M_TEMP);
2334				} while (comp-- > 0);
2335				FREE(fspath.np_components, M_TEMP);
2336				/* put new path in place */
2337				fspath = fspath2;
2338			}
2339			/* reset dirfh and component index */
2340			dirfh.fh_len = 0;
2341			comp = 0;
2342			NVATTR_CLEANUP(&nvattr);
2343			if (fspath.np_compcount == 0)
2344				goto nocomponents;
2345			continue;
2346		}
2347		nfsmout_if(error);
2348		/* if file handle is for a symlink, then update the path with the symlink contents */
2349		if (NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) && (nvattr.nva_type == VLNK)) {
2350			if (++loopcnt > MAXSYMLINKS)
2351				error = ELOOP;
2352			else
2353				error = nfs4_mount_update_path_with_symlink(nmp, &fspath, comp, &dirfh, &depth, &fh, ctx);
2354			nfsmout_if(error);
2355			/* directory file handle is either left the same or reset to root (if link was absolute) */
2356			/* path traversal starts at beginning of the path again */
2357			comp = 0;
2358			NVATTR_CLEANUP(&nvattr);
2359			nfs_fs_locations_cleanup(&nfsls);
2360			continue;
2361		}
2362		NVATTR_CLEANUP(&nvattr);
2363		nfs_fs_locations_cleanup(&nfsls);
2364		/* not a symlink... */
2365		if ((nmp->nm_state & NFSSTA_NEEDSECINFO) && (comp == (fspath.np_compcount-1)) && !isdotdot) {
2366			/* need to get SECINFO for the directory being mounted */
2367			if (dirfh.fh_len == 0)
2368				NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
2369			else
2370				NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
2371			sec.count = NX_MAX_SEC_FLAVORS;
2372			error = nfs4_secinfo_rpc(nmp, &si, vfs_context_ucred(ctx), sec.flavors, &sec.count);
2373			/* [sigh] some implementations return "illegal" error for unsupported ops */
2374			if (error == NFSERR_OP_ILLEGAL)
2375				error = 0;
2376			nfsmout_if(error);
2377			/* set our default security flavor to the first in the list */
2378			if (sec.count)
2379				nmp->nm_auth = sec.flavors[0];
2380			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
2381		}
2382		/* advance directory file handle, component index, & update depth */
2383		dirfh = fh;
2384		comp++;
2385		if (!isdotdot) /* going down the hierarchy */
2386			depth++;
2387		else if (--depth <= 0)  /* going up the hierarchy */
2388			dirfh.fh_len = 0; /* clear dirfh when we hit root */
2389	}
2390
2391gotfh:
2392	/* get attrs for mount point root */
2393	numops = NMFLAG(nmp, NONAMEDATTR) ? 2 : 3; // PUTFH + GETATTR + OPENATTR
2394	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED);
2395	nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2396	numops--;
2397	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
2398	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
2399	numops--;
2400	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2401	NFS_CLEAR_ATTRIBUTES(bitmap);
2402	NFS4_DEFAULT_ATTRIBUTES(bitmap);
2403	/* if no namedattr support or last component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
2404	if (NMFLAG(nmp, NONAMEDATTR) || ((fspath.np_compcount > 0) && !strcmp(fspath.np_components[fspath.np_compcount-1], ".zfs")))
2405		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
2406	nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2407	if (!NMFLAG(nmp, NONAMEDATTR)) {
2408		numops--;
2409		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
2410		nfsm_chain_add_32(error, &nmreq, 0);
2411	}
2412	nfsm_chain_build_done(error, &nmreq);
2413	nfsm_assert(error, (numops == 0), EPROTO);
2414	nfsmout_if(error);
2415	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2416			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2417	if (!error)
2418		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2419	nfsm_chain_skip_tag(error, &nmrep);
2420	nfsm_chain_get_32(error, &nmrep, numops);
2421	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
2422	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2423	nfsmout_if(error);
2424	NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2425	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
2426	nfsmout_if(error);
2427	if (!NMFLAG(nmp, NONAMEDATTR)) {
2428		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
2429		if (error == ENOENT)
2430			error = 0;
2431		/* [sigh] some implementations return "illegal" error for unsupported ops */
2432		if (error || !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_NAMED_ATTR)) {
2433			nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
2434		} else {
2435			nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_NAMED_ATTR;
2436		}
2437	} else {
2438		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
2439	}
2440	if (NMFLAG(nmp, NOACL)) /* make sure ACL support is turned off */
2441		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_ACL;
2442	if (NMFLAG(nmp, ACLONLY) && !(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL))
2443		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
2444	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_FH_EXPIRE_TYPE)) {
2445		uint32_t fhtype = ((nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_FHTYPE_MASK) >> NFS_FSFLAG_FHTYPE_SHIFT);
2446		if (fhtype != NFS_FH_PERSISTENT)
2447			printf("nfs: warning: non-persistent file handles! for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
2448	}
2449
2450	/* make sure it's a directory */
2451	if (!NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) || (nvattr.nva_type != VDIR)) {
2452		error = ENOTDIR;
2453		goto nfsmout;
2454	}
2455
2456	/* save the NFS fsid */
2457	nmp->nm_fsid = nvattr.nva_fsid;
2458
2459	/* create the root node */
2460	error = nfs_nget(nmp->nm_mountp, NULL, NULL, dirfh.fh_data, dirfh.fh_len, &nvattr, &xid, rq.r_auth, NG_MARKROOT, npp);
2461	nfsmout_if(error);
2462
2463	if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
2464		vfs_setextendedsecurity(nmp->nm_mountp);
2465
2466	/* adjust I/O sizes to server limits */
2467	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD) && (nmp->nm_fsattr.nfsa_maxread > 0)) {
2468		if (nmp->nm_fsattr.nfsa_maxread < (uint64_t)nmp->nm_rsize) {
2469			nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread & ~(NFS_FABLKSIZE - 1);
2470			if (nmp->nm_rsize == 0)
2471				nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread;
2472		}
2473	}
2474	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE) && (nmp->nm_fsattr.nfsa_maxwrite > 0)) {
2475		if (nmp->nm_fsattr.nfsa_maxwrite < (uint64_t)nmp->nm_wsize) {
2476			nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite & ~(NFS_FABLKSIZE - 1);
2477			if (nmp->nm_wsize == 0)
2478				nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite;
2479		}
2480	}
2481
2482	/* set up lease renew timer */
2483	nmp->nm_renew_timer = thread_call_allocate(nfs4_renew_timer, nmp);
2484	interval = nmp->nm_fsattr.nfsa_lease / 2;
2485	if (interval < 1)
2486		interval = 1;
2487	nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
2488
2489nfsmout:
2490	if (fspath.np_components) {
2491		for (comp=0; comp < fspath.np_compcount; comp++)
2492			if (fspath.np_components[comp])
2493				FREE(fspath.np_components[comp], M_TEMP);
2494		FREE(fspath.np_components, M_TEMP);
2495	}
2496	NVATTR_CLEANUP(&nvattr);
2497	nfs_fs_locations_cleanup(&nfsls);
2498	if (*npp)
2499		nfs_node_unlock(*npp);
2500	nfsm_chain_cleanup(&nmreq);
2501	nfsm_chain_cleanup(&nmrep);
2502	return (error);
2503}
2504
2505/*
2506 * Thread to handle initial NFS mount connection.
2507 */
2508void
2509nfs_mount_connect_thread(void *arg, __unused wait_result_t wr)
2510{
2511	struct nfsmount *nmp = arg;
2512	int error = 0, savederror = 0, slpflag = (NMFLAG(nmp, INTR) ? PCATCH : 0);
2513	int done = 0, timeo, tries, maxtries;
2514
2515	if (NM_OMFLAG(nmp, MNTQUICK)) {
2516		timeo = 8;
2517		maxtries = 1;
2518	} else {
2519		timeo = 30;
2520		maxtries = 2;
2521	}
2522
2523	for (tries = 0; tries < maxtries; tries++) {
2524		error = nfs_connect(nmp, 1, timeo);
2525		switch (error) {
2526		case ETIMEDOUT:
2527		case EAGAIN:
2528		case EPIPE:
2529		case EADDRNOTAVAIL:
2530		case ENETDOWN:
2531		case ENETUNREACH:
2532		case ENETRESET:
2533		case ECONNABORTED:
2534		case ECONNRESET:
2535		case EISCONN:
2536		case ENOTCONN:
2537		case ESHUTDOWN:
2538		case ECONNREFUSED:
2539		case EHOSTDOWN:
2540		case EHOSTUNREACH:
2541			/* just keep retrying on any of these errors */
2542			break;
2543		case 0:
2544		default:
2545			/* looks like we got an answer... */
2546			done = 1;
2547			break;
2548		}
2549
2550		/* save the best error */
2551		if (nfs_connect_error_class(error) >= nfs_connect_error_class(savederror))
2552			savederror = error;
2553		if (done) {
2554			error = savederror;
2555			break;
2556		}
2557
2558		/* pause before next attempt */
2559		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
2560			break;
2561		error = tsleep(nmp, PSOCK|slpflag, "nfs_mount_connect_retry", 2*hz);
2562		if (error && (error != EWOULDBLOCK))
2563			break;
2564		error = savederror;
2565	}
2566
2567	/* update status of mount connect */
2568	lck_mtx_lock(&nmp->nm_lock);
2569	if (!nmp->nm_mounterror)
2570		nmp->nm_mounterror = error;
2571	nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
2572	lck_mtx_unlock(&nmp->nm_lock);
2573	wakeup(&nmp->nm_nss);
2574}
2575
2576int
2577nfs_mount_connect(struct nfsmount *nmp)
2578{
2579	int error = 0, slpflag;
2580	thread_t thd;
2581	struct timespec ts = { 2, 0 };
2582
2583	/*
2584	 * Set up the socket.  Perform initial search for a location/server/address to
2585	 * connect to and negotiate any unspecified mount parameters.  This work is
2586	 * done on a kernel thread to satisfy reserved port usage needs.
2587	 */
2588	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
2589	lck_mtx_lock(&nmp->nm_lock);
2590	/* set flag that the thread is running */
2591	nmp->nm_state |= NFSSTA_MOUNT_THREAD;
2592	if (kernel_thread_start(nfs_mount_connect_thread, nmp, &thd) != KERN_SUCCESS) {
2593		nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
2594		nmp->nm_mounterror = EIO;
2595		printf("nfs mount %s start socket connect thread failed\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
2596	} else {
2597		thread_deallocate(thd);
2598	}
2599
2600	/* wait until mount connect thread is finished/gone */
2601	while (nmp->nm_state & NFSSTA_MOUNT_THREAD) {
2602		error = msleep(&nmp->nm_nss, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectthread", &ts);
2603		if ((error && (error != EWOULDBLOCK)) || ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))) {
2604			/* record error */
2605			if (!nmp->nm_mounterror)
2606				nmp->nm_mounterror = error;
2607			/* signal the thread that we are aborting */
2608			nmp->nm_sockflags |= NMSOCK_UNMOUNT;
2609			if (nmp->nm_nss)
2610				wakeup(nmp->nm_nss);
2611			/* and continue waiting on it to finish */
2612			slpflag = 0;
2613		}
2614	}
2615	lck_mtx_unlock(&nmp->nm_lock);
2616
2617	/* grab mount connect status */
2618	error = nmp->nm_mounterror;
2619
2620	return (error);
2621}
2622
2623/*
2624 * Common code to mount an NFS file system.
2625 */
2626int
2627mountnfs(
2628	char *xdrbuf,
2629	mount_t mp,
2630	vfs_context_t ctx,
2631	vnode_t *vpp)
2632{
2633	struct nfsmount *nmp;
2634	nfsnode_t np;
2635	int error = 0;
2636	struct vfsstatfs *sbp;
2637	struct xdrbuf xb;
2638	uint32_t i, val, vers = 0, minorvers, maxio, iosize, len;
2639	uint32_t *mattrs;
2640	uint32_t *mflags_mask;
2641	uint32_t *mflags;
2642	uint32_t argslength, attrslength;
2643	struct nfs_location_index firstloc = { NLI_VALID, 0, 0, 0 };
2644
2645	/* make sure mbuf constants are set up */
2646	if (!nfs_mbuf_mhlen)
2647		nfs_mbuf_init();
2648
2649	if (vfs_flags(mp) & MNT_UPDATE) {
2650		nmp = VFSTONFS(mp);
2651		/* update paths, file handles, etc, here	XXX */
2652		xb_free(xdrbuf);
2653		return (0);
2654	} else {
2655		/* allocate an NFS mount structure for this mount */
2656		MALLOC_ZONE(nmp, struct nfsmount *,
2657				sizeof (struct nfsmount), M_NFSMNT, M_WAITOK);
2658		if (!nmp) {
2659			xb_free(xdrbuf);
2660			return (ENOMEM);
2661		}
2662		bzero((caddr_t)nmp, sizeof (struct nfsmount));
2663		lck_mtx_init(&nmp->nm_lock, nfs_mount_grp, LCK_ATTR_NULL);
2664		TAILQ_INIT(&nmp->nm_resendq);
2665		TAILQ_INIT(&nmp->nm_iodq);
2666		TAILQ_INIT(&nmp->nm_gsscl);
2667		LIST_INIT(&nmp->nm_monlist);
2668		vfs_setfsprivate(mp, nmp);
2669		vfs_getnewfsid(mp);
2670		nmp->nm_mountp = mp;
2671		vfs_setauthopaque(mp);
2672
2673		nfs_nhinit_finish();
2674
2675		nmp->nm_args = xdrbuf;
2676
2677		/* set up defaults */
2678		nmp->nm_vers = 0;
2679		nmp->nm_timeo = NFS_TIMEO;
2680		nmp->nm_retry = NFS_RETRANS;
2681		nmp->nm_sotype = 0;
2682		nmp->nm_sofamily = 0;
2683		nmp->nm_nfsport = 0;
2684		nmp->nm_wsize = NFS_WSIZE;
2685		nmp->nm_rsize = NFS_RSIZE;
2686		nmp->nm_readdirsize = NFS_READDIRSIZE;
2687		nmp->nm_numgrps = NFS_MAXGRPS;
2688		nmp->nm_readahead = NFS_DEFRAHEAD;
2689		nmp->nm_tprintf_delay = nfs_tprintf_delay;
2690		if (nmp->nm_tprintf_delay < 0)
2691			nmp->nm_tprintf_delay = 0;
2692		nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
2693		if (nmp->nm_tprintf_initial_delay < 0)
2694			nmp->nm_tprintf_initial_delay = 0;
2695		nmp->nm_acregmin = NFS_MINATTRTIMO;
2696		nmp->nm_acregmax = NFS_MAXATTRTIMO;
2697		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
2698		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
2699		nmp->nm_auth = RPCAUTH_SYS;
2700		nmp->nm_deadtimeout = 0;
2701		nmp->nm_curdeadtimeout = 0;
2702		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
2703	}
2704
2705	mattrs = nmp->nm_mattrs;
2706	mflags = nmp->nm_mflags;
2707	mflags_mask = nmp->nm_mflags_mask;
2708
2709	/* set up NFS mount with args */
2710	xb_init_buffer(&xb, xdrbuf, 2*XDRWORD);
2711	xb_get_32(error, &xb, val); /* version */
2712	xb_get_32(error, &xb, argslength); /* args length */
2713	nfsmerr_if(error);
2714	xb_init_buffer(&xb, xdrbuf, argslength);	/* restart parsing with actual buffer length */
2715	xb_get_32(error, &xb, val); /* version */
2716	xb_get_32(error, &xb, argslength); /* args length */
2717	xb_get_32(error, &xb, val); /* XDR args version */
2718	if (val != NFS_XDRARGS_VERSION_0)
2719		error = EINVAL;
2720	len = NFS_MATTR_BITMAP_LEN;
2721	xb_get_bitmap(error, &xb, mattrs, len); /* mount attribute bitmap */
2722	attrslength = 0;
2723	xb_get_32(error, &xb, attrslength); /* attrs length */
2724	if (!error && (attrslength > (argslength - ((4+NFS_MATTR_BITMAP_LEN+1)*XDRWORD))))
2725		error = EINVAL;
2726	nfsmerr_if(error);
2727	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
2728		len = NFS_MFLAG_BITMAP_LEN;
2729		xb_get_bitmap(error, &xb, mflags_mask, len); /* mount flag mask */
2730		len = NFS_MFLAG_BITMAP_LEN;
2731		xb_get_bitmap(error, &xb, mflags, len); /* mount flag values */
2732		if (!error) {
2733			/* clear all mask bits and OR in all the ones that are set */
2734			nmp->nm_flags[0] &= ~mflags_mask[0];
2735			nmp->nm_flags[0] |= (mflags_mask[0] & mflags[0]);
2736		}
2737	}
2738	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) {
2739		xb_get_32(error, &xb, vers);
2740		if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
2741			xb_get_32(error, &xb, minorvers);
2742		else
2743			minorvers = 0;
2744		nfsmerr_if(error);
2745		switch (vers) {
2746		case 2:
2747			nmp->nm_vers = NFS_VER2;
2748			break;
2749		case 3:
2750			nmp->nm_vers = NFS_VER3;
2751			break;
2752		case 4:
2753			switch (minorvers) {
2754			case 0:
2755				nmp->nm_vers = NFS_VER4;
2756				break;
2757			default:
2758				error = EINVAL;
2759			}
2760			break;
2761		default:
2762			error = EINVAL;
2763		}
2764	}
2765	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) {
2766		/* should have also gotten NFS version (and already gotten minorvers) */
2767		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
2768			error = EINVAL;
2769	}
2770	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
2771		xb_get_32(error, &xb, nmp->nm_rsize);
2772	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
2773		xb_get_32(error, &xb, nmp->nm_wsize);
2774	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
2775		xb_get_32(error, &xb, nmp->nm_readdirsize);
2776	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
2777		xb_get_32(error, &xb, nmp->nm_readahead);
2778	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
2779		xb_get_32(error, &xb, nmp->nm_acregmin);
2780		xb_skip(error, &xb, XDRWORD);
2781	}
2782	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
2783		xb_get_32(error, &xb, nmp->nm_acregmax);
2784		xb_skip(error, &xb, XDRWORD);
2785	}
2786	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
2787		xb_get_32(error, &xb, nmp->nm_acdirmin);
2788		xb_skip(error, &xb, XDRWORD);
2789	}
2790	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
2791		xb_get_32(error, &xb, nmp->nm_acdirmax);
2792		xb_skip(error, &xb, XDRWORD);
2793	}
2794	nfsmerr_if(error);
2795	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE)) {
2796		xb_get_32(error, &xb, val);
2797		switch (val) {
2798		case NFS_LOCK_MODE_DISABLED:
2799		case NFS_LOCK_MODE_LOCAL:
2800			if (nmp->nm_vers >= NFS_VER4) {
2801				/* disabled/local lock mode only allowed on v2/v3 */
2802				error = EINVAL;
2803				break;
2804			}
2805			/* FALLTHROUGH */
2806		case NFS_LOCK_MODE_ENABLED:
2807			nmp->nm_lockmode = val;
2808			break;
2809		default:
2810			error = EINVAL;
2811		}
2812	}
2813	nfsmerr_if(error);
2814	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
2815		uint32_t seccnt;
2816		xb_get_32(error, &xb, seccnt);
2817		if (!error && ((seccnt < 1) || (seccnt > NX_MAX_SEC_FLAVORS)))
2818			error = EINVAL;
2819		nfsmerr_if(error);
2820		nmp->nm_sec.count = seccnt;
2821		for (i=0; i < seccnt; i++) {
2822			xb_get_32(error, &xb, nmp->nm_sec.flavors[i]);
2823			/* Check for valid security flavor */
2824			switch (nmp->nm_sec.flavors[i]) {
2825			case RPCAUTH_NONE:
2826			case RPCAUTH_SYS:
2827			case RPCAUTH_KRB5:
2828			case RPCAUTH_KRB5I:
2829			case RPCAUTH_KRB5P:
2830				break;
2831			default:
2832				error = EINVAL;
2833			}
2834		}
2835		/* start with the first flavor */
2836		nmp->nm_auth = nmp->nm_sec.flavors[0];
2837	}
2838	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
2839		xb_get_32(error, &xb, nmp->nm_numgrps);
2840	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE)) {
2841		char sotype[6];
2842
2843		xb_get_32(error, &xb, val);
2844		if (!error && ((val < 3) || (val > 5)))
2845			error = EINVAL;
2846		nfsmerr_if(error);
2847		error = xb_get_bytes(&xb, sotype, val, 0);
2848		nfsmerr_if(error);
2849		sotype[val] = '\0';
2850		if (!strcmp(sotype, "tcp")) {
2851			nmp->nm_sotype = SOCK_STREAM;
2852		} else if (!strcmp(sotype, "udp")) {
2853			nmp->nm_sotype = SOCK_DGRAM;
2854		} else if (!strcmp(sotype, "tcp4")) {
2855			nmp->nm_sotype = SOCK_STREAM;
2856			nmp->nm_sofamily = AF_INET;
2857		} else if (!strcmp(sotype, "udp4")) {
2858			nmp->nm_sotype = SOCK_DGRAM;
2859			nmp->nm_sofamily = AF_INET;
2860		} else if (!strcmp(sotype, "tcp6")) {
2861			nmp->nm_sotype = SOCK_STREAM;
2862			nmp->nm_sofamily = AF_INET6;
2863		} else if (!strcmp(sotype, "udp6")) {
2864			nmp->nm_sotype = SOCK_DGRAM;
2865			nmp->nm_sofamily = AF_INET6;
2866		} else if (!strcmp(sotype, "inet4")) {
2867			nmp->nm_sofamily = AF_INET;
2868		} else if (!strcmp(sotype, "inet6")) {
2869			nmp->nm_sofamily = AF_INET6;
2870		} else if (!strcmp(sotype, "inet")) {
2871			nmp->nm_sofamily = 0; /* ok */
2872		} else {
2873			error = EINVAL;
2874		}
2875		if (!error && (nmp->nm_vers >= NFS_VER4) && nmp->nm_sotype &&
2876		    (nmp->nm_sotype != SOCK_STREAM))
2877			error = EINVAL;		/* NFSv4 is only allowed over TCP. */
2878		nfsmerr_if(error);
2879	}
2880	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
2881		xb_get_32(error, &xb, nmp->nm_nfsport);
2882	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
2883		xb_get_32(error, &xb, nmp->nm_mountport);
2884	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
2885		/* convert from time to 0.1s units */
2886		xb_get_32(error, &xb, nmp->nm_timeo);
2887		xb_get_32(error, &xb, val);
2888		nfsmerr_if(error);
2889		if (val >= 1000000000)
2890			error = EINVAL;
2891		nfsmerr_if(error);
2892		nmp->nm_timeo *= 10;
2893		nmp->nm_timeo += (val+100000000-1)/100000000;
2894		/* now convert to ticks */
2895		nmp->nm_timeo = (nmp->nm_timeo * NFS_HZ + 5) / 10;
2896	}
2897	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT)) {
2898		xb_get_32(error, &xb, val);
2899		if (!error && (val > 1))
2900			nmp->nm_retry = val;
2901	}
2902	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
2903		xb_get_32(error, &xb, nmp->nm_deadtimeout);
2904		xb_skip(error, &xb, XDRWORD);
2905	}
2906	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
2907		nfsmerr_if(error);
2908		MALLOC(nmp->nm_fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
2909		if (!nmp->nm_fh)
2910			error = ENOMEM;
2911		xb_get_32(error, &xb, nmp->nm_fh->fh_len);
2912		nfsmerr_if(error);
2913		error = xb_get_bytes(&xb, (char*)&nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len, 0);
2914	}
2915	nfsmerr_if(error);
2916	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
2917		uint32_t loc, serv, addr, comp;
2918		struct nfs_fs_location *fsl;
2919		struct nfs_fs_server *fss;
2920		struct nfs_fs_path *fsp;
2921
2922		xb_get_32(error, &xb, nmp->nm_locations.nl_numlocs); /* fs location count */
2923		/* sanity check location count */
2924		if (!error && ((nmp->nm_locations.nl_numlocs < 1) || (nmp->nm_locations.nl_numlocs > 256)))
2925			error = EINVAL;
2926		nfsmerr_if(error);
2927		MALLOC(nmp->nm_locations.nl_locations, struct nfs_fs_location **, nmp->nm_locations.nl_numlocs * sizeof(struct nfs_fs_location*), M_TEMP, M_WAITOK|M_ZERO);
2928		if (!nmp->nm_locations.nl_locations)
2929			error = ENOMEM;
2930		for (loc = 0; loc < nmp->nm_locations.nl_numlocs; loc++) {
2931			nfsmerr_if(error);
2932			MALLOC(fsl, struct nfs_fs_location *, sizeof(struct nfs_fs_location), M_TEMP, M_WAITOK|M_ZERO);
2933			if (!fsl)
2934				error = ENOMEM;
2935			nmp->nm_locations.nl_locations[loc] = fsl;
2936			xb_get_32(error, &xb, fsl->nl_servcount); /* server count */
2937			/* sanity check server count */
2938			if (!error && ((fsl->nl_servcount < 1) || (fsl->nl_servcount > 256)))
2939				error = EINVAL;
2940			nfsmerr_if(error);
2941			MALLOC(fsl->nl_servers, struct nfs_fs_server **, fsl->nl_servcount * sizeof(struct nfs_fs_server*), M_TEMP, M_WAITOK|M_ZERO);
2942			if (!fsl->nl_servers)
2943				error = ENOMEM;
2944			for (serv = 0; serv < fsl->nl_servcount; serv++) {
2945				nfsmerr_if(error);
2946				MALLOC(fss, struct nfs_fs_server *, sizeof(struct nfs_fs_server), M_TEMP, M_WAITOK|M_ZERO);
2947				if (!fss)
2948					error = ENOMEM;
2949				fsl->nl_servers[serv] = fss;
2950				xb_get_32(error, &xb, val); /* server name length */
2951				/* sanity check server name length */
2952				if (!error && ((val < 1) || (val > MAXPATHLEN)))
2953					error = EINVAL;
2954				nfsmerr_if(error);
2955				MALLOC(fss->ns_name, char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
2956				if (!fss->ns_name)
2957					error = ENOMEM;
2958				nfsmerr_if(error);
2959				error = xb_get_bytes(&xb, fss->ns_name, val, 0); /* server name */
2960				xb_get_32(error, &xb, fss->ns_addrcount); /* address count */
2961				/* sanity check address count (OK to be zero) */
2962				if (!error && (fss->ns_addrcount > 256))
2963					error = EINVAL;
2964				nfsmerr_if(error);
2965				if (fss->ns_addrcount > 0) {
2966					MALLOC(fss->ns_addresses, char **, fss->ns_addrcount * sizeof(char *), M_TEMP, M_WAITOK|M_ZERO);
2967					if (!fss->ns_addresses)
2968						error = ENOMEM;
2969					for (addr = 0; addr < fss->ns_addrcount; addr++) {
2970						xb_get_32(error, &xb, val); /* address length */
2971						/* sanity check address length */
2972						if (!error && ((val < 1) || (val > 128)))
2973							error = EINVAL;
2974						nfsmerr_if(error);
2975						MALLOC(fss->ns_addresses[addr], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
2976						if (!fss->ns_addresses[addr])
2977							error = ENOMEM;
2978						nfsmerr_if(error);
2979						error = xb_get_bytes(&xb, fss->ns_addresses[addr], val, 0); /* address */
2980					}
2981				}
2982				xb_get_32(error, &xb, val); /* server info length */
2983				xb_skip(error, &xb, val); /* skip server info */
2984			}
2985			/* get pathname */
2986			fsp = &fsl->nl_path;
2987			xb_get_32(error, &xb, fsp->np_compcount); /* component count */
2988			/* sanity check component count */
2989			if (!error && (fsp->np_compcount > MAXPATHLEN))
2990				error = EINVAL;
2991			nfsmerr_if(error);
2992			if (fsp->np_compcount) {
2993				MALLOC(fsp->np_components, char **, fsp->np_compcount * sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2994				if (!fsp->np_components)
2995					error = ENOMEM;
2996			}
2997			for (comp = 0; comp < fsp->np_compcount; comp++) {
2998				xb_get_32(error, &xb, val); /* component length */
2999				/* sanity check component length */
3000				if (!error && (val == 0)) {
3001					/*
3002					 * Apparently some people think a path with zero components should
3003					 * be encoded with one zero-length component.  So, just ignore any
3004					 * zero length components.
3005					 */
3006					comp--;
3007					fsp->np_compcount--;
3008					if (fsp->np_compcount == 0) {
3009						FREE(fsp->np_components, M_TEMP);
3010						fsp->np_components = NULL;
3011					}
3012					continue;
3013				}
3014				if (!error && ((val < 1) || (val > MAXPATHLEN)))
3015					error = EINVAL;
3016				nfsmerr_if(error);
3017				MALLOC(fsp->np_components[comp], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
3018				if (!fsp->np_components[comp])
3019					error = ENOMEM;
3020				nfsmerr_if(error);
3021				error = xb_get_bytes(&xb, fsp->np_components[comp], val, 0); /* component */
3022			}
3023			xb_get_32(error, &xb, val); /* fs location info length */
3024			xb_skip(error, &xb, val); /* skip fs location info */
3025		}
3026	}
3027	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
3028		xb_skip(error, &xb, XDRWORD);
3029	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
3030		xb_get_32(error, &xb, len);
3031		nfsmerr_if(error);
3032		val = len;
3033		if (val >= sizeof(vfs_statfs(mp)->f_mntfromname))
3034			val = sizeof(vfs_statfs(mp)->f_mntfromname) - 1;
3035		error = xb_get_bytes(&xb, vfs_statfs(mp)->f_mntfromname, val, 0);
3036		if ((len - val) > 0)
3037			xb_skip(error, &xb, len - val);
3038		nfsmerr_if(error);
3039		vfs_statfs(mp)->f_mntfromname[val] = '\0';
3040	}
3041	nfsmerr_if(error);
3042
3043	/*
3044	 * Sanity check/finalize settings.
3045	 */
3046
3047	if (nmp->nm_timeo < NFS_MINTIMEO)
3048		nmp->nm_timeo = NFS_MINTIMEO;
3049	else if (nmp->nm_timeo > NFS_MAXTIMEO)
3050		nmp->nm_timeo = NFS_MAXTIMEO;
3051	if (nmp->nm_retry > NFS_MAXREXMIT)
3052		nmp->nm_retry = NFS_MAXREXMIT;
3053
3054	if (nmp->nm_numgrps > NFS_MAXGRPS)
3055		nmp->nm_numgrps = NFS_MAXGRPS;
3056	if (nmp->nm_readahead > NFS_MAXRAHEAD)
3057		nmp->nm_readahead = NFS_MAXRAHEAD;
3058	if (nmp->nm_acregmin > nmp->nm_acregmax)
3059		nmp->nm_acregmin = nmp->nm_acregmax;
3060	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
3061		nmp->nm_acdirmin = nmp->nm_acdirmax;
3062
3063	/* need at least one fs location */
3064	if (nmp->nm_locations.nl_numlocs < 1)
3065		error = EINVAL;
3066	nfsmerr_if(error);
3067
3068	/* init mount's mntfromname to first location */
3069	if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
3070		nfs_location_mntfromname(&nmp->nm_locations, firstloc,
3071			vfs_statfs(mp)->f_mntfromname, sizeof(vfs_statfs(mp)->f_mntfromname), 0);
3072
3073	/* Need to save the mounting credential for v4. */
3074	nmp->nm_mcred = vfs_context_ucred(ctx);
3075	if (IS_VALID_CRED(nmp->nm_mcred))
3076		kauth_cred_ref(nmp->nm_mcred);
3077
3078	/*
3079	 * If a reserved port is required, check for that privilege.
3080	 * (Note that mirror mounts are exempt because the privilege was
3081	 * already checked for the original mount.)
3082	 */
3083	if (NMFLAG(nmp, RESVPORT) && !vfs_iskernelmount(mp))
3084		error = priv_check_cred(nmp->nm_mcred, PRIV_NETINET_RESERVEDPORT, 0);
3085	nfsmerr_if(error);
3086
3087	/* do mount's initial socket connection */
3088	error = nfs_mount_connect(nmp);
3089	nfsmerr_if(error);
3090
3091	/* set up the version-specific function tables */
3092	if (nmp->nm_vers < NFS_VER4)
3093		nmp->nm_funcs = &nfs3_funcs;
3094	else
3095		nmp->nm_funcs = &nfs4_funcs;
3096
3097	/* sanity check settings now that version/connection is set */
3098	if (nmp->nm_vers == NFS_VER2)		/* ignore RDIRPLUS on NFSv2 */
3099		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
3100	if (nmp->nm_vers >= NFS_VER4) {
3101		if (NFS_BITMAP_ISSET(nmp->nm_flags, NFS_MFLAG_ACLONLY)) /* aclonly trumps noacl */
3102			NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
3103		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
3104		if (nmp->nm_lockmode != NFS_LOCK_MODE_ENABLED)
3105			error = EINVAL; /* disabled/local lock mode only allowed on v2/v3 */
3106	} else {
3107		/* ignore these if not v4 */
3108		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOCALLBACK);
3109		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NONAMEDATTR);
3110		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
3111		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
3112		if (IS_VALID_CRED(nmp->nm_mcred))
3113			kauth_cred_unref(&nmp->nm_mcred);
3114	}
3115	nfsmerr_if(error);
3116
3117	if (nmp->nm_sotype == SOCK_DGRAM) {
3118		/* I/O size defaults for UDP are different */
3119		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
3120			nmp->nm_rsize = NFS_DGRAM_RSIZE;
3121		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
3122			nmp->nm_wsize = NFS_DGRAM_WSIZE;
3123	}
3124
3125	/* round down I/O sizes to multiple of NFS_FABLKSIZE */
3126	nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
3127	if (nmp->nm_rsize <= 0)
3128		nmp->nm_rsize = NFS_FABLKSIZE;
3129	nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
3130	if (nmp->nm_wsize <= 0)
3131		nmp->nm_wsize = NFS_FABLKSIZE;
3132
3133	/* and limit I/O sizes to maximum allowed */
3134	maxio = (nmp->nm_vers == NFS_VER2) ? NFS_V2MAXDATA :
3135		(nmp->nm_sotype == SOCK_DGRAM) ? NFS_MAXDGRAMDATA : NFS_MAXDATA;
3136	if (maxio > NFS_MAXBSIZE)
3137		maxio = NFS_MAXBSIZE;
3138	if (nmp->nm_rsize > maxio)
3139		nmp->nm_rsize = maxio;
3140	if (nmp->nm_wsize > maxio)
3141		nmp->nm_wsize = maxio;
3142
3143	if (nmp->nm_readdirsize > maxio)
3144		nmp->nm_readdirsize = maxio;
3145	if (nmp->nm_readdirsize > nmp->nm_rsize)
3146		nmp->nm_readdirsize = nmp->nm_rsize;
3147
3148	/* Set up the sockets and related info */
3149	if (nmp->nm_sotype == SOCK_DGRAM)
3150		TAILQ_INIT(&nmp->nm_cwndq);
3151
3152	/*
3153	 * Get the root node/attributes from the NFS server and
3154	 * do any basic, version-specific setup.
3155	 */
3156	error = nmp->nm_funcs->nf_mount(nmp, ctx, &np);
3157	nfsmerr_if(error);
3158
3159	/*
3160	 * A reference count is needed on the node representing the
3161	 * remote root.  If this object is not persistent, then backward
3162	 * traversals of the mount point (i.e. "..") will not work if
3163	 * the node gets flushed out of the cache.
3164	 */
3165	nmp->nm_dnp = np;
3166	*vpp = NFSTOV(np);
3167	/* get usecount and drop iocount */
3168	error = vnode_ref(*vpp);
3169	vnode_put(*vpp);
3170	if (error) {
3171		vnode_recycle(*vpp);
3172		goto nfsmerr;
3173	}
3174
3175	/*
3176	 * Do statfs to ensure static info gets set to reasonable values.
3177	 */
3178	if ((error = nmp->nm_funcs->nf_update_statfs(nmp, ctx))) {
3179		int error2 = vnode_getwithref(*vpp);
3180		vnode_rele(*vpp);
3181		if (!error2)
3182			vnode_put(*vpp);
3183		vnode_recycle(*vpp);
3184		goto nfsmerr;
3185	}
3186	sbp = vfs_statfs(mp);
3187	sbp->f_bsize = nmp->nm_fsattr.nfsa_bsize;
3188	sbp->f_blocks = nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize;
3189	sbp->f_bfree = nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize;
3190	sbp->f_bavail = nmp->nm_fsattr.nfsa_space_avail / sbp->f_bsize;
3191	sbp->f_bused = (nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize) -
3192			(nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize);
3193	sbp->f_files = nmp->nm_fsattr.nfsa_files_total;
3194	sbp->f_ffree = nmp->nm_fsattr.nfsa_files_free;
3195	sbp->f_iosize = nfs_iosize;
3196
3197	/*
3198	 * Calculate the size used for I/O buffers.  Use the larger
3199	 * of the two sizes to minimise NFS requests but make sure
3200	 * that it is at least one VM page to avoid wasting buffer
3201	 * space and to allow easy mmapping of I/O buffers.
3202	 * The read/write RPC calls handle the splitting up of
3203	 * buffers into multiple requests if the buffer size is
3204	 * larger than the I/O size.
3205	 */
3206	iosize = max(nmp->nm_rsize, nmp->nm_wsize);
3207	if (iosize < PAGE_SIZE)
3208		iosize = PAGE_SIZE;
3209	nmp->nm_biosize = trunc_page_32(iosize);
3210
3211	/* For NFSv3 and greater, there is a (relatively) reliable ACCESS call. */
3212	if (nmp->nm_vers > NFS_VER2)
3213		vfs_setauthopaqueaccess(mp);
3214
3215	switch (nmp->nm_lockmode) {
3216	case NFS_LOCK_MODE_DISABLED:
3217		break;
3218	case NFS_LOCK_MODE_LOCAL:
3219		vfs_setlocklocal(nmp->nm_mountp);
3220		break;
3221	case NFS_LOCK_MODE_ENABLED:
3222	default:
3223		if (nmp->nm_vers <= NFS_VER3)
3224			nfs_lockd_mount_register(nmp);
3225		break;
3226	}
3227
3228	/* success! */
3229	lck_mtx_lock(&nmp->nm_lock);
3230	nmp->nm_state |= NFSSTA_MOUNTED;
3231	lck_mtx_unlock(&nmp->nm_lock);
3232	return (0);
3233nfsmerr:
3234	nfs_mount_cleanup(nmp);
3235	return (error);
3236}
3237
3238#if CONFIG_TRIGGERS
3239
3240/*
3241 * We've detected a file system boundary on the server and
3242 * need to mount a new file system so that our file systems
3243 * MIRROR the file systems on the server.
3244 *
3245 * Build the mount arguments for the new mount and call kernel_mount().
3246 */
3247int
3248nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx)
3249{
3250	nfsnode_t np = VTONFS(vp);
3251	nfsnode_t dnp = VTONFS(dvp);
3252	struct nfsmount *nmp = NFSTONMP(np);
3253	char fstype[MFSTYPENAMELEN], *mntfromname = NULL, *path = NULL, *relpath, *p, *cp;
3254	int error = 0, pathbuflen = MAXPATHLEN, i, mntflags = 0, referral, skipcopy = 0;
3255	size_t nlen;
3256	struct xdrbuf xb, xbnew;
3257	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
3258	uint32_t newmattrs[NFS_MATTR_BITMAP_LEN];
3259	uint32_t newmflags[NFS_MFLAG_BITMAP_LEN];
3260	uint32_t newmflags_mask[NFS_MFLAG_BITMAP_LEN];
3261	uint32_t argslength = 0, val, count, mlen, mlen2, rlen, relpathcomps;
3262	uint32_t argslength_offset, attrslength_offset, end_offset;
3263	uint32_t numlocs, loc, numserv, serv, numaddr, addr, numcomp, comp;
3264	char buf[XDRWORD];
3265	struct nfs_fs_locations nfsls;
3266
3267	referral = (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL);
3268	if (referral)
3269		bzero(&nfsls, sizeof(nfsls));
3270
3271	xb_init(&xbnew, 0);
3272
3273	if (!nmp || (nmp->nm_state & NFSSTA_FORCE))
3274		return (ENXIO);
3275
3276	/* allocate a couple path buffers we need */
3277	MALLOC_ZONE(mntfromname, char *, pathbuflen, M_NAMEI, M_WAITOK);
3278	if (!mntfromname) {
3279		error = ENOMEM;
3280		goto nfsmerr;
3281	}
3282	MALLOC_ZONE(path, char *, pathbuflen, M_NAMEI, M_WAITOK);
3283	if (!path) {
3284		error = ENOMEM;
3285		goto nfsmerr;
3286	}
3287
3288	/* get the path for the directory being mounted on */
3289	error = vn_getpath(vp, path, &pathbuflen);
3290	if (error) {
3291		error = ENOMEM;
3292		goto nfsmerr;
3293	}
3294
3295	/*
3296	 * Set up the mntfromname for the new mount based on the
3297	 * current mount's mntfromname and the directory's path
3298	 * relative to the current mount's mntonname.
3299	 * Set up relpath to point at the relative path on the current mount.
3300	 * Also, count the number of components in relpath.
3301	 * We'll be adding those to each fs location path in the new args.
3302	 */
3303	nlen = strlcpy(mntfromname, vfs_statfs(nmp->nm_mountp)->f_mntfromname, MAXPATHLEN);
3304	if ((nlen > 0) && (mntfromname[nlen-1] == '/')) { /* avoid double '/' in new name */
3305		mntfromname[nlen-1] = '\0';
3306		nlen--;
3307	}
3308	relpath = mntfromname + nlen;
3309	nlen = strlcat(mntfromname, path + strlen(vfs_statfs(nmp->nm_mountp)->f_mntonname), MAXPATHLEN);
3310	if (nlen >= MAXPATHLEN) {
3311		error = ENAMETOOLONG;
3312		goto nfsmerr;
3313	}
3314	/* count the number of components in relpath */
3315	p = relpath;
3316	while (*p && (*p == '/'))
3317		p++;
3318	relpathcomps = 0;
3319	while (*p) {
3320		relpathcomps++;
3321		while (*p && (*p != '/'))
3322			p++;
3323		while (*p && (*p == '/'))
3324			p++;
3325	}
3326
3327	/* grab a copy of the file system type */
3328	vfs_name(vnode_mount(vp), fstype);
3329
3330	/* for referrals, fetch the fs locations */
3331	if (referral) {
3332		const char *vname = vnode_getname(NFSTOV(np));
3333		if (!vname) {
3334			error = ENOENT;
3335		} else {
3336			error = nfs4_get_fs_locations(nmp, dnp, NULL, 0, vname, ctx, &nfsls);
3337			vnode_putname(vname);
3338			if (!error && (nfsls.nl_numlocs < 1))
3339				error = ENOENT;
3340		}
3341		nfsmerr_if(error);
3342	}
3343
3344	/* set up NFS mount args based on current mount args */
3345
3346#define xb_copy_32(E, XBSRC, XBDST, V) \
3347	do { \
3348		if (E) break; \
3349		xb_get_32((E), (XBSRC), (V)); \
3350		if (skipcopy) break; \
3351		xb_add_32((E), (XBDST), (V)); \
3352	} while (0)
3353#define xb_copy_opaque(E, XBSRC, XBDST) \
3354	do { \
3355		uint32_t __count, __val; \
3356		xb_copy_32((E), (XBSRC), (XBDST), __count); \
3357		if (E) break; \
3358		__count = nfsm_rndup(__count); \
3359		__count /= XDRWORD; \
3360		while (__count-- > 0) \
3361			xb_copy_32((E), (XBSRC), (XBDST), __val); \
3362	} while (0)
3363
3364	xb_init_buffer(&xb, nmp->nm_args, 2*XDRWORD);
3365	xb_get_32(error, &xb, val); /* version */
3366	xb_get_32(error, &xb, argslength); /* args length */
3367	xb_init_buffer(&xb, nmp->nm_args, argslength);
3368
3369	xb_init_buffer(&xbnew, NULL, 0);
3370	xb_copy_32(error, &xb, &xbnew, val); /* version */
3371	argslength_offset = xb_offset(&xbnew);
3372	xb_copy_32(error, &xb, &xbnew, val); /* args length */
3373	xb_copy_32(error, &xb, &xbnew, val); /* XDR args version */
3374	count = NFS_MATTR_BITMAP_LEN;
3375	xb_get_bitmap(error, &xb, mattrs, count); /* mount attribute bitmap */
3376	nfsmerr_if(error);
3377	for (i = 0; i < NFS_MATTR_BITMAP_LEN; i++)
3378		newmattrs[i] = mattrs[i];
3379	if (referral)
3380		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FS_LOCATIONS);
3381	else
3382		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FH);
3383	NFS_BITMAP_SET(newmattrs, NFS_MATTR_FLAGS);
3384	NFS_BITMAP_SET(newmattrs, NFS_MATTR_MNTFLAGS);
3385	NFS_BITMAP_CLR(newmattrs, NFS_MATTR_MNTFROM);
3386	xb_add_bitmap(error, &xbnew, newmattrs, NFS_MATTR_BITMAP_LEN);
3387	attrslength_offset = xb_offset(&xbnew);
3388	xb_copy_32(error, &xb, &xbnew, val); /* attrs length */
3389	NFS_BITMAP_ZERO(newmflags_mask, NFS_MFLAG_BITMAP_LEN);
3390	NFS_BITMAP_ZERO(newmflags, NFS_MFLAG_BITMAP_LEN);
3391	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
3392		count = NFS_MFLAG_BITMAP_LEN;
3393		xb_get_bitmap(error, &xb, newmflags_mask, count); /* mount flag mask bitmap */
3394		count = NFS_MFLAG_BITMAP_LEN;
3395		xb_get_bitmap(error, &xb, newmflags, count); /* mount flag bitmap */
3396	}
3397	NFS_BITMAP_SET(newmflags_mask, NFS_MFLAG_EPHEMERAL);
3398	NFS_BITMAP_SET(newmflags, NFS_MFLAG_EPHEMERAL);
3399	xb_add_bitmap(error, &xbnew, newmflags_mask, NFS_MFLAG_BITMAP_LEN);
3400	xb_add_bitmap(error, &xbnew, newmflags, NFS_MFLAG_BITMAP_LEN);
3401	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
3402		xb_copy_32(error, &xb, &xbnew, val);
3403	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
3404		xb_copy_32(error, &xb, &xbnew, val);
3405	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
3406		xb_copy_32(error, &xb, &xbnew, val);
3407	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
3408		xb_copy_32(error, &xb, &xbnew, val);
3409	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
3410		xb_copy_32(error, &xb, &xbnew, val);
3411	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
3412		xb_copy_32(error, &xb, &xbnew, val);
3413	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
3414		xb_copy_32(error, &xb, &xbnew, val);
3415		xb_copy_32(error, &xb, &xbnew, val);
3416	}
3417	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
3418		xb_copy_32(error, &xb, &xbnew, val);
3419		xb_copy_32(error, &xb, &xbnew, val);
3420	}
3421	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
3422		xb_copy_32(error, &xb, &xbnew, val);
3423		xb_copy_32(error, &xb, &xbnew, val);
3424	}
3425	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
3426		xb_copy_32(error, &xb, &xbnew, val);
3427		xb_copy_32(error, &xb, &xbnew, val);
3428	}
3429	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
3430		xb_copy_32(error, &xb, &xbnew, val);
3431	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
3432		xb_copy_32(error, &xb, &xbnew, count);
3433		while (!error && (count-- > 0))
3434			xb_copy_32(error, &xb, &xbnew, val);
3435	}
3436	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
3437		xb_copy_32(error, &xb, &xbnew, val);
3438	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
3439		xb_copy_opaque(error, &xb, &xbnew);
3440	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
3441		xb_copy_32(error, &xb, &xbnew, val);
3442	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
3443		xb_copy_32(error, &xb, &xbnew, val);
3444	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
3445		xb_copy_32(error, &xb, &xbnew, val);
3446		xb_copy_32(error, &xb, &xbnew, val);
3447	}
3448	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
3449		xb_copy_32(error, &xb, &xbnew, val);
3450	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
3451		xb_copy_32(error, &xb, &xbnew, val);
3452		xb_copy_32(error, &xb, &xbnew, val);
3453	}
3454	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
3455		xb_get_32(error, &xb, count);
3456		xb_skip(error, &xb, count);
3457	}
3458	if (!referral) {
3459		/* set the initial file handle to the directory's file handle */
3460		xb_add_fh(error, &xbnew, np->n_fhp, np->n_fhsize);
3461	}
3462	/* copy/extend/skip fs locations */
3463	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
3464		numlocs = numserv = numaddr = numcomp = 0;
3465		if (referral) /* don't copy the fs locations for a referral */
3466			skipcopy = 1;
3467		xb_copy_32(error, &xb, &xbnew, numlocs); /* location count */
3468		for (loc = 0; !error && (loc < numlocs); loc++) {
3469			xb_copy_32(error, &xb, &xbnew, numserv); /* server count */
3470			for (serv = 0; !error && (serv < numserv); serv++) {
3471				xb_copy_opaque(error, &xb, &xbnew); /* server name */
3472				xb_copy_32(error, &xb, &xbnew, numaddr); /* address count */
3473				for (addr = 0; !error && (addr < numaddr); addr++)
3474					xb_copy_opaque(error, &xb, &xbnew); /* address */
3475				xb_copy_opaque(error, &xb, &xbnew); /* server info */
3476			}
3477			/* pathname */
3478			xb_get_32(error, &xb, numcomp); /* component count */
3479			if (!skipcopy)
3480				xb_add_32(error, &xbnew, numcomp+relpathcomps); /* new component count */
3481			for (comp = 0; !error && (comp < numcomp); comp++)
3482				xb_copy_opaque(error, &xb, &xbnew); /* component */
3483			/* add additional components */
3484			for (comp = 0; !skipcopy && !error && (comp < relpathcomps); comp++) {
3485				p = relpath;
3486				while (*p && (*p == '/'))
3487					p++;
3488				while (*p && !error) {
3489					cp = p;
3490					while (*p && (*p != '/'))
3491						p++;
3492					xb_add_string(error, &xbnew, cp, (p - cp)); /* component */
3493					while (*p && (*p == '/'))
3494						p++;
3495				}
3496			}
3497			xb_copy_opaque(error, &xb, &xbnew); /* fs location info */
3498		}
3499		if (referral)
3500			skipcopy = 0;
3501	}
3502	if (referral) {
3503		/* add referral's fs locations */
3504		xb_add_32(error, &xbnew, nfsls.nl_numlocs);			/* FS_LOCATIONS */
3505		for (loc = 0; !error && (loc < nfsls.nl_numlocs); loc++) {
3506			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servcount);
3507			for (serv = 0; !error && (serv < nfsls.nl_locations[loc]->nl_servcount); serv++) {
3508				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_name,
3509					strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_name));
3510				xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
3511				for (addr = 0; !error && (addr < nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
3512					xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
3513						strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
3514				xb_add_32(error, &xbnew, 0); /* empty server info */
3515			}
3516			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_compcount);
3517			for (comp = 0; !error && (comp < nfsls.nl_locations[loc]->nl_path.np_compcount); comp++)
3518				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_components[comp],
3519					strlen(nfsls.nl_locations[loc]->nl_path.np_components[comp]));
3520			xb_add_32(error, &xbnew, 0); /* empty fs location info */
3521		}
3522	}
3523	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
3524		xb_get_32(error, &xb, mntflags);
3525	/*
3526	 * We add the following mount flags to the ones for the mounted-on mount:
3527	 * MNT_DONTBROWSE - to keep the mount from showing up as a separate volume
3528	 * MNT_AUTOMOUNTED - to keep DiskArb from retriggering the mount after
3529	 *                   an unmount (looking for /.autodiskmounted)
3530	 */
3531	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
3532	xb_add_32(error, &xbnew, mntflags);
3533	if (!referral && NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
3534		/* copy mntfrom string and add relpath */
3535		rlen = strlen(relpath);
3536		xb_get_32(error, &xb, mlen);
3537		nfsmerr_if(error);
3538		mlen2 = mlen + ((relpath[0] != '/') ? 1 : 0) + rlen;
3539		xb_add_32(error, &xbnew, mlen2);
3540		count = mlen/XDRWORD;
3541		/* copy the original string */
3542		while (count-- > 0)
3543			xb_copy_32(error, &xb, &xbnew, val);
3544		if (!error && (mlen % XDRWORD)) {
3545			error = xb_get_bytes(&xb, buf, mlen%XDRWORD, 0);
3546			if (!error)
3547				error = xb_add_bytes(&xbnew, buf, mlen%XDRWORD, 1);
3548		}
3549		/* insert a '/' if the relative path doesn't start with one */
3550		if (!error && (relpath[0] != '/')) {
3551			buf[0] = '/';
3552			error = xb_add_bytes(&xbnew, buf, 1, 1);
3553		}
3554		/* add the additional relative path */
3555		if (!error)
3556			error = xb_add_bytes(&xbnew, relpath, rlen, 1);
3557		/* make sure the resulting string has the right number of pad bytes */
3558		if (!error && (mlen2 != nfsm_rndup(mlen2))) {
3559			bzero(buf, sizeof(buf));
3560			count = nfsm_rndup(mlen2) - mlen2;
3561			error = xb_add_bytes(&xbnew, buf, count, 1);
3562		}
3563	}
3564	xb_build_done(error, &xbnew);
3565
3566	/* update opaque counts */
3567	end_offset = xb_offset(&xbnew);
3568	if (!error) {
3569		error = xb_seek(&xbnew, argslength_offset);
3570		argslength = end_offset - argslength_offset + XDRWORD/*version*/;
3571		xb_add_32(error, &xbnew, argslength);
3572	}
3573	if (!error) {
3574		error = xb_seek(&xbnew, attrslength_offset);
3575		xb_add_32(error, &xbnew, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
3576	}
3577	nfsmerr_if(error);
3578
3579	/*
3580	 * For kernel_mount() call, use the existing mount flags (instead of the
3581	 * original flags) because flags like MNT_NOSUID and MNT_NODEV may have
3582	 * been silently enforced.
3583	 */
3584	mntflags = vnode_vfsvisflags(vp);
3585	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
3586
3587	/* do the mount */
3588	error = kernel_mount(fstype, dvp, vp, path, xb_buffer_base(&xbnew), argslength,
3589			mntflags, KERNEL_MOUNT_PERMIT_UNMOUNT | KERNEL_MOUNT_NOAUTH, ctx);
3590
3591nfsmerr:
3592	if (error)
3593		printf("nfs: mirror mount of %s on %s failed (%d)\n",
3594			mntfromname, path, error);
3595	/* clean up */
3596	xb_cleanup(&xbnew);
3597	if (referral)
3598		nfs_fs_locations_cleanup(&nfsls);
3599	if (path)
3600		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3601	if (mntfromname)
3602		FREE_ZONE(mntfromname, MAXPATHLEN, M_NAMEI);
3603	if (!error)
3604		nfs_ephemeral_mount_harvester_start();
3605	return (error);
3606}
3607
3608/*
3609 * trigger vnode functions
3610 */
3611
3612resolver_result_t
3613nfs_mirror_mount_trigger_resolve(
3614	vnode_t vp,
3615	const struct componentname *cnp,
3616	enum path_operation pop,
3617	__unused int flags,
3618	__unused void *data,
3619	vfs_context_t ctx)
3620{
3621	nfsnode_t np = VTONFS(vp);
3622	vnode_t pvp = NULLVP;
3623	int error = 0;
3624	resolver_result_t result;
3625
3626	/*
3627	 * We have a trigger node that doesn't have anything mounted on it yet.
3628	 * We'll do the mount if either:
3629	 * (a) this isn't the last component of the path OR
3630	 * (b) this is an op that looks like it should trigger the mount.
3631	 */
3632	if (cnp->cn_flags & ISLASTCN) {
3633		switch (pop) {
3634		case OP_MOUNT:
3635		case OP_UNMOUNT:
3636		case OP_STATFS:
3637		case OP_LINK:
3638		case OP_UNLINK:
3639		case OP_RENAME:
3640		case OP_MKNOD:
3641		case OP_MKFIFO:
3642		case OP_SYMLINK:
3643		case OP_ACCESS:
3644		case OP_GETATTR:
3645		case OP_MKDIR:
3646		case OP_RMDIR:
3647		case OP_REVOKE:
3648		case OP_GETXATTR:
3649		case OP_LISTXATTR:
3650			/* don't perform the mount for these operations */
3651			result = vfs_resolver_result(np->n_trigseq, RESOLVER_NOCHANGE, 0);
3652#ifdef NFS_TRIGGER_DEBUG
3653			NP(np, "nfs trigger RESOLVE: no change, last %d nameiop %d, seq %d",
3654				(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3655#endif
3656			return (result);
3657		case OP_OPEN:
3658		case OP_CHDIR:
3659		case OP_CHROOT:
3660		case OP_TRUNCATE:
3661		case OP_COPYFILE:
3662		case OP_PATHCONF:
3663		case OP_READLINK:
3664		case OP_SETATTR:
3665		case OP_EXCHANGEDATA:
3666		case OP_SEARCHFS:
3667		case OP_FSCTL:
3668		case OP_SETXATTR:
3669		case OP_REMOVEXATTR:
3670		default:
3671			/* go ahead and do the mount */
3672			break;
3673		}
3674	}
3675
3676	if (vnode_mountedhere(vp) != NULL) {
3677		/*
3678		 * Um... there's already something mounted.
3679		 * Been there.  Done that.  Let's just say it succeeded.
3680		 */
3681		error = 0;
3682		goto skipmount;
3683	}
3684
3685	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3686		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3687#ifdef NFS_TRIGGER_DEBUG
3688		NP(np, "nfs trigger RESOLVE: busy error %d, last %d nameiop %d, seq %d",
3689			error, (cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3690#endif
3691		return (result);
3692	}
3693
3694	pvp = vnode_getparent(vp);
3695	if (pvp == NULLVP)
3696		error = EINVAL;
3697	if (!error)
3698		error = nfs_mirror_mount_domount(pvp, vp, ctx);
3699skipmount:
3700	if (!error)
3701		np->n_trigseq++;
3702	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_RESOLVED, error);
3703#ifdef NFS_TRIGGER_DEBUG
3704	NP(np, "nfs trigger RESOLVE: %s %d, last %d nameiop %d, seq %d",
3705		error ? "error" : "resolved", error,
3706		(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3707#endif
3708
3709	if (pvp != NULLVP)
3710		vnode_put(pvp);
3711	nfs_node_clear_busy(np);
3712	return (result);
3713}
3714
3715resolver_result_t
3716nfs_mirror_mount_trigger_unresolve(
3717	vnode_t vp,
3718	int flags,
3719	__unused void *data,
3720	vfs_context_t ctx)
3721{
3722	nfsnode_t np = VTONFS(vp);
3723	mount_t mp;
3724	int error;
3725	resolver_result_t result;
3726
3727	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3728		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3729#ifdef NFS_TRIGGER_DEBUG
3730		NP(np, "nfs trigger UNRESOLVE: busy error %d, seq %d", error, np->n_trigseq);
3731#endif
3732		return (result);
3733	}
3734
3735	mp = vnode_mountedhere(vp);
3736	if (!mp)
3737		error = EINVAL;
3738	if (!error)
3739		error = vfs_unmountbyfsid(&(vfs_statfs(mp)->f_fsid), flags, ctx);
3740	if (!error)
3741		np->n_trigseq++;
3742	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_UNRESOLVED, error);
3743#ifdef NFS_TRIGGER_DEBUG
3744	NP(np, "nfs trigger UNRESOLVE: %s %d, seq %d",
3745		error ? "error" : "unresolved", error, np->n_trigseq);
3746#endif
3747	nfs_node_clear_busy(np);
3748	return (result);
3749}
3750
3751resolver_result_t
3752nfs_mirror_mount_trigger_rearm(
3753	vnode_t vp,
3754	__unused int flags,
3755	__unused void *data,
3756	vfs_context_t ctx)
3757{
3758	nfsnode_t np = VTONFS(vp);
3759	int error;
3760	resolver_result_t result;
3761
3762	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3763		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3764#ifdef NFS_TRIGGER_DEBUG
3765		NP(np, "nfs trigger REARM: busy error %d, seq %d", error, np->n_trigseq);
3766#endif
3767		return (result);
3768	}
3769
3770	np->n_trigseq++;
3771	result = vfs_resolver_result(np->n_trigseq,
3772			vnode_mountedhere(vp) ? RESOLVER_RESOLVED : RESOLVER_UNRESOLVED, 0);
3773#ifdef NFS_TRIGGER_DEBUG
3774	NP(np, "nfs trigger REARM: %s, seq %d",
3775		vnode_mountedhere(vp) ? "resolved" : "unresolved", np->n_trigseq);
3776#endif
3777	nfs_node_clear_busy(np);
3778	return (result);
3779}
3780
3781/*
3782 * Periodically attempt to unmount ephemeral (mirror) mounts in an attempt to limit
3783 * the number of unused mounts.
3784 */
3785
3786#define NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL	120	/* how often the harvester runs */
3787struct nfs_ephemeral_mount_harvester_info {
3788	fsid_t		fsid;		/* FSID that we need to try to unmount */
3789	uint32_t	mountcount;	/* count of ephemeral mounts seen in scan */
3790 };
3791/* various globals for the harvester */
3792static thread_call_t nfs_ephemeral_mount_harvester_timer = NULL;
3793static int nfs_ephemeral_mount_harvester_on = 0;
3794
3795kern_return_t thread_terminate(thread_t);
3796
3797static int
3798nfs_ephemeral_mount_harvester_callback(mount_t mp, void *arg)
3799{
3800	struct nfs_ephemeral_mount_harvester_info *hinfo = arg;
3801	struct nfsmount *nmp;
3802	struct timeval now;
3803
3804	if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
3805		return (VFS_RETURNED);
3806	nmp = VFSTONFS(mp);
3807	if (!nmp || !NMFLAG(nmp, EPHEMERAL))
3808		return (VFS_RETURNED);
3809	hinfo->mountcount++;
3810
3811	/* avoid unmounting mounts that have been triggered within the last harvest interval */
3812	microtime(&now);
3813	if ((nmp->nm_mounttime >> 32) > ((uint32_t)now.tv_sec - NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL))
3814		return (VFS_RETURNED);
3815
3816	if (hinfo->fsid.val[0] || hinfo->fsid.val[1]) {
3817		/* attempt to unmount previously-found ephemeral mount */
3818		vfs_unmountbyfsid(&hinfo->fsid, 0, vfs_context_kernel());
3819		hinfo->fsid.val[0] = hinfo->fsid.val[1] = 0;
3820	}
3821
3822	/*
3823	 * We can't call unmount here since we hold a mount iter ref
3824	 * on mp so save its fsid for the next call iteration to unmount.
3825	 */
3826	hinfo->fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
3827	hinfo->fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
3828
3829	return (VFS_RETURNED);
3830}
3831
3832/*
3833 * Spawn a thread to do the ephemeral mount harvesting.
3834 */
3835static void
3836nfs_ephemeral_mount_harvester_timer_func(void)
3837{
3838	thread_t thd;
3839
3840	if (kernel_thread_start(nfs_ephemeral_mount_harvester, NULL, &thd) == KERN_SUCCESS)
3841		thread_deallocate(thd);
3842}
3843
3844/*
3845 * Iterate all mounts looking for NFS ephemeral mounts to try to unmount.
3846 */
3847void
3848nfs_ephemeral_mount_harvester(__unused void *arg, __unused wait_result_t wr)
3849{
3850	struct nfs_ephemeral_mount_harvester_info hinfo;
3851	uint64_t deadline;
3852
3853	hinfo.mountcount = 0;
3854	hinfo.fsid.val[0] = hinfo.fsid.val[1] = 0;
3855	vfs_iterate(VFS_ITERATE_TAIL_FIRST, nfs_ephemeral_mount_harvester_callback, &hinfo);
3856	if (hinfo.fsid.val[0] || hinfo.fsid.val[1]) {
3857		/* attempt to unmount last found ephemeral mount */
3858		vfs_unmountbyfsid(&hinfo.fsid, 0, vfs_context_kernel());
3859	}
3860
3861	lck_mtx_lock(nfs_global_mutex);
3862	if (!hinfo.mountcount) {
3863		/* no more ephemeral mounts - don't need timer */
3864		nfs_ephemeral_mount_harvester_on = 0;
3865	} else {
3866		/* re-arm the timer */
3867		clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
3868		thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
3869		nfs_ephemeral_mount_harvester_on = 1;
3870	}
3871	lck_mtx_unlock(nfs_global_mutex);
3872
3873	/* thread done */
3874	thread_terminate(current_thread());
3875}
3876
3877/*
3878 * Make sure the NFS ephemeral mount harvester timer is running.
3879 */
3880void
3881nfs_ephemeral_mount_harvester_start(void)
3882{
3883	uint64_t deadline;
3884
3885	lck_mtx_lock(nfs_global_mutex);
3886	if (nfs_ephemeral_mount_harvester_on) {
3887		lck_mtx_unlock(nfs_global_mutex);
3888		return;
3889	}
3890	if (nfs_ephemeral_mount_harvester_timer == NULL)
3891		nfs_ephemeral_mount_harvester_timer = thread_call_allocate((thread_call_func_t)nfs_ephemeral_mount_harvester_timer_func, NULL);
3892	clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
3893	thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
3894	nfs_ephemeral_mount_harvester_on = 1;
3895	lck_mtx_unlock(nfs_global_mutex);
3896}
3897
3898#endif
3899
3900/*
3901 * Send a MOUNT protocol MOUNT request to the server to get the initial file handle (and security).
3902 */
3903int
3904nfs3_mount_rpc(struct nfsmount *nmp, struct sockaddr *sa, int sotype, int nfsvers, char *path, vfs_context_t ctx, int timeo, fhandle_t *fh, struct nfs_sec *sec)
3905{
3906	int error = 0, slen, mntproto;
3907	thread_t thd = vfs_context_thread(ctx);
3908	kauth_cred_t cred = vfs_context_ucred(ctx);
3909	uint64_t xid = 0;
3910	struct nfsm_chain nmreq, nmrep;
3911	mbuf_t mreq;
3912	uint32_t mntvers, mntport, val;
3913	struct sockaddr_storage ss;
3914	struct sockaddr *saddr = (struct sockaddr*)&ss;
3915
3916	nfsm_chain_null(&nmreq);
3917	nfsm_chain_null(&nmrep);
3918
3919	mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
3920	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
3921	sec->count = 0;
3922
3923	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
3924	if (saddr->sa_family == AF_INET) {
3925		if (nmp->nm_mountport)
3926			((struct sockaddr_in*)saddr)->sin_port = htons(nmp->nm_mountport);
3927		mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
3928	} else {
3929		if (nmp->nm_mountport)
3930			((struct sockaddr_in6*)saddr)->sin6_port = htons(nmp->nm_mountport);
3931		mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
3932	}
3933
3934	while (!mntport) {
3935		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
3936		nfsmout_if(error);
3937		if (saddr->sa_family == AF_INET)
3938			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
3939		else
3940			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
3941		if (!mntport) {
3942			/* if not found and TCP, then retry with UDP */
3943			if (mntproto == IPPROTO_UDP) {
3944				error = EPROGUNAVAIL;
3945				break;
3946			}
3947			mntproto = IPPROTO_UDP;
3948			bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
3949		}
3950	}
3951	nfsmout_if(error || !mntport);
3952
3953	/* MOUNT protocol MOUNT request */
3954	slen = strlen(path);
3955	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
3956	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
3957	nfsm_chain_build_done(error, &nmreq);
3958	nfsmout_if(error);
3959	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
3960			RPCPROG_MNT, mntvers, RPCMNT_MOUNT,
3961			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
3962	nfsmout_if(error);
3963	nmreq.nmc_mhead = NULL;
3964	error = nfs_aux_request(nmp, thd, saddr, NULL,
3965			((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
3966			mreq, R_XID32(xid), 1, timeo, &nmrep);
3967	nfsmout_if(error);
3968	nfsm_chain_get_32(error, &nmrep, val);
3969	if (!error && val)
3970		error = val;
3971	nfsm_chain_get_fh(error, &nmrep, nfsvers, fh);
3972	if (!error && (nfsvers > NFS_VER2)) {
3973		sec->count = NX_MAX_SEC_FLAVORS;
3974		error = nfsm_chain_get_secinfo(&nmrep, &sec->flavors[0], &sec->count);
3975	}
3976nfsmout:
3977	nfsm_chain_cleanup(&nmreq);
3978	nfsm_chain_cleanup(&nmrep);
3979	return (error);
3980}
3981
3982
3983/*
3984 * Send a MOUNT protocol UNMOUNT request to tell the server we've unmounted it.
3985 */
3986void
3987nfs3_umount_rpc(struct nfsmount *nmp, vfs_context_t ctx, int timeo)
3988{
3989	int error = 0, slen, mntproto;
3990	thread_t thd = vfs_context_thread(ctx);
3991	kauth_cred_t cred = vfs_context_ucred(ctx);
3992	char *path;
3993	uint64_t xid = 0;
3994	struct nfsm_chain nmreq, nmrep;
3995	mbuf_t mreq;
3996	uint32_t mntvers, mntport;
3997	struct sockaddr_storage ss;
3998	struct sockaddr *saddr = (struct sockaddr*)&ss;
3999
4000	if (!nmp->nm_saddr)
4001		return;
4002
4003	nfsm_chain_null(&nmreq);
4004	nfsm_chain_null(&nmrep);
4005
4006	mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
4007	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nmp->nm_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
4008	mntport = nmp->nm_mountport;
4009
4010	bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
4011	if (saddr->sa_family == AF_INET)
4012		((struct sockaddr_in*)saddr)->sin_port = htons(mntport);
4013	else
4014		((struct sockaddr_in6*)saddr)->sin6_port = htons(mntport);
4015
4016	while (!mntport) {
4017		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
4018  		nfsmout_if(error);
4019		if (saddr->sa_family == AF_INET)
4020			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
4021		else
4022			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
4023		/* if not found and mntvers > VER1, then retry with VER1 */
4024		if (!mntport) {
4025			if (mntvers > RPCMNT_VER1) {
4026				mntvers = RPCMNT_VER1;
4027			} else if (mntproto == IPPROTO_TCP) {
4028				mntproto = IPPROTO_UDP;
4029				mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
4030			} else {
4031				break;
4032			}
4033			bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
4034		}
4035	}
4036	nfsmout_if(!mntport);
4037
4038	/* MOUNT protocol UNMOUNT request */
4039	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
4040	while (*path && (*path != '/'))
4041		path++;
4042	slen = strlen(path);
4043	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
4044	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
4045	nfsm_chain_build_done(error, &nmreq);
4046	nfsmout_if(error);
4047	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4048			RPCPROG_MNT, RPCMNT_VER1, RPCMNT_UMOUNT,
4049			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
4050	nfsmout_if(error);
4051	nmreq.nmc_mhead = NULL;
4052	error = nfs_aux_request(nmp, thd, saddr, NULL,
4053		((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
4054		mreq, R_XID32(xid), 1, timeo, &nmrep);
4055nfsmout:
4056	nfsm_chain_cleanup(&nmreq);
4057	nfsm_chain_cleanup(&nmrep);
4058}
4059
4060/*
4061 * unmount system call
4062 */
4063int
4064nfs_vfs_unmount(
4065	mount_t mp,
4066	int mntflags,
4067	__unused vfs_context_t ctx)
4068{
4069	struct nfsmount *nmp;
4070	vnode_t vp;
4071	int error, flags = 0;
4072	struct timespec ts = { 1, 0 };
4073
4074	nmp = VFSTONFS(mp);
4075	lck_mtx_lock(&nmp->nm_lock);
4076	/*
4077	 * Set the flag indicating that an unmount attempt is in progress.
4078	 */
4079	nmp->nm_state |= NFSSTA_UNMOUNTING;
4080	/*
4081	 * During a force unmount we want to...
4082	 *   Mark that we are doing a force unmount.
4083	 *   Make the mountpoint soft.
4084	 */
4085	if (mntflags & MNT_FORCE) {
4086		flags |= FORCECLOSE;
4087		nmp->nm_state |= NFSSTA_FORCE;
4088		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
4089	}
4090	/*
4091	 * Wait for any in-progress monitored node scan to complete.
4092	 */
4093	while (nmp->nm_state & NFSSTA_MONITOR_SCAN)
4094		msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
4095	/*
4096	 * Goes something like this..
4097	 * - Call vflush() to clear out vnodes for this file system,
4098	 *   except for the swap files. Deal with them in 2nd pass.
4099	 * - Decrement reference on the vnode representing remote root.
4100	 * - Clean up the NFS mount structure.
4101	 */
4102	vp = NFSTOV(nmp->nm_dnp);
4103	lck_mtx_unlock(&nmp->nm_lock);
4104
4105	/*
4106	 * vflush will check for busy vnodes on mountpoint.
4107	 * Will do the right thing for MNT_FORCE. That is, we should
4108	 * not get EBUSY back.
4109	 */
4110	error = vflush(mp, vp, SKIPSWAP | flags);
4111	if (mntflags & MNT_FORCE) {
4112		error = vflush(mp, NULLVP, flags); /* locks vp in the process */
4113	} else {
4114		if (vnode_isinuse(vp, 1))
4115			error = EBUSY;
4116		else
4117			error = vflush(mp, vp, flags);
4118	}
4119	if (error) {
4120		lck_mtx_lock(&nmp->nm_lock);
4121		nmp->nm_state &= ~NFSSTA_UNMOUNTING;
4122		lck_mtx_unlock(&nmp->nm_lock);
4123		return (error);
4124	}
4125
4126	lck_mtx_lock(&nmp->nm_lock);
4127	nmp->nm_dnp = NULL;
4128	lck_mtx_unlock(&nmp->nm_lock);
4129
4130	/*
4131	 * Release the root vnode reference held by mountnfs()
4132	 */
4133	error = vnode_get(vp);
4134	vnode_rele(vp);
4135	if (!error)
4136		vnode_put(vp);
4137
4138	vflush(mp, NULLVP, FORCECLOSE);
4139
4140	nfs_mount_cleanup(nmp);
4141	return (0);
4142}
4143
4144/*
4145 * cleanup/destroy NFS fs locations structure
4146 */
4147void
4148nfs_fs_locations_cleanup(struct nfs_fs_locations *nfslsp)
4149{
4150	struct nfs_fs_location *fsl;
4151	struct nfs_fs_server *fss;
4152	struct nfs_fs_path *fsp;
4153	uint32_t loc, serv, addr, comp;
4154
4155	/* free up fs locations */
4156	if (!nfslsp->nl_numlocs || !nfslsp->nl_locations)
4157		return;
4158
4159	for (loc = 0; loc < nfslsp->nl_numlocs; loc++) {
4160		fsl = nfslsp->nl_locations[loc];
4161		if (!fsl)
4162			continue;
4163		if ((fsl->nl_servcount > 0) && fsl->nl_servers) {
4164			for (serv = 0; serv < fsl->nl_servcount; serv++) {
4165				fss = fsl->nl_servers[serv];
4166				if (!fss)
4167					continue;
4168				if ((fss->ns_addrcount > 0) && fss->ns_addresses) {
4169					for (addr = 0; addr < fss->ns_addrcount; addr++)
4170						FREE(fss->ns_addresses[addr], M_TEMP);
4171					FREE(fss->ns_addresses, M_TEMP);
4172				}
4173				FREE(fss->ns_name, M_TEMP);
4174				FREE(fss, M_TEMP);
4175			}
4176			FREE(fsl->nl_servers, M_TEMP);
4177		}
4178		fsp = &fsl->nl_path;
4179		if (fsp->np_compcount && fsp->np_components) {
4180			for (comp = 0; comp < fsp->np_compcount; comp++)
4181				if (fsp->np_components[comp])
4182					FREE(fsp->np_components[comp], M_TEMP);
4183			FREE(fsp->np_components, M_TEMP);
4184		}
4185		FREE(fsl, M_TEMP);
4186	}
4187	FREE(nfslsp->nl_locations, M_TEMP);
4188	nfslsp->nl_numlocs = 0;
4189	nfslsp->nl_locations = NULL;
4190}
4191
4192/*
4193 * cleanup/destroy an nfsmount
4194 */
4195void
4196nfs_mount_cleanup(struct nfsmount *nmp)
4197{
4198	struct nfsreq *req, *treq;
4199	struct nfs_reqqhead iodq;
4200	struct timespec ts = { 1, 0 };
4201	struct nfs_open_owner *noop, *nextnoop;
4202	nfsnode_t np;
4203	int docallback;
4204
4205	/* stop callbacks */
4206	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid)
4207		nfs4_mount_callback_shutdown(nmp);
4208
4209	/* Destroy any RPCSEC_GSS contexts */
4210	if (!TAILQ_EMPTY(&nmp->nm_gsscl))
4211		nfs_gss_clnt_ctx_unmount(nmp);
4212
4213	/* mark the socket for termination */
4214	lck_mtx_lock(&nmp->nm_lock);
4215	nmp->nm_sockflags |= NMSOCK_UNMOUNT;
4216
4217	/* Have the socket thread send the unmount RPC, if requested/appropriate. */
4218	if ((nmp->nm_vers < NFS_VER4) && (nmp->nm_state & NFSSTA_MOUNTED) &&
4219	    !(nmp->nm_state & NFSSTA_FORCE) && NMFLAG(nmp, CALLUMNT))
4220		nfs_mount_sock_thread_wake(nmp);
4221
4222	/* wait for the socket thread to terminate */
4223	while (nmp->nm_sockthd) {
4224		wakeup(&nmp->nm_sockthd);
4225		msleep(&nmp->nm_sockthd, &nmp->nm_lock, PZERO-1, "nfswaitsockthd", &ts);
4226	}
4227
4228	lck_mtx_unlock(&nmp->nm_lock);
4229
4230	/* tear down the socket */
4231	nfs_disconnect(nmp);
4232
4233	if (nmp->nm_mountp)
4234		vfs_setfsprivate(nmp->nm_mountp, NULL);
4235
4236	lck_mtx_lock(&nmp->nm_lock);
4237
4238	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid) {
4239		/* clear out any pending delegation return requests */
4240		while ((np = TAILQ_FIRST(&nmp->nm_dreturnq))) {
4241			TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
4242			np->n_dreturn.tqe_next = NFSNOLIST;
4243		}
4244	}
4245
4246	/* cancel any renew timer */
4247	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_renew_timer) {
4248		thread_call_cancel(nmp->nm_renew_timer);
4249		thread_call_free(nmp->nm_renew_timer);
4250	}
4251
4252	if (nmp->nm_saddr)
4253		FREE(nmp->nm_saddr, M_SONAME);
4254	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_rqsaddr)
4255		FREE(nmp->nm_rqsaddr, M_SONAME);
4256	lck_mtx_unlock(&nmp->nm_lock);
4257
4258	if (nmp->nm_state & NFSSTA_MOUNTED)
4259		switch (nmp->nm_lockmode) {
4260		case NFS_LOCK_MODE_DISABLED:
4261		case NFS_LOCK_MODE_LOCAL:
4262			break;
4263		case NFS_LOCK_MODE_ENABLED:
4264		default:
4265			if (nmp->nm_vers <= NFS_VER3)
4266				nfs_lockd_mount_unregister(nmp);
4267			break;
4268		}
4269
4270	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_longid) {
4271		/* remove/deallocate the client ID data */
4272		lck_mtx_lock(nfs_global_mutex);
4273		TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link);
4274		if (nmp->nm_longid->nci_id)
4275			FREE(nmp->nm_longid->nci_id, M_TEMP);
4276		FREE(nmp->nm_longid, M_TEMP);
4277		lck_mtx_unlock(nfs_global_mutex);
4278	}
4279
4280	/*
4281	 * Loop through outstanding request list and remove dangling
4282	 * references to defunct nfsmount struct
4283	 */
4284	TAILQ_INIT(&iodq);
4285	lck_mtx_lock(nfs_request_mutex);
4286	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
4287		if (req->r_nmp == nmp) {
4288			lck_mtx_lock(&req->r_mtx);
4289			req->r_nmp = NULL;
4290			lck_mtx_unlock(&req->r_mtx);
4291			if (req->r_callback.rcb_func) {
4292				/* async I/O RPC needs to be finished */
4293				lck_mtx_lock(nfsiod_mutex);
4294				if (req->r_achain.tqe_next == NFSREQNOLIST)
4295					TAILQ_INSERT_TAIL(&iodq, req, r_achain);
4296				lck_mtx_unlock(nfsiod_mutex);
4297			}
4298			lck_mtx_lock(&req->r_mtx);
4299			lck_mtx_lock(&nmp->nm_lock);
4300			if (req->r_flags & R_RESENDQ) {
4301				if (req->r_rchain.tqe_next != NFSREQNOLIST) {
4302					TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
4303					req->r_rchain.tqe_next = NFSREQNOLIST;
4304				}
4305				req->r_flags &= ~R_RESENDQ;
4306			}
4307			lck_mtx_unlock(&nmp->nm_lock);
4308			lck_mtx_unlock(&req->r_mtx);
4309			wakeup(req);
4310		}
4311	}
4312	lck_mtx_unlock(nfs_request_mutex);
4313
4314	/* finish any async I/O RPCs queued up */
4315	lck_mtx_lock(nfsiod_mutex);
4316	TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
4317	lck_mtx_unlock(nfsiod_mutex);
4318	TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
4319		TAILQ_REMOVE(&iodq, req, r_achain);
4320		req->r_achain.tqe_next = NFSREQNOLIST;
4321		lck_mtx_lock(&req->r_mtx);
4322		req->r_error = ENXIO;
4323		docallback = !(req->r_flags & R_WAITSENT);
4324		lck_mtx_unlock(&req->r_mtx);
4325		if (docallback)
4326			req->r_callback.rcb_func(req);
4327	}
4328
4329	/* clean up common state */
4330	lck_mtx_lock(&nmp->nm_lock);
4331 	while ((np = LIST_FIRST(&nmp->nm_monlist))) {
4332 		LIST_REMOVE(np, n_monlink);
4333 		np->n_monlink.le_next = NFSNOLIST;
4334 	}
4335	TAILQ_FOREACH_SAFE(noop, &nmp->nm_open_owners, noo_link, nextnoop) {
4336		TAILQ_REMOVE(&nmp->nm_open_owners, noop, noo_link);
4337		noop->noo_flags &= ~NFS_OPEN_OWNER_LINK;
4338		if (noop->noo_refcnt)
4339			continue;
4340		nfs_open_owner_destroy(noop);
4341	}
4342	lck_mtx_unlock(&nmp->nm_lock);
4343
4344	/* clean up NFSv4 state */
4345	if (nmp->nm_vers >= NFS_VER4) {
4346		lck_mtx_lock(&nmp->nm_lock);
4347		while ((np = TAILQ_FIRST(&nmp->nm_delegations))) {
4348			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
4349			np->n_dlink.tqe_next = NFSNOLIST;
4350		}
4351		lck_mtx_unlock(&nmp->nm_lock);
4352	}
4353	if (IS_VALID_CRED(nmp->nm_mcred))
4354		kauth_cred_unref(&nmp->nm_mcred);
4355
4356	nfs_fs_locations_cleanup(&nmp->nm_locations);
4357
4358	if (nmp->nm_args)
4359		xb_free(nmp->nm_args);
4360	lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp);
4361	if (nmp->nm_fh)
4362		FREE(nmp->nm_fh, M_TEMP);
4363	FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
4364}
4365
4366/*
4367 * Return root of a filesystem
4368 */
4369int
4370nfs_vfs_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t ctx)
4371{
4372	vnode_t vp;
4373	struct nfsmount *nmp;
4374	int error;
4375	u_int32_t vpid;
4376
4377	nmp = VFSTONFS(mp);
4378	if (!nmp || !nmp->nm_dnp)
4379		return (ENXIO);
4380	vp = NFSTOV(nmp->nm_dnp);
4381	vpid = vnode_vid(vp);
4382	while ((error = vnode_getwithvid(vp, vpid))) {
4383		/* vnode_get() may return ENOENT if the dir changes. */
4384		/* If that happens, just try it again, else return the error. */
4385		if ((error != ENOENT) || (vnode_vid(vp) == vpid))
4386			return (error);
4387		vpid = vnode_vid(vp);
4388	}
4389	*vpp = vp;
4390	return (0);
4391}
4392
4393/*
4394 * Do operations associated with quotas
4395 */
4396#if !QUOTA
4397int
4398nfs_vfs_quotactl(
4399	__unused mount_t mp,
4400	__unused int cmds,
4401	__unused uid_t uid,
4402	__unused caddr_t datap,
4403	__unused vfs_context_t context)
4404{
4405	return (ENOTSUP);
4406}
4407#else
4408
4409int
4410nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
4411{
4412	int error = 0, slen, timeo;
4413	int rqport = 0, rqproto, rqvers = (type == GRPQUOTA) ? RPCRQUOTA_EXT_VER : RPCRQUOTA_VER;
4414	thread_t thd = vfs_context_thread(ctx);
4415	kauth_cred_t cred = vfs_context_ucred(ctx);
4416	char *path;
4417	uint64_t xid = 0;
4418	struct nfsm_chain nmreq, nmrep;
4419	mbuf_t mreq;
4420	uint32_t val = 0, bsize = 0;
4421	struct sockaddr *rqsaddr;
4422	struct timeval now;
4423
4424	if (!nmp->nm_saddr)
4425		return (ENXIO);
4426
4427	if (NMFLAG(nmp, NOQUOTA))
4428		return (ENOTSUP);
4429
4430	if (!nmp->nm_rqsaddr)
4431		MALLOC(nmp->nm_rqsaddr, struct sockaddr *, sizeof(struct sockaddr_storage), M_SONAME, M_WAITOK|M_ZERO);
4432	if (!nmp->nm_rqsaddr)
4433		return (ENOMEM);
4434	rqsaddr = nmp->nm_rqsaddr;
4435	if (rqsaddr->sa_family == AF_INET6)
4436		rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
4437	else if (rqsaddr->sa_family == AF_INET)
4438		rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
4439
4440	timeo = NMFLAG(nmp, SOFT) ? 10 : 60;
4441	rqproto = IPPROTO_UDP; /* XXX should prefer TCP if mount is TCP */
4442
4443	/* check if we have a recently cached rquota port */
4444	microuptime(&now);
4445	if (!rqport || ((nmp->nm_rqsaddrstamp + 60) >= (uint32_t)now.tv_sec)) {
4446		/* send portmap request to get rquota port */
4447		bcopy(nmp->nm_saddr, rqsaddr, min(sizeof(struct sockaddr_storage), nmp->nm_saddr->sa_len));
4448		error = nfs_portmap_lookup(nmp, ctx, rqsaddr, NULL, RPCPROG_RQUOTA, rqvers, rqproto, timeo);
4449		if (error)
4450			return (error);
4451		if (rqsaddr->sa_family == AF_INET6)
4452			rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
4453		else if (rqsaddr->sa_family == AF_INET)
4454			rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
4455		else
4456			return (EIO);
4457		if (!rqport)
4458			return (ENOTSUP);
4459		microuptime(&now);
4460		nmp->nm_rqsaddrstamp = now.tv_sec;
4461	}
4462
4463	/* rquota request */
4464	nfsm_chain_null(&nmreq);
4465	nfsm_chain_null(&nmrep);
4466	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
4467	while (*path && (*path != '/'))
4468		path++;
4469	slen = strlen(path);
4470	nfsm_chain_build_alloc_init(error, &nmreq, 3 * NFSX_UNSIGNED + nfsm_rndup(slen));
4471	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
4472	if (type == GRPQUOTA)
4473		nfsm_chain_add_32(error, &nmreq, type);
4474	nfsm_chain_add_32(error, &nmreq, id);
4475	nfsm_chain_build_done(error, &nmreq);
4476	nfsmout_if(error);
4477	error = nfsm_rpchead2(nmp, (rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4478			RPCPROG_RQUOTA, rqvers, RPCRQUOTA_GET,
4479			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
4480	nfsmout_if(error);
4481	nmreq.nmc_mhead = NULL;
4482	error = nfs_aux_request(nmp, thd, rqsaddr, NULL,
4483			(rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4484			mreq, R_XID32(xid), 0, timeo, &nmrep);
4485	nfsmout_if(error);
4486
4487	/* parse rquota response */
4488	nfsm_chain_get_32(error, &nmrep, val);
4489	if (!error && (val != RQUOTA_STAT_OK)) {
4490		if (val == RQUOTA_STAT_NOQUOTA)
4491			error = ENOENT;
4492		else if (val == RQUOTA_STAT_EPERM)
4493			error = EPERM;
4494		else
4495			error = EIO;
4496	}
4497	nfsm_chain_get_32(error, &nmrep, bsize);
4498	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
4499	nfsm_chain_get_32(error, &nmrep, val);
4500	nfsmout_if(error);
4501	dqb->dqb_bhardlimit = (uint64_t)val * bsize;
4502	nfsm_chain_get_32(error, &nmrep, val);
4503	nfsmout_if(error);
4504	dqb->dqb_bsoftlimit = (uint64_t)val * bsize;
4505	nfsm_chain_get_32(error, &nmrep, val);
4506	nfsmout_if(error);
4507	dqb->dqb_curbytes = (uint64_t)val * bsize;
4508	nfsm_chain_get_32(error, &nmrep, dqb->dqb_ihardlimit);
4509	nfsm_chain_get_32(error, &nmrep, dqb->dqb_isoftlimit);
4510	nfsm_chain_get_32(error, &nmrep, dqb->dqb_curinodes);
4511	nfsm_chain_get_32(error, &nmrep, dqb->dqb_btime);
4512	nfsm_chain_get_32(error, &nmrep, dqb->dqb_itime);
4513	nfsmout_if(error);
4514	dqb->dqb_id = id;
4515nfsmout:
4516	nfsm_chain_cleanup(&nmreq);
4517	nfsm_chain_cleanup(&nmrep);
4518	return (error);
4519}
4520
/*
 * Fetch user quota information for "id" from an NFSv4 server.
 *
 * Sends a PUTFH + GETATTR compound on the mount's root node (nmp->nm_dnp)
 * requesting the QUOTA_AVAIL_HARD, QUOTA_AVAIL_SOFT and QUOTA_USED
 * attributes, then hands the reply to nfs4_parsefattr() together with dqb.
 *
 * Returns 0 on success or an errno value:
 *   ENOTSUP - type is not USRQUOTA, or the server's supported-attribute
 *             bitmap contains none of the three quota attributes
 *   ENOMEM  - a credential for "id" could not be created
 *   ENXIO   - the mount has no root node (or an nfsm_assert() on
 *             NFSTONMP(np) failed)
 *   other   - errors from vnode_get(), request construction, the RPC,
 *             or reply parsing
 */
int
nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
{
	nfsnode_t np;
	int error = 0, status, nfsvers, numops;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	thread_t thd = vfs_context_thread(ctx);
	kauth_cred_t cred = vfs_context_ucred(ctx);
	struct nfsreq_secinfo_args si;

	if (type != USRQUOTA)  /* NFSv4 only supports user quotas */
		return (ENOTSUP);

	/* first check that the server supports any of the quota attributes */
	if (!NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED))
		return (ENOTSUP);

	/*
	 * The credential passed to the server needs to have
	 * an effective uid that matches the given uid.
	 *
	 * If the caller's uid differs, build a temporary credential that
	 * carries "id" plus the caller's group list.  Otherwise take an extra
	 * reference on the caller's credential.  Either way, "cred" is
	 * released with kauth_cred_unref() on every exit path below.
	 */
	if (id != kauth_cred_getuid(cred)) {
		struct posix_cred temp_pcred;
		posix_cred_t pcred = posix_cred_get(cred);
		bzero(&temp_pcred, sizeof(temp_pcred));
		temp_pcred.cr_uid = id;
		temp_pcred.cr_ngroups = pcred->cr_ngroups;
		bcopy(pcred->cr_groups, temp_pcred.cr_groups, sizeof(temp_pcred.cr_groups));
		cred = posix_cred_create(&temp_pcred);
		if (!IS_VALID_CRED(cred))
			return (ENOMEM);
	} else {
		kauth_cred_ref(cred);
	}

	/* the request is issued against the mount's root node; hold it with vnode_get() */
	nfsvers = nmp->nm_vers;
	np = nmp->nm_dnp;
	if (!np)
		error = ENXIO;
	if (error || ((error = vnode_get(NFSTOV(np))))) {
		kauth_cred_unref(&cred);
		return(error);
	}

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH + GETATTR
	/* numops is counted down as each op is added and asserted to reach zero */
	numops = 2;
	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "quota", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	/* request only the three quota attributes */
	NFS_CLEAR_ATTRIBUTES(bitmap);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_HARD);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_SOFT);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_USED);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	/* send the compound using the (possibly substituted) credential */
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);
	/* walk the reply: tag, op count, then the status of each op */
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	/* NOTE(review): presumably fails with ENXIO if the node lost its mount while the request was outstanding -- confirm */
	nfsm_assert(error, NFSTONMP(np), ENXIO);
	nfsmout_if(error);
	/* dqb is the only output requested from the attribute parser */
	error = nfs4_parsefattr(&nmrep, NULL, NULL, NULL, dqb, NULL);
	nfsmout_if(error);
	nfsm_assert(error, NFSTONMP(np), ENXIO);
nfsmout:
	/* common exit: release both chains, the root node hold and the credential */
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	vnode_put(NFSTOV(np));
	kauth_cred_unref(&cred);
	return (error);
}
4607
4608int
4609nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t ctx)
4610{
4611	struct nfsmount *nmp;
4612	int cmd, type, error, nfsvers;
4613	uid_t euid = kauth_cred_getuid(vfs_context_ucred(ctx));
4614	struct dqblk *dqb = (struct dqblk*)datap;
4615
4616	if (!(nmp = VFSTONFS(mp)))
4617		return (ENXIO);
4618	nfsvers = nmp->nm_vers;
4619
4620	if (uid == ~0U)
4621		uid = euid;
4622
4623	/* we can only support Q_GETQUOTA */
4624	cmd = cmds >> SUBCMDSHIFT;
4625	switch (cmd) {
4626	case Q_GETQUOTA:
4627		break;
4628	case Q_QUOTAON:
4629	case Q_QUOTAOFF:
4630	case Q_SETQUOTA:
4631	case Q_SETUSE:
4632	case Q_SYNC:
4633	case Q_QUOTASTAT:
4634		return (ENOTSUP);
4635	default:
4636		return (EINVAL);
4637	}
4638
4639	type = cmds & SUBCMDMASK;
4640	if ((u_int)type >= MAXQUOTAS)
4641		return (EINVAL);
4642	if ((uid != euid) && ((error = vfs_context_suser(ctx))))
4643		return (error);
4644
4645	if (vfs_busy(mp, LK_NOWAIT))
4646		return (0);
4647	bzero(dqb, sizeof(*dqb));
4648	error = nmp->nm_funcs->nf_getquota(nmp, ctx, uid, type, dqb);
4649	vfs_unbusy(mp);
4650	return (error);
4651}
4652#endif
4653
/*
 * Flush out the buffer cache
 *
 * nfs_vfs_sync() iterates over the mount's vnodes and calls
 * nfs_sync_callout() on each one, passing a struct nfs_sync_cargs.
 */
int nfs_sync_callout(vnode_t, void *);

/* Arguments shared between nfs_vfs_sync() and nfs_sync_callout(). */
struct nfs_sync_cargs {
	vfs_context_t	ctx;		/* caller's context (used for vn_revoke() and the flush thread) */
	int		waitfor;	/* passed through to nfs_flush() */
	int		error;		/* most recent nonzero nfs_flush() error; 0 if none */
};
4664
4665int
4666nfs_sync_callout(vnode_t vp, void *arg)
4667{
4668	struct nfs_sync_cargs *cargs = (struct nfs_sync_cargs*)arg;
4669	nfsnode_t np = VTONFS(vp);
4670	int error;
4671
4672	if (np->n_flag & NREVOKE) {
4673		vn_revoke(vp, REVOKEALL, cargs->ctx);
4674		return (VNODE_RETURNED);
4675	}
4676
4677	if (LIST_EMPTY(&np->n_dirtyblkhd))
4678		return (VNODE_RETURNED);
4679	if (np->n_wrbusy > 0)
4680		return (VNODE_RETURNED);
4681	if (np->n_bflag & (NBFLUSHINPROG|NBINVALINPROG))
4682		return (VNODE_RETURNED);
4683
4684	error = nfs_flush(np, cargs->waitfor, vfs_context_thread(cargs->ctx), 0);
4685	if (error)
4686		cargs->error = error;
4687
4688	return (VNODE_RETURNED);
4689}
4690
4691int
4692nfs_vfs_sync(mount_t mp, int waitfor, vfs_context_t ctx)
4693{
4694	struct nfs_sync_cargs cargs;
4695
4696	cargs.waitfor = waitfor;
4697	cargs.ctx = ctx;
4698	cargs.error = 0;
4699
4700	vnode_iterate(mp, 0, nfs_sync_callout, &cargs);
4701
4702	return (cargs.error);
4703}
4704
4705/*
4706 * NFS flat namespace lookup.
4707 * Currently unsupported.
4708 */
4709/*ARGSUSED*/
4710int
4711nfs_vfs_vget(
4712	__unused mount_t mp,
4713	__unused ino64_t ino,
4714	__unused vnode_t *vpp,
4715	__unused vfs_context_t ctx)
4716{
4717
4718	return (ENOTSUP);
4719}
4720
4721/*
4722 * At this point, this should never happen
4723 */
4724/*ARGSUSED*/
4725int
4726nfs_vfs_fhtovp(
4727	__unused mount_t mp,
4728	__unused int fhlen,
4729	__unused unsigned char *fhp,
4730	__unused vnode_t *vpp,
4731	__unused vfs_context_t ctx)
4732{
4733
4734	return (ENOTSUP);
4735}
4736
4737/*
4738 * Vnode pointer to File handle, should never happen either
4739 */
4740/*ARGSUSED*/
4741int
4742nfs_vfs_vptofh(
4743	__unused vnode_t vp,
4744	__unused int *fhlenp,
4745	__unused unsigned char *fhp,
4746	__unused vfs_context_t ctx)
4747{
4748
4749	return (ENOTSUP);
4750}
4751
4752/*
4753 * Vfs start routine, a no-op.
4754 */
4755/*ARGSUSED*/
4756int
4757nfs_vfs_start(
4758	__unused mount_t mp,
4759	__unused int flags,
4760	__unused vfs_context_t ctx)
4761{
4762
4763	return (0);
4764}
4765
4766/*
4767 * Build the mount info buffer for NFS_MOUNTINFO.
4768 */
4769int
4770nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb)
4771{
4772	struct xdrbuf xbinfo, xborig;
4773	char sotype[6];
4774	uint32_t origargsvers, origargslength;
4775	uint32_t infolength_offset, curargsopaquelength_offset, curargslength_offset, attrslength_offset, curargs_end_offset, end_offset;
4776	uint32_t miattrs[NFS_MIATTR_BITMAP_LEN];
4777	uint32_t miflags_mask[NFS_MIFLAG_BITMAP_LEN];
4778	uint32_t miflags[NFS_MIFLAG_BITMAP_LEN];
4779	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
4780	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
4781	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
4782	uint32_t loc, serv, addr, comp;
4783	int i, timeo, error = 0;
4784
4785	/* set up mount info attr and flag bitmaps */
4786	NFS_BITMAP_ZERO(miattrs, NFS_MIATTR_BITMAP_LEN);
4787	NFS_BITMAP_SET(miattrs, NFS_MIATTR_FLAGS);
4788	NFS_BITMAP_SET(miattrs, NFS_MIATTR_ORIG_ARGS);
4789	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_ARGS);
4790	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_LOC_INDEX);
4791	NFS_BITMAP_ZERO(miflags_mask, NFS_MIFLAG_BITMAP_LEN);
4792	NFS_BITMAP_ZERO(miflags, NFS_MIFLAG_BITMAP_LEN);
4793	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_DEAD);
4794	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_NOTRESP);
4795	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_RECOVERY);
4796	if (nmp->nm_state & NFSSTA_DEAD)
4797		NFS_BITMAP_SET(miflags, NFS_MIFLAG_DEAD);
4798	if ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO)) ||
4799	    ((nmp->nm_state & NFSSTA_LOCKTIMEO) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED)))
4800		NFS_BITMAP_SET(miflags, NFS_MIFLAG_NOTRESP);
4801	if (nmp->nm_state & NFSSTA_RECOVER)
4802		NFS_BITMAP_SET(miflags, NFS_MIFLAG_RECOVERY);
4803
4804	/* get original mount args length */
4805	xb_init_buffer(&xborig, nmp->nm_args, 2*XDRWORD);
4806	xb_get_32(error, &xborig, origargsvers); /* version */
4807	xb_get_32(error, &xborig, origargslength); /* args length */
4808	nfsmerr_if(error);
4809
4810	/* set up current mount attributes bitmap */
4811	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
4812	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
4813	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
4814	if (nmp->nm_vers >= NFS_VER4)
4815		NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_MINOR_VERSION);
4816	NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
4817	NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
4818	NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
4819	NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
4820	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
4821	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
4822	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
4823	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
4824	NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
4825	NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
4826	NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
4827	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
4828	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
4829	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
4830		NFS_BITMAP_SET(mattrs, NFS_MATTR_MOUNT_PORT);
4831	NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
4832	if (NMFLAG(nmp, SOFT))
4833		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
4834	if (nmp->nm_deadtimeout)
4835		NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
4836	if (nmp->nm_fh)
4837		NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
4838	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
4839	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
4840	if (origargsvers < NFS_ARGSVERSION_XDR)
4841		NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
4842
4843	/* set up current mount flags bitmap */
4844	/* first set the flags that we will be setting - either on OR off */
4845	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
4846	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
4847	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
4848	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
4849	if (nmp->nm_sotype == SOCK_DGRAM)
4850		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
4851	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
4852	if (nmp->nm_vers < NFS_VER4)
4853		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
4854	if (nmp->nm_vers >= NFS_VER3)
4855		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
4856	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
4857	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
4858	if (nmp->nm_vers >= NFS_VER4) {
4859		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_EPHEMERAL);
4860		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCALLBACK);
4861		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONAMEDATTR);
4862		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOACL);
4863		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_ACLONLY);
4864	}
4865	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NFC);
4866	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
4867	if (nmp->nm_vers < NFS_VER4)
4868		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTUDP);
4869	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTQUICK);
4870	/* now set the flags that should be set */
4871	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
4872	if (NMFLAG(nmp, SOFT))
4873		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
4874	if (NMFLAG(nmp, INTR))
4875		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
4876	if (NMFLAG(nmp, RESVPORT))
4877		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
4878	if ((nmp->nm_sotype == SOCK_DGRAM) && NMFLAG(nmp, NOCONNECT))
4879		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
4880	if (NMFLAG(nmp, DUMBTIMER))
4881		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
4882	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, CALLUMNT))
4883		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
4884	if ((nmp->nm_vers >= NFS_VER3) && NMFLAG(nmp, RDIRPLUS))
4885		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
4886	if (NMFLAG(nmp, NONEGNAMECACHE))
4887		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
4888	if (NMFLAG(nmp, MUTEJUKEBOX))
4889		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
4890	if (nmp->nm_vers >= NFS_VER4) {
4891		if (NMFLAG(nmp, EPHEMERAL))
4892			NFS_BITMAP_SET(mflags, NFS_MFLAG_EPHEMERAL);
4893		if (NMFLAG(nmp, NOCALLBACK))
4894			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCALLBACK);
4895		if (NMFLAG(nmp, NONAMEDATTR))
4896			NFS_BITMAP_SET(mflags, NFS_MFLAG_NONAMEDATTR);
4897		if (NMFLAG(nmp, NOACL))
4898			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOACL);
4899		if (NMFLAG(nmp, ACLONLY))
4900			NFS_BITMAP_SET(mflags, NFS_MFLAG_ACLONLY);
4901	}
4902	if (NMFLAG(nmp, NFC))
4903		NFS_BITMAP_SET(mflags, NFS_MFLAG_NFC);
4904	if (NMFLAG(nmp, NOQUOTA) || ((nmp->nm_vers >= NFS_VER4) &&
4905	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
4906	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
4907	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED)))
4908		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
4909	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, MNTUDP))
4910		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTUDP);
4911	if (NMFLAG(nmp, MNTQUICK))
4912		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTQUICK);
4913
4914	/* assemble info buffer: */
4915	xb_init_buffer(&xbinfo, NULL, 0);
4916	xb_add_32(error, &xbinfo, NFS_MOUNT_INFO_VERSION);
4917	infolength_offset = xb_offset(&xbinfo);
4918	xb_add_32(error, &xbinfo, 0);
4919	xb_add_bitmap(error, &xbinfo, miattrs, NFS_MIATTR_BITMAP_LEN);
4920	xb_add_bitmap(error, &xbinfo, miflags, NFS_MIFLAG_BITMAP_LEN);
4921	xb_add_32(error, &xbinfo, origargslength);
4922	if (!error)
4923		error = xb_add_bytes(&xbinfo, nmp->nm_args, origargslength, 0);
4924
4925	/* the opaque byte count for the current mount args values: */
4926	curargsopaquelength_offset = xb_offset(&xbinfo);
4927	xb_add_32(error, &xbinfo, 0);
4928
4929	/* Encode current mount args values */
4930	xb_add_32(error, &xbinfo, NFS_ARGSVERSION_XDR);
4931	curargslength_offset = xb_offset(&xbinfo);
4932	xb_add_32(error, &xbinfo, 0);
4933	xb_add_32(error, &xbinfo, NFS_XDRARGS_VERSION_0);
4934	xb_add_bitmap(error, &xbinfo, mattrs, NFS_MATTR_BITMAP_LEN);
4935	attrslength_offset = xb_offset(&xbinfo);
4936	xb_add_32(error, &xbinfo, 0);
4937	xb_add_bitmap(error, &xbinfo, mflags_mask, NFS_MFLAG_BITMAP_LEN);
4938	xb_add_bitmap(error, &xbinfo, mflags, NFS_MFLAG_BITMAP_LEN);
4939	xb_add_32(error, &xbinfo, nmp->nm_vers);		/* NFS_VERSION */
4940	if (nmp->nm_vers >= NFS_VER4)
4941		xb_add_32(error, &xbinfo, 0);			/* NFS_MINOR_VERSION */
4942	xb_add_32(error, &xbinfo, nmp->nm_rsize);		/* READ_SIZE */
4943	xb_add_32(error, &xbinfo, nmp->nm_wsize);		/* WRITE_SIZE */
4944	xb_add_32(error, &xbinfo, nmp->nm_readdirsize);		/* READDIR_SIZE */
4945	xb_add_32(error, &xbinfo, nmp->nm_readahead);		/* READAHEAD */
4946	xb_add_32(error, &xbinfo, nmp->nm_acregmin);		/* ATTRCACHE_REG_MIN */
4947	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MIN */
4948	xb_add_32(error, &xbinfo, nmp->nm_acregmax);		/* ATTRCACHE_REG_MAX */
4949	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MAX */
4950	xb_add_32(error, &xbinfo, nmp->nm_acdirmin);		/* ATTRCACHE_DIR_MIN */
4951	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MIN */
4952	xb_add_32(error, &xbinfo, nmp->nm_acdirmax);		/* ATTRCACHE_DIR_MAX */
4953	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MAX */
4954	xb_add_32(error, &xbinfo, nmp->nm_lockmode);		/* LOCK_MODE */
4955	if (nmp->nm_sec.count) {
4956		xb_add_32(error, &xbinfo, nmp->nm_sec.count);		/* SECURITY */
4957		nfsmerr_if(error);
4958		for (i=0; i < nmp->nm_sec.count; i++)
4959			xb_add_32(error, &xbinfo, nmp->nm_sec.flavors[i]);
4960	} else if (nmp->nm_servsec.count) {
4961		xb_add_32(error, &xbinfo, nmp->nm_servsec.count);	/* SECURITY */
4962		nfsmerr_if(error);
4963		for (i=0; i < nmp->nm_servsec.count; i++)
4964			xb_add_32(error, &xbinfo, nmp->nm_servsec.flavors[i]);
4965	} else {
4966		xb_add_32(error, &xbinfo, 1);				/* SECURITY */
4967		xb_add_32(error, &xbinfo, nmp->nm_auth);
4968	}
4969	xb_add_32(error, &xbinfo, nmp->nm_numgrps);		/* MAX_GROUP_LIST */
4970	nfsmerr_if(error);
4971	snprintf(sotype, sizeof(sotype), "%s%s", (nmp->nm_sotype == SOCK_DGRAM) ? "udp" : "tcp",
4972		nmp->nm_sofamily ? (nmp->nm_sofamily == AF_INET) ? "4" : "6" : "");
4973	xb_add_string(error, &xbinfo, sotype, strlen(sotype));	/* SOCKET_TYPE */
4974	xb_add_32(error, &xbinfo, ntohs(((struct sockaddr_in*)nmp->nm_saddr)->sin_port)); /* NFS_PORT */
4975	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
4976		xb_add_32(error, &xbinfo, nmp->nm_mountport);	/* MOUNT_PORT */
4977	timeo = (nmp->nm_timeo * 10) / NFS_HZ;
4978	xb_add_32(error, &xbinfo, timeo/10);			/* REQUEST_TIMEOUT */
4979	xb_add_32(error, &xbinfo, (timeo%10)*100000000);	/* REQUEST_TIMEOUT */
4980	if (NMFLAG(nmp, SOFT))
4981		xb_add_32(error, &xbinfo, nmp->nm_retry);	/* SOFT_RETRY_COUNT */
4982	if (nmp->nm_deadtimeout) {
4983		xb_add_32(error, &xbinfo, nmp->nm_deadtimeout);	/* DEAD_TIMEOUT */
4984		xb_add_32(error, &xbinfo, 0);			/* DEAD_TIMEOUT */
4985	}
4986	if (nmp->nm_fh)
4987		xb_add_fh(error, &xbinfo, &nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len); /* FH */
4988	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_numlocs);			/* FS_LOCATIONS */
4989	for (loc = 0; !error && (loc < nmp->nm_locations.nl_numlocs); loc++) {
4990		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servcount);
4991		for (serv = 0; !error && (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount); serv++) {
4992			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name,
4993				strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
4994			xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
4995			for (addr = 0; !error && (addr < nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
4996				xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
4997					strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
4998			xb_add_32(error, &xbinfo, 0); /* empty server info */
4999		}
5000		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount);
5001		for (comp = 0; !error && (comp < nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount); comp++)
5002			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp],
5003				strlen(nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp]));
5004		xb_add_32(error, &xbinfo, 0); /* empty fs location info */
5005	}
5006	xb_add_32(error, &xbinfo, vfs_flags(nmp->nm_mountp));		/* MNTFLAGS */
5007	if (origargsvers < NFS_ARGSVERSION_XDR)
5008		xb_add_string(error, &xbinfo, vfs_statfs(nmp->nm_mountp)->f_mntfromname,
5009			strlen(vfs_statfs(nmp->nm_mountp)->f_mntfromname));	/* MNTFROM */
5010	curargs_end_offset = xb_offset(&xbinfo);
5011
5012	/* NFS_MIATTR_CUR_LOC_INDEX */
5013	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_flags);
5014	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_loc);
5015	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_serv);
5016	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_addr);
5017
5018	xb_build_done(error, &xbinfo);
5019
5020	/* update opaque counts */
5021	end_offset = xb_offset(&xbinfo);
5022	if (!error) {
5023		error = xb_seek(&xbinfo, attrslength_offset);
5024		xb_add_32(error, &xbinfo, curargs_end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
5025	}
5026	if (!error) {
5027		error = xb_seek(&xbinfo, curargslength_offset);
5028		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
5029	}
5030	if (!error) {
5031		error = xb_seek(&xbinfo, curargsopaquelength_offset);
5032		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
5033	}
5034	if (!error) {
5035		error = xb_seek(&xbinfo, infolength_offset);
5036		xb_add_32(error, &xbinfo, end_offset - infolength_offset + XDRWORD/*version*/);
5037	}
5038	nfsmerr_if(error);
5039
5040	/* copy result xdrbuf to caller */
5041	*xb = xbinfo;
5042
5043	/* and mark the local copy as not needing cleanup */
5044	xbinfo.xb_flags &= ~XB_CLEANUP;
5045nfsmerr:
5046	xb_cleanup(&xbinfo);
5047	return (error);
5048}
5049
5050/*
5051 * Do that sysctl thang...
5052 */
5053int
5054nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
5055           user_addr_t newp, size_t newlen, vfs_context_t ctx)
5056{
5057	int error = 0, val, softnobrowse;
5058	struct sysctl_req *req = NULL;
5059	union union_vfsidctl vc;
5060	mount_t mp;
5061	struct nfsmount *nmp = NULL;
5062	struct vfsquery vq;
5063	boolean_t is_64_bit;
5064	fsid_t fsid;
5065	struct xdrbuf xb;
5066#if NFSSERVER
5067	struct nfs_exportfs *nxfs;
5068	struct nfs_export *nx;
5069	struct nfs_active_user_list *ulist;
5070	struct nfs_export_stat_desc stat_desc;
5071	struct nfs_export_stat_rec statrec;
5072	struct nfs_user_stat_node *unode, *unode_next;
5073	struct nfs_user_stat_desc ustat_desc;
5074	struct nfs_user_stat_user_rec ustat_rec;
5075	struct nfs_user_stat_path_rec upath_rec;
5076	uint bytes_avail, bytes_total, recs_copied;
5077	uint numExports, totlen, pos, numRecs, count;
5078#endif /* NFSSERVER */
5079
5080	/*
5081	 * All names at this level are terminal.
5082	 */
5083	if (namelen > 1)
5084		return (ENOTDIR);	/* overloaded */
5085
5086	is_64_bit = vfs_context_is64bit(ctx);
5087
5088	/* common code for "new style" VFS_CTL sysctl, get the mount. */
5089	switch (name[0]) {
5090	case VFS_CTL_TIMEO:
5091	case VFS_CTL_QUERY:
5092	case VFS_CTL_NOLOCKS:
5093		req = CAST_DOWN(struct sysctl_req *, oldp);
5094		error = SYSCTL_IN(req, &vc, is_64_bit? sizeof(vc.vc64):sizeof(vc.vc32));
5095		if (error)
5096			return (error);
5097		mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
5098		if (mp == NULL)
5099			return (ENOENT);
5100		nmp = VFSTONFS(mp);
5101		if (nmp == NULL)
5102			return (ENOENT);
5103		bzero(&vq, sizeof(vq));
5104		req->newidx = 0;
5105		if (is_64_bit) {
5106			req->newptr = vc.vc64.vc_ptr;
5107			req->newlen = (size_t)vc.vc64.vc_len;
5108		} else {
5109			req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr);
5110			req->newlen = vc.vc32.vc_len;
5111		}
5112	}
5113
5114	switch(name[0]) {
5115	case NFS_NFSSTATS:
5116		if (!oldp) {
5117			*oldlenp = sizeof nfsstats;
5118			return (0);
5119		}
5120
5121		if (*oldlenp < sizeof nfsstats) {
5122			*oldlenp = sizeof nfsstats;
5123			return (ENOMEM);
5124		}
5125
5126		error = copyout(&nfsstats, oldp, sizeof nfsstats);
5127		if (error)
5128			return (error);
5129
5130		if (newp && newlen != sizeof nfsstats)
5131			return (EINVAL);
5132
5133		if (newp)
5134			return copyin(newp, &nfsstats, sizeof nfsstats);
5135		return (0);
5136	case NFS_MOUNTINFO:
5137		/* read in the fsid */
5138		if (*oldlenp < sizeof(fsid))
5139			return (EINVAL);
5140		if ((error = copyin(oldp, &fsid, sizeof(fsid))))
5141			return (error);
5142		/* swizzle it back to host order */
5143		fsid.val[0] = ntohl(fsid.val[0]);
5144		fsid.val[1] = ntohl(fsid.val[1]);
5145		/* find mount and make sure it's NFS */
5146		if (((mp = vfs_getvfs(&fsid))) == NULL)
5147			return (ENOENT);
5148		if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
5149			return (EINVAL);
5150		if (((nmp = VFSTONFS(mp))) == NULL)
5151			return (ENOENT);
5152		xb_init(&xb, 0);
5153		if ((error = nfs_mountinfo_assemble(nmp, &xb)))
5154			return (error);
5155		if (*oldlenp < xb.xb_u.xb_buffer.xbb_len)
5156			error = ENOMEM;
5157		else
5158			error = copyout(xb_buffer_base(&xb), oldp, xb.xb_u.xb_buffer.xbb_len);
5159		*oldlenp = xb.xb_u.xb_buffer.xbb_len;
5160		xb_cleanup(&xb);
5161		break;
5162#if NFSSERVER
5163	case NFS_EXPORTSTATS:
5164		/* setup export stat descriptor */
5165		stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
5166
5167		if (!nfsrv_is_initialized()) {
5168			stat_desc.rec_count = 0;
5169			if (oldp && (*oldlenp >= sizeof(struct nfs_export_stat_desc)))
5170				error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5171			*oldlenp = sizeof(struct nfs_export_stat_desc);
5172			return (error);
5173		}
5174
5175		/* Count the number of exported directories */
5176		lck_rw_lock_shared(&nfsrv_export_rwlock);
5177		numExports = 0;
5178		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
5179			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
5180					numExports += 1;
5181
5182		/* update stat descriptor's export record count */
5183		stat_desc.rec_count = numExports;
5184
5185		/* calculate total size of required buffer */
5186		totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
5187
5188		/* Check caller's buffer */
5189		if (oldp == 0) {
5190			lck_rw_done(&nfsrv_export_rwlock);
5191			/* indicate required buffer len */
5192			*oldlenp = totlen;
5193			return (0);
5194		}
5195
5196		/* We require the caller's buffer to be at least large enough to hold the descriptor */
5197		if (*oldlenp < sizeof(struct nfs_export_stat_desc)) {
5198			lck_rw_done(&nfsrv_export_rwlock);
5199			/* indicate required buffer len */
5200			*oldlenp = totlen;
5201			return (ENOMEM);
5202		}
5203
5204		/* indicate required buffer len */
5205		*oldlenp = totlen;
5206
5207		/* check if export table is empty */
5208		if (!numExports) {
5209			lck_rw_done(&nfsrv_export_rwlock);
5210			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5211			return (error);
5212		}
5213
5214		/* calculate how many actual export stat records fit into caller's buffer */
5215		numRecs = (*oldlenp - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
5216
5217		if (!numRecs) {
5218			/* caller's buffer can only accomodate descriptor */
5219			lck_rw_done(&nfsrv_export_rwlock);
5220			stat_desc.rec_count = 0;
5221			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5222			return (error);
5223		}
5224
5225		/* adjust to actual number of records to copyout to caller's buffer */
5226		if (numRecs > numExports)
5227			numRecs = numExports;
5228
5229		/* set actual number of records we are returning */
5230		stat_desc.rec_count = numRecs;
5231
5232		/* first copy out the stat descriptor */
5233		pos = 0;
5234		error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
5235		if (error) {
5236			lck_rw_done(&nfsrv_export_rwlock);
5237			return (error);
5238		}
5239		pos += sizeof(struct nfs_export_stat_desc);
5240
5241		/* Loop through exported directories */
5242		count = 0;
5243		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
5244			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
5245
5246				if (count >= numRecs)
5247					break;
5248
5249				/* build exported filesystem path */
5250				snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
5251					nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
5252					nx->nx_path);
5253
5254				/* build the 64-bit export stat counters */
5255				statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
5256						nx->nx_stats.ops.lo;
5257				statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
5258						nx->nx_stats.bytes_read.lo;
5259				statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
5260						nx->nx_stats.bytes_written.lo;
5261				error = copyout(&statrec, oldp + pos, sizeof(statrec));
5262				if (error) {
5263					lck_rw_done(&nfsrv_export_rwlock);
5264					return (error);
5265				}
5266				/* advance buffer position */
5267				pos += sizeof(statrec);
5268			}
5269		}
5270		lck_rw_done(&nfsrv_export_rwlock);
5271		break;
5272	case NFS_USERSTATS:
5273		/* init structures used for copying out of kernel */
5274		ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
5275		ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
5276		upath_rec.rec_type = NFS_USER_STAT_PATH_REC;
5277
5278		/* initialize counters */
5279		bytes_total = sizeof(struct nfs_user_stat_desc);
5280		bytes_avail  = *oldlenp;
5281		recs_copied = 0;
5282
5283		if (!nfsrv_is_initialized()) /* NFS server not initialized, so no stats */
5284			goto ustat_skip;
5285
5286		/* reclaim old expired user nodes */
5287		nfsrv_active_user_list_reclaim();
5288
5289		/* reserve space for the buffer descriptor */
5290		if (bytes_avail >= sizeof(struct nfs_user_stat_desc))
5291			bytes_avail -= sizeof(struct nfs_user_stat_desc);
5292		else
5293			bytes_avail = 0;
5294
5295		/* put buffer position past the buffer descriptor */
5296		pos = sizeof(struct nfs_user_stat_desc);
5297
5298		/* Loop through exported directories */
5299		lck_rw_lock_shared(&nfsrv_export_rwlock);
5300		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
5301			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
5302				/* copy out path */
5303				if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
5304					snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
5305					    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
5306					    nx->nx_path);
5307
5308					error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
5309					if (error) {
5310						/* punt */
5311						goto ustat_done;
5312					}
5313
5314					pos += sizeof(struct nfs_user_stat_path_rec);
5315					bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
5316					recs_copied++;
5317				}
5318				else {
5319					/* Caller's buffer is exhausted */
5320					bytes_avail = 0;
5321				}
5322
5323				bytes_total += sizeof(struct nfs_user_stat_path_rec);
5324
5325				/* Scan through all user nodes of this export */
5326				ulist = &nx->nx_user_list;
5327				lck_mtx_lock(&ulist->user_mutex);
5328				for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
5329					unode_next = TAILQ_NEXT(unode, lru_link);
5330
5331					/* copy out node if there is space */
5332					if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
5333						/* prepare a user stat rec for copying out */
5334						ustat_rec.uid = unode->uid;
5335						bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
5336						ustat_rec.ops = unode->ops;
5337						ustat_rec.bytes_read = unode->bytes_read;
5338						ustat_rec.bytes_written = unode->bytes_written;
5339						ustat_rec.tm_start = unode->tm_start;
5340						ustat_rec.tm_last = unode->tm_last;
5341
5342						error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));
5343
5344						if (error) {
5345							/* punt */
5346							lck_mtx_unlock(&ulist->user_mutex);
5347							goto ustat_done;
5348						}
5349
5350						pos += sizeof(struct nfs_user_stat_user_rec);
5351						bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
5352						recs_copied++;
5353					}
5354					else {
5355						/* Caller's buffer is exhausted */
5356						bytes_avail = 0;
5357					}
5358					bytes_total += sizeof(struct nfs_user_stat_user_rec);
5359				}
5360				/* can unlock this export's list now */
5361				lck_mtx_unlock(&ulist->user_mutex);
5362			}
5363		}
5364
5365ustat_done:
5366		/* unlock the export table */
5367		lck_rw_done(&nfsrv_export_rwlock);
5368
5369ustat_skip:
5370		/* indicate number of actual records copied */
5371		ustat_desc.rec_count = recs_copied;
5372
5373		if (!error) {
5374			/* check if there was enough room for the buffer descriptor */
5375			if (*oldlenp >= sizeof(struct nfs_user_stat_desc))
5376				error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
5377			else
5378				error = ENOMEM;
5379
5380			/* always indicate required buffer size */
5381			*oldlenp = bytes_total;
5382		}
5383		break;
5384	case NFS_USERCOUNT:
5385		if (!oldp) {
5386			*oldlenp = sizeof(nfsrv_user_stat_node_count);
5387			return (0);
5388		}
5389
5390		if (*oldlenp < sizeof(nfsrv_user_stat_node_count)) {
5391			*oldlenp = sizeof(nfsrv_user_stat_node_count);
5392			return (ENOMEM);
5393		}
5394
5395		if (nfsrv_is_initialized()) {
5396			/* reclaim old expired user nodes */
5397			nfsrv_active_user_list_reclaim();
5398		}
5399
5400		error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
5401		break;
5402#endif /* NFSSERVER */
5403	case VFS_CTL_NOLOCKS:
5404 		if (req->oldptr != USER_ADDR_NULL) {
5405			lck_mtx_lock(&nmp->nm_lock);
5406			val = (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) ? 1 : 0;
5407			lck_mtx_unlock(&nmp->nm_lock);
5408 			error = SYSCTL_OUT(req, &val, sizeof(val));
5409 			if (error)
5410 				return (error);
5411 		}
5412 		if (req->newptr != USER_ADDR_NULL) {
5413 			error = SYSCTL_IN(req, &val, sizeof(val));
5414 			if (error)
5415 				return (error);
5416			lck_mtx_lock(&nmp->nm_lock);
5417			if (nmp->nm_lockmode == NFS_LOCK_MODE_LOCAL) {
5418				/* can't toggle locks when using local locks */
5419				error = EINVAL;
5420			} else if ((nmp->nm_vers >= NFS_VER4) && val) {
5421				/* can't disable locks for NFSv4 */
5422				error = EINVAL;
5423			} else if (val) {
5424				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
5425					nfs_lockd_mount_unregister(nmp);
5426				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
5427				nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
5428			} else {
5429				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED))
5430					nfs_lockd_mount_register(nmp);
5431				nmp->nm_lockmode = NFS_LOCK_MODE_ENABLED;
5432			}
5433			lck_mtx_unlock(&nmp->nm_lock);
5434 		}
5435		break;
5436	case VFS_CTL_QUERY:
5437		lck_mtx_lock(&nmp->nm_lock);
5438		/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5439		softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5440		if (!softnobrowse && (nmp->nm_state & NFSSTA_TIMEO))
5441			vq.vq_flags |= VQ_NOTRESP;
5442		if (!softnobrowse && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO) && !NMFLAG(nmp, MUTEJUKEBOX))
5443			vq.vq_flags |= VQ_NOTRESP;
5444		if (!softnobrowse && (nmp->nm_state & NFSSTA_LOCKTIMEO) &&
5445		    (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
5446			vq.vq_flags |= VQ_NOTRESP;
5447		if (nmp->nm_state & NFSSTA_DEAD)
5448			vq.vq_flags |= VQ_DEAD;
5449		lck_mtx_unlock(&nmp->nm_lock);
5450		error = SYSCTL_OUT(req, &vq, sizeof(vq));
5451		break;
5452 	case VFS_CTL_TIMEO:
5453 		if (req->oldptr != USER_ADDR_NULL) {
5454			lck_mtx_lock(&nmp->nm_lock);
5455			val = nmp->nm_tprintf_initial_delay;
5456			lck_mtx_unlock(&nmp->nm_lock);
5457 			error = SYSCTL_OUT(req, &val, sizeof(val));
5458 			if (error)
5459 				return (error);
5460 		}
5461 		if (req->newptr != USER_ADDR_NULL) {
5462 			error = SYSCTL_IN(req, &val, sizeof(val));
5463 			if (error)
5464 				return (error);
5465			lck_mtx_lock(&nmp->nm_lock);
5466 			if (val < 0)
5467 				nmp->nm_tprintf_initial_delay = 0;
5468			else
5469				nmp->nm_tprintf_initial_delay = val;
5470			lck_mtx_unlock(&nmp->nm_lock);
5471 		}
5472		break;
5473	default:
5474		return (ENOTSUP);
5475	}
5476	return (error);
5477}
5478