1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993, 1995
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgement:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)nfs_vfsops.c	8.12 (Berkeley) 5/20/95
65 * FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections.  This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/conf.h>
77#include <sys/ioctl.h>
78#include <sys/signal.h>
79#include <sys/proc_internal.h> /* for fs rooting to update rootdir in fdp */
80#include <sys/kauth.h>
81#include <sys/vnode_internal.h>
82#include <sys/malloc.h>
83#include <sys/kernel.h>
84#include <sys/sysctl.h>
85#include <sys/mount_internal.h>
86#include <sys/kpi_mbuf.h>
87#include <sys/socket.h>
88#include <sys/socketvar.h>
89#include <sys/fcntl.h>
90#include <sys/quota.h>
91#include <sys/priv.h>
92#include <libkern/OSAtomic.h>
93
94#include <sys/vm.h>
95#include <sys/vmparam.h>
96
97#if !defined(NO_MOUNT_PRIVATE)
98#include <sys/filedesc.h>
99#endif /* NO_MOUNT_PRIVATE */
100
101#include <net/if.h>
102#include <net/route.h>
103#include <netinet/in.h>
104
105#include <nfs/rpcv2.h>
106#include <nfs/krpc.h>
107#include <nfs/nfsproto.h>
108#include <nfs/nfs.h>
109#include <nfs/nfsnode.h>
110#include <nfs/nfs_gss.h>
111#include <nfs/nfsmount.h>
112#include <nfs/xdr_subs.h>
113#include <nfs/nfsm_subs.h>
114#include <nfs/nfsdiskless.h>
115#include <nfs/nfs_lock.h>
116#if CONFIG_MACF
117#include <security/mac_framework.h>
118#endif
119
120#include <pexpert/pexpert.h>
121
/*
 * VFS-layer debug logging: forwards its arguments to NFS_DBG() tagged with
 * the NFS_FAC_VFS facility and the constant 7 (presumably the verbosity
 * level -- confirm against the NFS_DBG definition).
 */
#define NFS_VFS_DBG(...) NFS_DBG(NFS_FAC_VFS, 7, ## __VA_ARGS__)
123
/*
 * NFS client globals
 *
 * The lock groups, mutexes, queue heads, attribute bitmaps and thread calls
 * declared here are set up once by nfs_vfs_init().
 */

/* timer interval in clock ticks; computed from hz and NFS_TICKINTVL, never < 1 */
int nfs_ticks;
static lck_grp_t *nfs_global_grp, *nfs_mount_grp;
lck_mtx_t *nfs_global_mutex;
/* NFSv4 attribute bitmaps, filled in by the NFS4_*_ATTRIBUTES() macros at init */
uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN];
uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN];
/* default attributes, masked at init so only per-object attributes remain */
uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
struct nfsclientidlist nfsclientids;

/* NFS requests */
struct nfs_reqqhead nfs_reqq;
lck_grp_t *nfs_request_grp;
lck_mtx_t *nfs_request_mutex;
thread_call_t nfs_request_timer_call;
int nfs_request_timer_on;
u_int32_t nfs_xid = 0;
u_int32_t nfs_xidwrap = 0;		/* to build a (non-wrapping) 64 bit xid */

/* thread call bound to nfs_buf_timer() */
thread_call_t nfs_buf_timer_call;

/* NFSv4 */
lck_grp_t *nfs_open_grp;
uint32_t nfs_open_owner_seqnum = 0;
uint32_t nfs_lock_owner_seqnum = 0;
thread_call_t nfs4_callback_timer_call;
int nfs4_callback_timer_on = 0;

/* nfsiod */
lck_grp_t *nfsiod_lck_grp;
lck_mtx_t *nfsiod_mutex;
struct nfsiodlist nfsiodfree, nfsiodwork;
struct nfsiodmountlist nfsiodmounts;
int nfsiod_thread_count = 0;
int nfsiod_thread_max = NFS_DEFASYNCTHREAD;
int nfs_max_async_writes = NFS_DEFMAXASYNCWRITES;

/* tunables (initial values shown; presumably adjustable via sysctl -- confirm) */
int nfs_iosize = NFS_IOSIZE;	/* reported as f_iosize by nfs_vfs_getattr() */
int nfs_access_cache_timeout = NFS_MAXATTRTIMO;
int nfs_access_delete = 1; /* too many servers get this wrong - workaround on by default */
int nfs_access_dotzfs = 1;
int nfs_access_for_getattr = 0;
int nfs_allow_async = 0;
/*
 * statfs refresh rate limit used by nfs_vfs_getattr(): the number of time
 * slots per second; values < 1 or >= 1000000 mean "no limit"
 */
int nfs_statfs_rate_limit = NFS_DEFSTATFSRATELIMIT;
int nfs_lockd_mounts = 0;
int nfs_lockd_request_sent = 0;
int nfs_idmap_ctrl = NFS_IDMAP_CTRL_USE_IDMAP_SERVICE;
int nfs_callback_port = 0;

int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
177
178
/*
 * Forward declarations for the mount/unmount helper routines.
 * The "private" diskless mount helper only exists when NO_MOUNT_PRIVATE
 * is not defined.
 */
int		mountnfs(char *, mount_t, vfs_context_t, vnode_t *);
static int	nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
#if !defined(NO_MOUNT_PRIVATE)
static int	nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
#endif /* NO_MOUNT_PRIVATE */
int		nfs_mount_connect(struct nfsmount *);
void		nfs_mount_drain_and_cleanup(struct nfsmount *);
void		nfs_mount_cleanup(struct nfsmount *);
int		nfs_mountinfo_assemble(struct nfsmount *, struct xdrbuf *);
int		nfs4_mount_update_path_with_symlink(struct nfsmount *, struct nfs_fs_path *, uint32_t, fhandle_t *, int *, fhandle_t *, vfs_context_t);
189
/*
 * NFS VFS operations.
 *
 * Prototypes for the entry points collected in the nfs_vfsops table below.
 */
int	nfs_vfs_mount(mount_t, vnode_t, user_addr_t, vfs_context_t);
int	nfs_vfs_start(mount_t, int, vfs_context_t);
int	nfs_vfs_unmount(mount_t, int, vfs_context_t);
int	nfs_vfs_root(mount_t, vnode_t *, vfs_context_t);
int	nfs_vfs_quotactl(mount_t, int, uid_t, caddr_t, vfs_context_t);
int	nfs_vfs_getattr(mount_t, struct vfs_attr *, vfs_context_t);
int	nfs_vfs_sync(mount_t, int, vfs_context_t);
int	nfs_vfs_vget(mount_t, ino64_t, vnode_t *, vfs_context_t);
int	nfs_vfs_vptofh(vnode_t, int *, unsigned char *, vfs_context_t);
int	nfs_vfs_fhtovp(mount_t, int, unsigned char *, vnode_t *, vfs_context_t);
int	nfs_vfs_init(struct vfsconf *);
int	nfs_vfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t);

/*
 * The NFS client's VFS operations vector.
 *
 * This is a positional initializer, so the entries must stay in the same
 * order as the members of struct vfsops.  Note that fhtovp is listed before
 * vptofh here, unlike the prototype list above.
 */
struct vfsops nfs_vfsops = {
	nfs_vfs_mount,
	nfs_vfs_start,
	nfs_vfs_unmount,
	nfs_vfs_root,
	nfs_vfs_quotactl,
	nfs_vfs_getattr,
	nfs_vfs_sync,
	nfs_vfs_vget,
	nfs_vfs_fhtovp,
	nfs_vfs_vptofh,
	nfs_vfs_init,
	nfs_vfs_sysctl,
	NULL,		/* setattr */
	{ NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL }	/* reserved */
};
228
229
/*
 * version-specific NFS functions
 *
 * Each mount dispatches protocol-specific work through a struct nfs_funcs
 * table (e.g. nfs_vfs_getattr() calls nmp->nm_funcs->nf_update_statfs).
 */
int nfs3_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
int nfs4_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
int nfs3_fsinfo(struct nfsmount *, nfsnode_t, vfs_context_t);
int nfs3_update_statfs(struct nfsmount *, vfs_context_t);
int nfs4_update_statfs(struct nfsmount *, vfs_context_t);
/* without QUOTA support the getquota slots in the tables below are NULL */
#if !QUOTA
#define nfs3_getquota	NULL
#define nfs4_getquota	NULL
#else
int nfs3_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
int nfs4_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
#endif

/*
 * Function table built from the nfs3_* routines.
 * Positional initializer: entries must match the member order of
 * struct nfs_funcs, and stay parallel with nfs4_funcs below.
 */
struct nfs_funcs nfs3_funcs = {
	nfs3_mount,
	nfs3_update_statfs,
	nfs3_getquota,
	nfs3_access_rpc,
	nfs3_getattr_rpc,
	nfs3_setattr_rpc,
	nfs3_read_rpc_async,
	nfs3_read_rpc_async_finish,
	nfs3_readlink_rpc,
	nfs3_write_rpc_async,
	nfs3_write_rpc_async_finish,
	nfs3_commit_rpc,
	nfs3_lookup_rpc_async,
	nfs3_lookup_rpc_async_finish,
	nfs3_remove_rpc,
	nfs3_rename_rpc,
	nfs3_setlock_rpc,
	nfs3_unlock_rpc,
	nfs3_getlock_rpc
	};
/*
 * Function table built from the nfs4_* routines.
 * Positional initializer: same slot order as nfs3_funcs above.
 */
struct nfs_funcs nfs4_funcs = {
	nfs4_mount,
	nfs4_update_statfs,
	nfs4_getquota,
	nfs4_access_rpc,
	nfs4_getattr_rpc,
	nfs4_setattr_rpc,
	nfs4_read_rpc_async,
	nfs4_read_rpc_async_finish,
	nfs4_readlink_rpc,
	nfs4_write_rpc_async,
	nfs4_write_rpc_async_finish,
	nfs4_commit_rpc,
	nfs4_lookup_rpc_async,
	nfs4_lookup_rpc_async_finish,
	nfs4_remove_rpc,
	nfs4_rename_rpc,
	nfs4_setlock_rpc,
	nfs4_unlock_rpc,
	nfs4_getlock_rpc
	};
288
289/*
290 * Called once to initialize data structures...
291 */
292int
293nfs_vfs_init(__unused struct vfsconf *vfsp)
294{
295	int i;
296
297	/*
298	 * Check to see if major data structures haven't bloated.
299	 */
300	if (sizeof (struct nfsnode) > NFS_NODEALLOC) {
301		printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC);
302		printf("Try reducing NFS_SMALLFH\n");
303	}
304	if (sizeof (struct nfsmount) > NFS_MNTALLOC)
305		printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC);
306
307	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
308	if (nfs_ticks < 1)
309		nfs_ticks = 1;
310
311	/* init async I/O thread pool state */
312	TAILQ_INIT(&nfsiodfree);
313	TAILQ_INIT(&nfsiodwork);
314	TAILQ_INIT(&nfsiodmounts);
315	nfsiod_lck_grp = lck_grp_alloc_init("nfsiod", LCK_GRP_ATTR_NULL);
316	nfsiod_mutex = lck_mtx_alloc_init(nfsiod_lck_grp, LCK_ATTR_NULL);
317
318	/* init lock groups, etc. */
319	nfs_mount_grp = lck_grp_alloc_init("nfs_mount", LCK_GRP_ATTR_NULL);
320	nfs_open_grp = lck_grp_alloc_init("nfs_open", LCK_GRP_ATTR_NULL);
321	nfs_global_grp = lck_grp_alloc_init("nfs_global", LCK_GRP_ATTR_NULL);
322
323	nfs_global_mutex = lck_mtx_alloc_init(nfs_global_grp, LCK_ATTR_NULL);
324
325	/* init request list mutex */
326	nfs_request_grp = lck_grp_alloc_init("nfs_request", LCK_GRP_ATTR_NULL);
327	nfs_request_mutex = lck_mtx_alloc_init(nfs_request_grp, LCK_ATTR_NULL);
328
329	/* initialize NFS request list */
330	TAILQ_INIT(&nfs_reqq);
331
332	nfs_nbinit();			/* Init the nfsbuf table */
333	nfs_nhinit();			/* Init the nfsnode table */
334	nfs_lockinit();			/* Init the nfs lock state */
335	nfs_gss_init();			/* Init RPCSEC_GSS security */
336
337	/* NFSv4 stuff */
338	NFS4_PER_FS_ATTRIBUTES(nfs_fs_attr_bitmap);
339	NFS4_PER_OBJECT_ATTRIBUTES(nfs_object_attr_bitmap);
340	NFS4_DEFAULT_ATTRIBUTES(nfs_getattr_bitmap);
341	for (i=0; i < NFS_ATTR_BITMAP_LEN; i++)
342		nfs_getattr_bitmap[i] &= nfs_object_attr_bitmap[i];
343	TAILQ_INIT(&nfsclientids);
344
345	/* initialize NFS timer callouts */
346	nfs_request_timer_call = thread_call_allocate(nfs_request_timer, NULL);
347	nfs_buf_timer_call = thread_call_allocate(nfs_buf_timer, NULL);
348	nfs4_callback_timer_call = thread_call_allocate(nfs4_callback_timer, NULL);
349
350	return (0);
351}
352
353/*
354 * nfs statfs call
355 */
356int
357nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
358{
359	nfsnode_t np;
360	int error = 0, lockerror, status, nfsvers;
361	u_int64_t xid;
362	struct nfsm_chain nmreq, nmrep;
363	uint32_t val = 0;
364
365	nfsvers = nmp->nm_vers;
366	np = nmp->nm_dnp;
367	if (!np)
368		return (ENXIO);
369	if ((error = vnode_get(NFSTOV(np))))
370		return (error);
371
372	nfsm_chain_null(&nmreq);
373	nfsm_chain_null(&nmrep);
374
375	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
376	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
377	nfsm_chain_build_done(error, &nmreq);
378	nfsmout_if(error);
379	error = nfs_request2(np, NULL, &nmreq, NFSPROC_FSSTAT, vfs_context_thread(ctx),
380		vfs_context_ucred(ctx), NULL, R_SOFT, &nmrep, &xid, &status);
381	if (error == ETIMEDOUT)
382		goto nfsmout;
383	if ((lockerror = nfs_node_lock(np)))
384		error = lockerror;
385	if (nfsvers == NFS_VER3)
386		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
387	if (!lockerror)
388		nfs_node_unlock(np);
389	if (!error)
390		error = status;
391	nfsm_assert(error, NFSTONMP(np), ENXIO);
392	nfsmout_if(error);
393	lck_mtx_lock(&nmp->nm_lock);
394	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL);
395	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE);
396	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_AVAIL);
397	if (nfsvers == NFS_VER3) {
398		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_AVAIL);
399		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_TOTAL);
400		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_FREE);
401		nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
402		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_total);
403		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_free);
404		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_avail);
405		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_total);
406		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_free);
407		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_avail);
408		// skip invarsec
409	} else {
410		nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip tsize?
411		nfsm_chain_get_32(error, &nmrep, nmp->nm_fsattr.nfsa_bsize);
412		nfsm_chain_get_32(error, &nmrep, val);
413		nfsmout_if(error);
414		if (nmp->nm_fsattr.nfsa_bsize <= 0)
415			nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
416		nmp->nm_fsattr.nfsa_space_total = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
417		nfsm_chain_get_32(error, &nmrep, val);
418		nfsmout_if(error);
419		nmp->nm_fsattr.nfsa_space_free = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
420		nfsm_chain_get_32(error, &nmrep, val);
421		nfsmout_if(error);
422		nmp->nm_fsattr.nfsa_space_avail = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
423	}
424	lck_mtx_unlock(&nmp->nm_lock);
425nfsmout:
426	nfsm_chain_cleanup(&nmreq);
427	nfsm_chain_cleanup(&nmrep);
428	vnode_put(NFSTOV(np));
429	return (error);
430}
431
/*
 * NFSv4 statfs update.
 *
 * Sends a PUTFH + GETATTR compound for the mount's root node (nmp->nm_dnp),
 * requesting the default getattr attributes plus whatever
 * NFS4_STATFS_ATTRIBUTES() adds.  The reply is parsed into nmp->nm_fsattr
 * (under nmp->nm_lock) and into a local nfs_vattr, which is then loaded
 * into the root node's attribute cache.  On success the block size is set
 * to NFS_FABLKSIZE.
 *
 * Returns 0 on success, ENXIO if the mount has no root node or went away
 * during the request, or the error from vnode_get(), request construction,
 * the request itself, reply parsing, or the node lock.
 */
int
nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
{
	nfsnode_t np;
	int error = 0, lockerror, status, nfsvers, numops;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	struct nfs_vattr nvattr;
	struct nfsreq_secinfo_args si;

	nfsvers = nmp->nm_vers;
	np = nmp->nm_dnp;
	if (!np)
		return (ENXIO);
	/* hold an iocount on the root vnode for the duration of the call */
	if ((error = vnode_get(NFSTOV(np))))
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	NVATTR_INIT(&nvattr);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH + GETATTR
	/* numops counts down as each op is added; it must reach 0 (asserted below) */
	numops = 2;
	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "statfs", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	/* start from the default getattr bitmap and add the statfs attributes */
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	NFS4_STATFS_ATTRIBUTES(bitmap);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		NULL, R_SOFT, &nmrep, &xid, &status);
	/* walk the compound reply: tag, op count, then each op's result */
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_assert(error, NFSTONMP(np), ENXIO);
	nfsmout_if(error);
	/* file system attributes go into the mount, so parse under the mount lock */
	lck_mtx_lock(&nmp->nm_lock);
	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
	lck_mtx_unlock(&nmp->nm_lock);
	nfsmout_if(error);
	/* the per-object attributes refresh the root node's attribute cache */
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	if (!error)
		nfs_loadattrcache(np, &nvattr, &xid, 0);
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_assert(error, NFSTONMP(np), ENXIO);
	nfsmout_if(error);
	/*
	 * NOTE(review): this store is made without nm_lock held, unlike the
	 * other nm_fsattr updates in this file -- confirm that is intended.
	 */
	nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
nfsmout:
	NVATTR_CLEANUP(&nvattr);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	vnode_put(NFSTOV(np));
	return (error);
}
499
500
501/*
502 * The NFS VFS_GETATTR function: "statfs"-type information is retrieved
503 * using the nf_update_statfs() function, and other attributes are cobbled
504 * together from whatever sources we can (getattr, fsinfo, pathconf).
505 */
506int
507nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
508{
509	struct nfsmount *nmp;
510	uint32_t bsize;
511	int error = 0, nfsvers;
512
513	nmp = VFSTONFS(mp);
514	if (nfs_mount_gone(nmp))
515		return (ENXIO);
516	nfsvers = nmp->nm_vers;
517
518	if (VFSATTR_IS_ACTIVE(fsap, f_bsize)  ||
519	    VFSATTR_IS_ACTIVE(fsap, f_iosize) ||
520	    VFSATTR_IS_ACTIVE(fsap, f_blocks) ||
521	    VFSATTR_IS_ACTIVE(fsap, f_bfree)  ||
522	    VFSATTR_IS_ACTIVE(fsap, f_bavail) ||
523	    VFSATTR_IS_ACTIVE(fsap, f_bused)  ||
524	    VFSATTR_IS_ACTIVE(fsap, f_files)  ||
525	    VFSATTR_IS_ACTIVE(fsap, f_ffree)) {
526		int statfsrate = nfs_statfs_rate_limit;
527		int refresh = 1;
528
529		/*
530		 * Are we rate-limiting statfs RPCs?
531		 * (Treat values less than 1 or greater than 1,000,000 as no limit.)
532		 */
533		if ((statfsrate > 0) && (statfsrate < 1000000)) {
534			struct timeval now;
535			uint32_t stamp;
536
537			microuptime(&now);
538			lck_mtx_lock(&nmp->nm_lock);
539			stamp = (now.tv_sec * statfsrate) + (now.tv_usec / (1000000/statfsrate));
540			if (stamp != nmp->nm_fsattrstamp) {
541				refresh = 1;
542				nmp->nm_fsattrstamp = stamp;
543			} else {
544				refresh = 0;
545			}
546			lck_mtx_unlock(&nmp->nm_lock);
547		}
548
549		if (refresh && !nfs_use_cache(nmp))
550			error = nmp->nm_funcs->nf_update_statfs(nmp, ctx);
551		if ((error == ESTALE) || (error == ETIMEDOUT))
552			error = 0;
553		if (error)
554			return (error);
555
556		lck_mtx_lock(&nmp->nm_lock);
557		VFSATTR_RETURN(fsap, f_iosize, nfs_iosize);
558		VFSATTR_RETURN(fsap, f_bsize, nmp->nm_fsattr.nfsa_bsize);
559		bsize = nmp->nm_fsattr.nfsa_bsize;
560		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL))
561			VFSATTR_RETURN(fsap, f_blocks, nmp->nm_fsattr.nfsa_space_total / bsize);
562		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE))
563			VFSATTR_RETURN(fsap, f_bfree, nmp->nm_fsattr.nfsa_space_free / bsize);
564		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_AVAIL))
565			VFSATTR_RETURN(fsap, f_bavail, nmp->nm_fsattr.nfsa_space_avail / bsize);
566		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL) &&
567		    NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE))
568			VFSATTR_RETURN(fsap, f_bused,
569				(nmp->nm_fsattr.nfsa_space_total / bsize) -
570				(nmp->nm_fsattr.nfsa_space_free / bsize));
571		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_TOTAL))
572			VFSATTR_RETURN(fsap, f_files, nmp->nm_fsattr.nfsa_files_total);
573		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_FREE))
574			VFSATTR_RETURN(fsap, f_ffree, nmp->nm_fsattr.nfsa_files_free);
575		lck_mtx_unlock(&nmp->nm_lock);
576	}
577
578	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
579		u_int32_t caps, valid;
580		nfsnode_t np = nmp->nm_dnp;
581
582		nfsm_assert(error, VFSTONFS(mp) && np, ENXIO);
583		if (error)
584			return (error);
585		lck_mtx_lock(&nmp->nm_lock);
586
587		/*
588		 * The capabilities[] array defines what this volume supports.
589		 *
590		 * The valid[] array defines which bits this code understands
591		 * the meaning of (whether the volume has that capability or not).
592		 * Any zero bits here means "I don't know what you're asking about"
593		 * and the caller cannot tell whether that capability is
594		 * present or not.
595		 */
596		caps = valid = 0;
597		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SYMLINK_SUPPORT)) {
598			valid |= VOL_CAP_FMT_SYMBOLICLINKS;
599			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_SYMLINK)
600				caps |= VOL_CAP_FMT_SYMBOLICLINKS;
601		}
602		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_LINK_SUPPORT)) {
603			valid |= VOL_CAP_FMT_HARDLINKS;
604			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_LINK)
605				caps |= VOL_CAP_FMT_HARDLINKS;
606		}
607		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) {
608			valid |= VOL_CAP_FMT_CASE_SENSITIVE;
609			if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE))
610				caps |= VOL_CAP_FMT_CASE_SENSITIVE;
611		}
612		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING)) {
613			valid |= VOL_CAP_FMT_CASE_PRESERVING;
614			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING)
615				caps |= VOL_CAP_FMT_CASE_PRESERVING;
616		}
617		/* Note: VOL_CAP_FMT_2TB_FILESIZE is actually used to test for "large file support" */
618		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
619			/* Is server's max file size at least 4GB? */
620			if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x100000000ULL)
621				caps |= VOL_CAP_FMT_2TB_FILESIZE;
622		} else if (nfsvers >= NFS_VER3) {
623			/*
624			 * NFSv3 and up supports 64 bits of file size.
625			 * So, we'll just assume maxfilesize >= 4GB
626			 */
627			caps |= VOL_CAP_FMT_2TB_FILESIZE;
628		}
629		if (nfsvers >= NFS_VER4) {
630			caps |= VOL_CAP_FMT_HIDDEN_FILES;
631			valid |= VOL_CAP_FMT_HIDDEN_FILES;
632			// VOL_CAP_FMT_OPENDENYMODES
633//			caps |= VOL_CAP_FMT_OPENDENYMODES;
634//			valid |= VOL_CAP_FMT_OPENDENYMODES;
635		}
636		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
637			// VOL_CAP_FMT_PERSISTENTOBJECTIDS |
638			// VOL_CAP_FMT_SYMBOLICLINKS |
639			// VOL_CAP_FMT_HARDLINKS |
640			// VOL_CAP_FMT_JOURNAL |
641			// VOL_CAP_FMT_JOURNAL_ACTIVE |
642			// VOL_CAP_FMT_NO_ROOT_TIMES |
643			// VOL_CAP_FMT_SPARSE_FILES |
644			// VOL_CAP_FMT_ZERO_RUNS |
645			// VOL_CAP_FMT_CASE_SENSITIVE |
646			// VOL_CAP_FMT_CASE_PRESERVING |
647			// VOL_CAP_FMT_FAST_STATFS |
648			// VOL_CAP_FMT_2TB_FILESIZE |
649			// VOL_CAP_FMT_OPENDENYMODES |
650			// VOL_CAP_FMT_HIDDEN_FILES |
651			caps;
652		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
653			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
654			// VOL_CAP_FMT_SYMBOLICLINKS |
655			// VOL_CAP_FMT_HARDLINKS |
656			// VOL_CAP_FMT_JOURNAL |
657			// VOL_CAP_FMT_JOURNAL_ACTIVE |
658			// VOL_CAP_FMT_NO_ROOT_TIMES |
659			// VOL_CAP_FMT_SPARSE_FILES |
660			// VOL_CAP_FMT_ZERO_RUNS |
661			// VOL_CAP_FMT_CASE_SENSITIVE |
662			// VOL_CAP_FMT_CASE_PRESERVING |
663			VOL_CAP_FMT_FAST_STATFS |
664			VOL_CAP_FMT_2TB_FILESIZE |
665			// VOL_CAP_FMT_OPENDENYMODES |
666			// VOL_CAP_FMT_HIDDEN_FILES |
667			valid;
668
669		/*
670		 * We don't support most of the interfaces.
671		 *
672		 * We MAY support locking, but we don't have any easy way of probing.
673		 * We can tell if there's no lockd running or if locks have been
674		 * disabled for a mount, so we can definitely answer NO in that case.
675		 * Any attempt to send a request to lockd to test for locking support
676		 * may cause the lazily-launched locking daemons to be started
677		 * unnecessarily.  So we avoid that.  However, we do record if we ever
678		 * successfully perform a lock operation on a mount point, so if it
679		 * looks like lock ops have worked, we do report that we support them.
680		 */
681		caps = valid = 0;
682		if (nfsvers >= NFS_VER4) {
683			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
684			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
685			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
686				caps |= VOL_CAP_INT_EXTENDED_SECURITY;
687			valid |= VOL_CAP_INT_EXTENDED_SECURITY;
688			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
689				caps |= VOL_CAP_INT_EXTENDED_ATTR;
690			valid |= VOL_CAP_INT_EXTENDED_ATTR;
691#if NAMEDSTREAMS
692			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
693				caps |= VOL_CAP_INT_NAMEDSTREAMS;
694			valid |= VOL_CAP_INT_NAMEDSTREAMS;
695#endif
696		} else if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
697			/* locks disabled on this mount, so they definitely won't work */
698			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
699		} else if (nmp->nm_state & NFSSTA_LOCKSWORK) {
700			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
701			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
702		}
703		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] =
704			// VOL_CAP_INT_SEARCHFS |
705			// VOL_CAP_INT_ATTRLIST |
706			// VOL_CAP_INT_NFSEXPORT |
707			// VOL_CAP_INT_READDIRATTR |
708			// VOL_CAP_INT_EXCHANGEDATA |
709			// VOL_CAP_INT_COPYFILE |
710			// VOL_CAP_INT_ALLOCATE |
711			// VOL_CAP_INT_VOL_RENAME |
712			// VOL_CAP_INT_ADVLOCK |
713			// VOL_CAP_INT_FLOCK |
714			// VOL_CAP_INT_EXTENDED_SECURITY |
715			// VOL_CAP_INT_USERACCESS |
716			// VOL_CAP_INT_MANLOCK |
717			// VOL_CAP_INT_NAMEDSTREAMS |
718			// VOL_CAP_INT_EXTENDED_ATTR |
719			VOL_CAP_INT_REMOTE_EVENT |
720			caps;
721		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
722			VOL_CAP_INT_SEARCHFS |
723			VOL_CAP_INT_ATTRLIST |
724			VOL_CAP_INT_NFSEXPORT |
725			VOL_CAP_INT_READDIRATTR |
726			VOL_CAP_INT_EXCHANGEDATA |
727			VOL_CAP_INT_COPYFILE |
728			VOL_CAP_INT_ALLOCATE |
729			VOL_CAP_INT_VOL_RENAME |
730			// VOL_CAP_INT_ADVLOCK |
731			// VOL_CAP_INT_FLOCK |
732			// VOL_CAP_INT_EXTENDED_SECURITY |
733			// VOL_CAP_INT_USERACCESS |
734			// VOL_CAP_INT_MANLOCK |
735			// VOL_CAP_INT_NAMEDSTREAMS |
736			// VOL_CAP_INT_EXTENDED_ATTR |
737			VOL_CAP_INT_REMOTE_EVENT |
738			valid;
739
740		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
741		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
742
743		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
744		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;
745
746		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
747		lck_mtx_unlock(&nmp->nm_lock);
748	}
749
750	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
751		fsap->f_attributes.validattr.commonattr = 0;
752		fsap->f_attributes.validattr.volattr =
753			ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
754		fsap->f_attributes.validattr.dirattr = 0;
755		fsap->f_attributes.validattr.fileattr = 0;
756		fsap->f_attributes.validattr.forkattr = 0;
757
758		fsap->f_attributes.nativeattr.commonattr = 0;
759		fsap->f_attributes.nativeattr.volattr =
760			ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
761		fsap->f_attributes.nativeattr.dirattr = 0;
762		fsap->f_attributes.nativeattr.fileattr = 0;
763		fsap->f_attributes.nativeattr.forkattr = 0;
764
765		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
766	}
767
768	return (error);
769}
770
771/*
772 * nfs version 3 fsinfo rpc call
773 */
774int
775nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
776{
777	int error = 0, lockerror, status, nmlocked = 0;
778	u_int64_t xid;
779	uint32_t val, prefsize, maxsize;
780	struct nfsm_chain nmreq, nmrep;
781
782	nfsm_chain_null(&nmreq);
783	nfsm_chain_null(&nmrep);
784
785	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nmp->nm_vers));
786	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
787	nfsm_chain_build_done(error, &nmreq);
788	nfsmout_if(error);
789	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSINFO, ctx, NULL, &nmrep, &xid, &status);
790	if ((lockerror = nfs_node_lock(np)))
791		error = lockerror;
792	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
793	if (!lockerror)
794		nfs_node_unlock(np);
795	if (!error)
796		error = status;
797	nfsmout_if(error);
798
799	lck_mtx_lock(&nmp->nm_lock);
800	nmlocked = 1;
801
802	nfsm_chain_get_32(error, &nmrep, maxsize);
803	nfsm_chain_get_32(error, &nmrep, prefsize);
804	nfsmout_if(error);
805	nmp->nm_fsattr.nfsa_maxread = maxsize;
806	if (prefsize < nmp->nm_rsize)
807		nmp->nm_rsize = (prefsize + NFS_FABLKSIZE - 1) &
808			~(NFS_FABLKSIZE - 1);
809	if ((maxsize > 0) && (maxsize < nmp->nm_rsize)) {
810		nmp->nm_rsize = maxsize & ~(NFS_FABLKSIZE - 1);
811		if (nmp->nm_rsize == 0)
812			nmp->nm_rsize = maxsize;
813	}
814	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip rtmult
815
816	nfsm_chain_get_32(error, &nmrep, maxsize);
817	nfsm_chain_get_32(error, &nmrep, prefsize);
818	nfsmout_if(error);
819	nmp->nm_fsattr.nfsa_maxwrite = maxsize;
820	if (prefsize < nmp->nm_wsize)
821		nmp->nm_wsize = (prefsize + NFS_FABLKSIZE - 1) &
822			~(NFS_FABLKSIZE - 1);
823	if ((maxsize > 0) && (maxsize < nmp->nm_wsize)) {
824		nmp->nm_wsize = maxsize & ~(NFS_FABLKSIZE - 1);
825		if (nmp->nm_wsize == 0)
826			nmp->nm_wsize = maxsize;
827	}
828	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip wtmult
829
830	nfsm_chain_get_32(error, &nmrep, prefsize);
831	nfsmout_if(error);
832	if ((prefsize > 0) && (prefsize < nmp->nm_readdirsize))
833		nmp->nm_readdirsize = prefsize;
834	if ((nmp->nm_fsattr.nfsa_maxread > 0) &&
835	    (nmp->nm_fsattr.nfsa_maxread < nmp->nm_readdirsize))
836		nmp->nm_readdirsize = nmp->nm_fsattr.nfsa_maxread;
837
838	nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_maxfilesize);
839
840	nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); // skip time_delta
841
842	/* convert FS properties to our own flags */
843	nfsm_chain_get_32(error, &nmrep, val);
844	nfsmout_if(error);
845	if (val & NFSV3FSINFO_LINK)
846		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_LINK;
847	if (val & NFSV3FSINFO_SYMLINK)
848		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_SYMLINK;
849	if (val & NFSV3FSINFO_HOMOGENEOUS)
850		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_HOMOGENEOUS;
851	if (val & NFSV3FSINFO_CANSETTIME)
852		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_SET_TIME;
853	nmp->nm_state |= NFSSTA_GOTFSINFO;
854	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD);
855	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE);
856	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE);
857	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_LINK_SUPPORT);
858	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SYMLINK_SUPPORT);
859	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_HOMOGENEOUS);
860	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CANSETTIME);
861nfsmout:
862	if (nmlocked)
863		lck_mtx_unlock(&nmp->nm_lock);
864	nfsm_chain_cleanup(&nmreq);
865	nfsm_chain_cleanup(&nmrep);
866	return (error);
867}
868
869/*
870 * Mount a remote root fs via. nfs. This depends on the info in the
871 * nfs_diskless structure that has been filled in properly by some primary
872 * bootstrap.
873 * It goes something like this:
874 * - do enough of "ifconfig" by calling ifioctl() so that the system
875 *   can talk to the server
876 * - If nfs_diskless.mygateway is filled in, use that address as
877 *   a default gateway.
878 * - hand craft the swap nfs vnode hanging off a fake mount point
879 *	if swdevt[0].sw_dev == NODEV
880 * - build the rootfs mount point and call mountnfs() to do the rest.
881 */
882int
883nfs_mountroot(void)
884{
885	struct nfs_diskless nd;
886	mount_t mp = NULL;
887	vnode_t vp = NULL;
888	vfs_context_t ctx;
889	int error;
890#if !defined(NO_MOUNT_PRIVATE)
891	mount_t mppriv = NULL;
892	vnode_t vppriv = NULL;
893#endif /* NO_MOUNT_PRIVATE */
894	int v3, sotype;
895
896	/*
897	 * Call nfs_boot_init() to fill in the nfs_diskless struct.
898	 * Note: networking must already have been configured before
899	 * we're called.
900	 */
901	bzero((caddr_t) &nd, sizeof(nd));
902	error = nfs_boot_init(&nd);
903	if (error)
904		panic("nfs_boot_init: unable to initialize NFS root system information, "
905		      "error %d, check configuration: %s\n", error, PE_boot_args());
906
907	/*
908	 * Try NFSv3 first, then fallback to NFSv2.
909	 * Likewise, try TCP first, then fall back to UDP.
910	 */
911	v3 = 1;
912	sotype = SOCK_STREAM;
913
914tryagain:
915	error = nfs_boot_getfh(&nd, v3, sotype);
916	if (error) {
917		if (error == EHOSTDOWN || error == EHOSTUNREACH) {
918			if (nd.nd_root.ndm_mntfrom)
919				FREE_ZONE(nd.nd_root.ndm_mntfrom,
920					  MAXPATHLEN, M_NAMEI);
921			if (nd.nd_root.ndm_path)
922				FREE_ZONE(nd.nd_root.ndm_path,
923					  MAXPATHLEN, M_NAMEI);
924			if (nd.nd_private.ndm_mntfrom)
925				FREE_ZONE(nd.nd_private.ndm_mntfrom,
926					  MAXPATHLEN, M_NAMEI);
927			if (nd.nd_private.ndm_path)
928				FREE_ZONE(nd.nd_private.ndm_path,
929					  MAXPATHLEN, M_NAMEI);
930			return (error);
931		}
932		if (v3) {
933			if (sotype == SOCK_STREAM) {
934				printf("NFS mount (v3,TCP) failed with error %d, trying UDP...\n", error);
935				sotype = SOCK_DGRAM;
936				goto tryagain;
937			}
938			printf("NFS mount (v3,UDP) failed with error %d, trying v2...\n", error);
939			v3 = 0;
940			sotype = SOCK_STREAM;
941			goto tryagain;
942		} else if (sotype == SOCK_STREAM) {
943			printf("NFS mount (v2,TCP) failed with error %d, trying UDP...\n", error);
944			sotype = SOCK_DGRAM;
945			goto tryagain;
946		} else {
947			printf("NFS mount (v2,UDP) failed with error %d, giving up...\n", error);
948		}
949		switch(error) {
950		case EPROGUNAVAIL:
951			panic("NFS mount failed: NFS server mountd not responding, check server configuration: %s", PE_boot_args());
952		case EACCES:
953		case EPERM:
954			panic("NFS mount failed: NFS server refused mount, check server configuration: %s", PE_boot_args());
955		default:
956			panic("NFS mount failed with error %d, check configuration: %s", error, PE_boot_args());
957		}
958	}
959
960	ctx = vfs_context_kernel();
961
962	/*
963	 * Create the root mount point.
964	 */
965#if !defined(NO_MOUNT_PRIVATE)
966	{
967		//PWC hack until we have a real "mount" tool to remount root rw
968		int rw_root=0;
969		int flags = MNT_ROOTFS|MNT_RDONLY;
970		PE_parse_boot_argn("-rwroot_hack", &rw_root, sizeof (rw_root));
971		if(rw_root)
972		{
973			flags = MNT_ROOTFS;
974			kprintf("-rwroot_hack in effect: mounting root fs read/write\n");
975		}
976
977	if ((error = nfs_mount_diskless(&nd.nd_root, "/", flags, &vp, &mp, ctx)))
978#else
979	if ((error = nfs_mount_diskless(&nd.nd_root, "/", MNT_ROOTFS, &vp, &mp, ctx)))
980#endif /* NO_MOUNT_PRIVATE */
981	{
982		if (v3) {
983			if (sotype == SOCK_STREAM) {
984				printf("NFS root mount (v3,TCP) failed with %d, trying UDP...\n", error);
985				sotype = SOCK_DGRAM;
986				goto tryagain;
987			}
988			printf("NFS root mount (v3,UDP) failed with %d, trying v2...\n", error);
989			v3 = 0;
990			sotype = SOCK_STREAM;
991			goto tryagain;
992		} else if (sotype == SOCK_STREAM) {
993			printf("NFS root mount (v2,TCP) failed with %d, trying UDP...\n", error);
994			sotype = SOCK_DGRAM;
995			goto tryagain;
996		} else {
997			printf("NFS root mount (v2,UDP) failed with error %d, giving up...\n", error);
998		}
999		panic("NFS root mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
1000	}
1001	}
1002	printf("root on %s\n", nd.nd_root.ndm_mntfrom);
1003
1004	vfs_unbusy(mp);
1005	mount_list_add(mp);
1006	rootvp = vp;
1007
1008#if !defined(NO_MOUNT_PRIVATE)
1009	if (nd.nd_private.ndm_saddr.sin_addr.s_addr) {
1010	    error = nfs_mount_diskless_private(&nd.nd_private, "/private",
1011					       0, &vppriv, &mppriv, ctx);
1012	    if (error)
1013		panic("NFS /private mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
1014	    printf("private on %s\n", nd.nd_private.ndm_mntfrom);
1015
1016	    vfs_unbusy(mppriv);
1017	    mount_list_add(mppriv);
1018	}
1019
1020#endif /* NO_MOUNT_PRIVATE */
1021
1022	if (nd.nd_root.ndm_mntfrom)
1023		FREE_ZONE(nd.nd_root.ndm_mntfrom, MAXPATHLEN, M_NAMEI);
1024	if (nd.nd_root.ndm_path)
1025		FREE_ZONE(nd.nd_root.ndm_path, MAXPATHLEN, M_NAMEI);
1026	if (nd.nd_private.ndm_mntfrom)
1027		FREE_ZONE(nd.nd_private.ndm_mntfrom, MAXPATHLEN, M_NAMEI);
1028	if (nd.nd_private.ndm_path)
1029		FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI);
1030
1031	/* Get root attributes (for the time). */
1032	error = nfs_getattr(VTONFS(vp), NULL, ctx, NGA_UNCACHED);
1033	if (error)
1034		panic("NFS mount: failed to get attributes for root directory, error %d, check server", error);
1035	return (0);
1036}
1037
1038/*
1039 * Internal version of mount system call for diskless setup.
1040 */
1041static int
1042nfs_mount_diskless(
1043	struct nfs_dlmount *ndmntp,
1044	const char *mntname,
1045	int mntflag,
1046	vnode_t *vpp,
1047	mount_t *mpp,
1048	vfs_context_t ctx)
1049{
1050	mount_t mp;
1051	int error, numcomps;
1052	char *xdrbuf, *p, *cp, *frompath, *endserverp;
1053	char uaddr[MAX_IPv4_STR_LEN];
1054	struct xdrbuf xb;
1055	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1056	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
1057	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
1058	uint32_t argslength_offset, attrslength_offset, end_offset;
1059
1060	if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_mntfrom, &mp))) {
1061		printf("nfs_mount_diskless: NFS not configured\n");
1062		return (error);
1063	}
1064
1065	mp->mnt_flag |= mntflag;
1066	if (!(mntflag & MNT_RDONLY))
1067		mp->mnt_flag &= ~MNT_RDONLY;
1068
1069	/* find the server-side path being mounted */
1070	frompath = ndmntp->ndm_mntfrom;
1071	if (*frompath == '[') {  /* skip IPv6 literal address */
1072		while (*frompath && (*frompath != ']'))
1073			frompath++;
1074		if (*frompath == ']')
1075			frompath++;
1076	}
1077	while (*frompath && (*frompath != ':'))
1078		frompath++;
1079	endserverp = frompath;
1080	while (*frompath && (*frompath == ':'))
1081		frompath++;
1082	/* count fs location path components */
1083	p = frompath;
1084	while (*p && (*p == '/'))
1085		p++;
1086	numcomps = 0;
1087	while (*p) {
1088		numcomps++;
1089		while (*p && (*p != '/'))
1090			p++;
1091		while (*p && (*p == '/'))
1092			p++;
1093	}
1094
1095	/* convert address to universal address string */
1096	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
1097		printf("nfs_mount_diskless: bad address\n");
1098		return (EINVAL);
1099	}
1100
1101	/* prepare mount attributes */
1102	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1103	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1104	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1105	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1106	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1107	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1108	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1109
1110	/* prepare mount flags */
1111	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1112	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1113	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1114	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1115
1116	/* build xdr buffer */
1117	xb_init_buffer(&xb, NULL, 0);
1118	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
1119	argslength_offset = xb_offset(&xb);
1120	xb_add_32(error, &xb, 0); // args length
1121	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1122	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1123	attrslength_offset = xb_offset(&xb);
1124	xb_add_32(error, &xb, 0); // attrs length
1125	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
1126	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1127	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
1128	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
1129	/* fs location */
1130	xb_add_32(error, &xb, 1); /* fs location count */
1131	xb_add_32(error, &xb, 1); /* server count */
1132	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
1133	xb_add_32(error, &xb, 1); /* address count */
1134	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1135	xb_add_32(error, &xb, 0); /* empty server info */
1136	xb_add_32(error, &xb, numcomps); /* pathname component count */
1137	p = frompath;
1138	while (*p && (*p == '/'))
1139		p++;
1140	while (*p) {
1141		cp = p;
1142		while (*p && (*p != '/'))
1143			p++;
1144		xb_add_string(error, &xb, cp, (p - cp)); /* component */
1145		if (error)
1146			break;
1147		while (*p && (*p == '/'))
1148			p++;
1149	}
1150	xb_add_32(error, &xb, 0); /* empty fsl info */
1151	xb_add_32(error, &xb, mntflag); /* MNT flags */
1152	xb_build_done(error, &xb);
1153
1154	/* update opaque counts */
1155	end_offset = xb_offset(&xb);
1156	if (!error) {
1157		error = xb_seek(&xb, argslength_offset);
1158		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1159	}
1160	if (!error) {
1161		error = xb_seek(&xb, attrslength_offset);
1162		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1163	}
1164	if (error) {
1165		printf("nfs_mount_diskless: error %d assembling mount args\n", error);
1166		xb_cleanup(&xb);
1167		return (error);
1168	}
1169	/* grab the assembled buffer */
1170	xdrbuf = xb_buffer_base(&xb);
1171	xb.xb_flags &= ~XB_CLEANUP;
1172
1173	/* do the mount */
1174	if ((error = mountnfs(xdrbuf, mp, ctx, vpp))) {
1175		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
1176		// XXX vfs_rootmountfailed(mp);
1177		mount_list_lock();
1178		mp->mnt_vtable->vfc_refcount--;
1179		mount_list_unlock();
1180		vfs_unbusy(mp);
1181		mount_lock_destroy(mp);
1182#if CONFIG_MACF
1183		mac_mount_label_destroy(mp);
1184#endif
1185		FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1186	} else {
1187		*mpp = mp;
1188	}
1189	xb_cleanup(&xb);
1190	return (error);
1191}
1192
1193#if !defined(NO_MOUNT_PRIVATE)
1194/*
1195 * Internal version of mount system call to mount "/private"
1196 * separately in diskless setup
1197 */
1198static int
1199nfs_mount_diskless_private(
1200	struct nfs_dlmount *ndmntp,
1201	const char *mntname,
1202	int mntflag,
1203	vnode_t *vpp,
1204	mount_t *mpp,
1205	vfs_context_t ctx)
1206{
1207	mount_t mp;
1208	int error, numcomps;
1209	proc_t procp;
1210	struct vfstable *vfsp;
1211	struct nameidata nd;
1212	vnode_t vp;
1213	char *xdrbuf = NULL, *p, *cp, *frompath, *endserverp;
1214	char uaddr[MAX_IPv4_STR_LEN];
1215	struct xdrbuf xb;
1216	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1217	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
1218	uint32_t argslength_offset, attrslength_offset, end_offset;
1219
1220	procp = current_proc(); /* XXX */
1221	xb_init(&xb, 0);
1222
1223	{
1224	/*
1225	 * mimic main()!. Temporarily set up rootvnode and other stuff so
1226	 * that namei works. Need to undo this because main() does it, too
1227	 */
1228		struct filedesc *fdp;	/* pointer to file descriptor state */
1229		fdp = procp->p_fd;
1230		mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
1231
1232		/* Get the vnode for '/'. Set fdp->fd_cdir to reference it. */
1233		if (VFS_ROOT(mountlist.tqh_first, &rootvnode, NULL))
1234			panic("cannot find root vnode");
1235		error = vnode_ref(rootvnode);
1236		if (error) {
1237			printf("nfs_mountroot: vnode_ref() failed on root vnode!\n");
1238			goto out;
1239		}
1240		fdp->fd_cdir = rootvnode;
1241		fdp->fd_rdir = NULL;
1242	}
1243
1244	/*
1245	 * Get vnode to be covered
1246	 */
1247	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1248	    CAST_USER_ADDR_T(mntname), ctx);
1249	if ((error = namei(&nd))) {
1250		printf("nfs_mountroot: private namei failed!\n");
1251		goto out;
1252	}
1253	{
1254		/* undo vnode_ref() in mimic main()! */
1255		vnode_rele(rootvnode);
1256	}
1257	nameidone(&nd);
1258	vp = nd.ni_vp;
1259
1260	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) ||
1261	    (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
1262		vnode_put(vp);
1263		goto out;
1264	}
1265	if (vnode_vtype(vp) != VDIR) {
1266		vnode_put(vp);
1267		error = ENOTDIR;
1268		goto out;
1269	}
1270	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1271		if (!strncmp(vfsp->vfc_name, "nfs", sizeof(vfsp->vfc_name)))
1272			break;
1273	if (vfsp == NULL) {
1274		printf("nfs_mountroot: private NFS not configured\n");
1275		vnode_put(vp);
1276		error = ENODEV;
1277		goto out;
1278	}
1279	if (vnode_mountedhere(vp) != NULL) {
1280		vnode_put(vp);
1281		error = EBUSY;
1282		goto out;
1283	}
1284
1285	/*
1286	 * Allocate and initialize the filesystem.
1287	 */
1288	mp = _MALLOC_ZONE((u_int32_t)sizeof(struct mount), M_MOUNT, M_WAITOK);
1289	if (!mp) {
1290		printf("nfs_mountroot: unable to allocate mount structure\n");
1291		vnode_put(vp);
1292		error = ENOMEM;
1293		goto out;
1294	}
1295	bzero((char *)mp, sizeof(struct mount));
1296
1297	/* Initialize the default IO constraints */
1298	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
1299	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
1300	mp->mnt_ioflags = 0;
1301	mp->mnt_realrootvp = NULLVP;
1302	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
1303
1304	mount_lock_init(mp);
1305	TAILQ_INIT(&mp->mnt_vnodelist);
1306	TAILQ_INIT(&mp->mnt_workerqueue);
1307	TAILQ_INIT(&mp->mnt_newvnodes);
1308	(void)vfs_busy(mp, LK_NOWAIT);
1309	TAILQ_INIT(&mp->mnt_vnodelist);
1310	mount_list_lock();
1311	vfsp->vfc_refcount++;
1312	mount_list_unlock();
1313	mp->mnt_vtable = vfsp;
1314	mp->mnt_op = vfsp->vfc_vfsops;
1315	// mp->mnt_stat.f_type = vfsp->vfc_typenum;
1316	mp->mnt_flag = mntflag;
1317	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1318	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSNAMELEN-1);
1319	vp->v_mountedhere = mp;
1320	mp->mnt_vnodecovered = vp;
1321	vp = NULLVP;
1322	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
1323	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
1324	(void) copystr(ndmntp->ndm_mntfrom, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0);
1325#if CONFIG_MACF
1326	mac_mount_label_init(mp);
1327	mac_mount_label_associate(ctx, mp);
1328#endif
1329
1330	/* find the server-side path being mounted */
1331	frompath = ndmntp->ndm_mntfrom;
1332	if (*frompath == '[') {  /* skip IPv6 literal address */
1333		while (*frompath && (*frompath != ']'))
1334			frompath++;
1335		if (*frompath == ']')
1336			frompath++;
1337	}
1338	while (*frompath && (*frompath != ':'))
1339		frompath++;
1340	endserverp = frompath;
1341	while (*frompath && (*frompath == ':'))
1342		frompath++;
1343	/* count fs location path components */
1344	p = frompath;
1345	while (*p && (*p == '/'))
1346		p++;
1347	numcomps = 0;
1348	while (*p) {
1349		numcomps++;
1350		while (*p && (*p != '/'))
1351			p++;
1352		while (*p && (*p == '/'))
1353			p++;
1354	}
1355
1356	/* convert address to universal address string */
1357	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
1358		printf("nfs_mountroot: bad address\n");
1359		error = EINVAL;
1360		goto out;
1361	}
1362
1363	/* prepare mount attributes */
1364	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1365	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1366	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1367	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1368	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1369	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1370	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1371
1372	/* prepare mount flags */
1373	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1374	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1375	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1376	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1377
1378	/* build xdr buffer */
1379	xb_init_buffer(&xb, NULL, 0);
1380	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
1381	argslength_offset = xb_offset(&xb);
1382	xb_add_32(error, &xb, 0); // args length
1383	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1384	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1385	attrslength_offset = xb_offset(&xb);
1386	xb_add_32(error, &xb, 0); // attrs length
1387	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
1388	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1389	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
1390	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
1391	/* fs location */
1392	xb_add_32(error, &xb, 1); /* fs location count */
1393	xb_add_32(error, &xb, 1); /* server count */
1394	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
1395	xb_add_32(error, &xb, 1); /* address count */
1396	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1397	xb_add_32(error, &xb, 0); /* empty server info */
1398	xb_add_32(error, &xb, numcomps); /* pathname component count */
1399	p = frompath;
1400	while (*p && (*p == '/'))
1401		p++;
1402	while (*p) {
1403		cp = p;
1404		while (*p && (*p != '/'))
1405			p++;
1406		xb_add_string(error, &xb, cp, (p - cp)); /* component */
1407		if (error)
1408			break;
1409		while (*p && (*p == '/'))
1410			p++;
1411	}
1412	xb_add_32(error, &xb, 0); /* empty fsl info */
1413	xb_add_32(error, &xb, mntflag); /* MNT flags */
1414	xb_build_done(error, &xb);
1415
1416	/* update opaque counts */
1417	end_offset = xb_offset(&xb);
1418	if (!error) {
1419		error = xb_seek(&xb, argslength_offset);
1420		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1421	}
1422	if (!error) {
1423		error = xb_seek(&xb, attrslength_offset);
1424		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1425	}
1426	if (error) {
1427		printf("nfs_mountroot: error %d assembling mount args\n", error);
1428		goto out;
1429	}
1430	/* grab the assembled buffer */
1431	xdrbuf = xb_buffer_base(&xb);
1432	xb.xb_flags &= ~XB_CLEANUP;
1433
1434	/* do the mount */
1435	if ((error = mountnfs(xdrbuf, mp, ctx, &vp))) {
1436		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
1437		vnode_put(mp->mnt_vnodecovered);
1438		mount_list_lock();
1439		vfsp->vfc_refcount--;
1440		mount_list_unlock();
1441		vfs_unbusy(mp);
1442		mount_lock_destroy(mp);
1443#if CONFIG_MACF
1444		mac_mount_label_destroy(mp);
1445#endif
1446		FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
1447		goto out;
1448	}
1449
1450	*mpp = mp;
1451	*vpp = vp;
1452out:
1453	xb_cleanup(&xb);
1454	return (error);
1455}
1456#endif /* NO_MOUNT_PRIVATE */
1457
1458/*
1459 * Convert old style NFS mount args to XDR.
1460 */
1461static int
1462nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int argsversion, int inkernel, char **xdrbufp)
1463{
1464	int error = 0, args64bit, argsize, numcomps;
1465	struct user_nfs_args args;
1466	struct nfs_args tempargs;
1467	caddr_t argsp;
1468	size_t len;
1469	u_char nfh[NFS4_FHSIZE];
1470	char *mntfrom, *endserverp, *frompath, *p, *cp;
1471	struct sockaddr_storage ss;
1472	void *sinaddr;
1473	char uaddr[MAX_IPv6_STR_LEN];
1474	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1475	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
1476	uint32_t nfsvers, nfslockmode = 0, argslength_offset, attrslength_offset, end_offset;
1477	struct xdrbuf xb;
1478
1479	*xdrbufp = NULL;
1480
1481	/* allocate a temporary buffer for mntfrom */
1482	MALLOC_ZONE(mntfrom, char*, MAXPATHLEN, M_NAMEI, M_WAITOK);
1483	if (!mntfrom)
1484		return (ENOMEM);
1485
1486	args64bit = (inkernel || vfs_context_is64bit(ctx));
1487	argsp = args64bit ? (void*)&args : (void*)&tempargs;
1488
1489	argsize = args64bit ? sizeof(args) : sizeof(tempargs);
1490	switch (argsversion) {
1491	case 3:
1492		argsize -= NFS_ARGSVERSION4_INCSIZE;
1493	case 4:
1494		argsize -= NFS_ARGSVERSION5_INCSIZE;
1495	case 5:
1496		argsize -= NFS_ARGSVERSION6_INCSIZE;
1497	case 6:
1498		break;
1499	default:
1500		error = EPROGMISMATCH;
1501		goto nfsmout;
1502	}
1503
1504	/* read in the structure */
1505	if (inkernel)
1506		bcopy(CAST_DOWN(void *, data), argsp, argsize);
1507	else
1508		error = copyin(data, argsp, argsize);
1509	nfsmout_if(error);
1510
1511	if (!args64bit) {
1512		args.addrlen = tempargs.addrlen;
1513		args.sotype = tempargs.sotype;
1514		args.proto = tempargs.proto;
1515		args.fhsize = tempargs.fhsize;
1516		args.flags = tempargs.flags;
1517		args.wsize = tempargs.wsize;
1518		args.rsize = tempargs.rsize;
1519		args.readdirsize = tempargs.readdirsize;
1520		args.timeo = tempargs.timeo;
1521		args.retrans = tempargs.retrans;
1522		args.maxgrouplist = tempargs.maxgrouplist;
1523		args.readahead = tempargs.readahead;
1524		args.leaseterm = tempargs.leaseterm;
1525		args.deadthresh = tempargs.deadthresh;
1526		args.addr = CAST_USER_ADDR_T(tempargs.addr);
1527		args.fh = CAST_USER_ADDR_T(tempargs.fh);
1528		args.hostname = CAST_USER_ADDR_T(tempargs.hostname);
1529		if (args.version >= 4) {
1530			args.acregmin = tempargs.acregmin;
1531			args.acregmax = tempargs.acregmax;
1532			args.acdirmin = tempargs.acdirmin;
1533			args.acdirmax = tempargs.acdirmax;
1534		}
1535		if (args.version >= 5)
1536			args.auth = tempargs.auth;
1537		if (args.version >= 6)
1538			args.deadtimeout = tempargs.deadtimeout;
1539	}
1540
1541	if ((args.fhsize < 0) || (args.fhsize > NFS4_FHSIZE)) {
1542		error = EINVAL;
1543		goto nfsmout;
1544	}
1545	if (args.fhsize > 0) {
1546		if (inkernel)
1547			bcopy(CAST_DOWN(void *, args.fh), (caddr_t)nfh, args.fhsize);
1548		else
1549			error = copyin(args.fh, (caddr_t)nfh, args.fhsize);
1550		nfsmout_if(error);
1551	}
1552
1553	if (inkernel)
1554		error = copystr(CAST_DOWN(void *, args.hostname), mntfrom, MAXPATHLEN-1, &len);
1555	else
1556		error = copyinstr(args.hostname, mntfrom, MAXPATHLEN-1, &len);
1557	nfsmout_if(error);
1558	bzero(&mntfrom[len], MAXPATHLEN - len);
1559
1560	/* find the server-side path being mounted */
1561	frompath = mntfrom;
1562	if (*frompath == '[') {  /* skip IPv6 literal address */
1563		while (*frompath && (*frompath != ']'))
1564			frompath++;
1565		if (*frompath == ']')
1566			frompath++;
1567	}
1568	while (*frompath && (*frompath != ':'))
1569		frompath++;
1570	endserverp = frompath;
1571	while (*frompath && (*frompath == ':'))
1572		frompath++;
1573	/* count fs location path components */
1574	p = frompath;
1575	while (*p && (*p == '/'))
1576		p++;
1577	numcomps = 0;
1578	while (*p) {
1579		numcomps++;
1580		while (*p && (*p != '/'))
1581			p++;
1582		while (*p && (*p == '/'))
1583			p++;
1584	}
1585
1586	/* copy socket address */
1587	if (inkernel)
1588		bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen);
1589	else {
1590		if ((size_t)args.addrlen > sizeof (struct sockaddr_storage))
1591			error = EINVAL;
1592		else
1593			error = copyin(args.addr, &ss, args.addrlen);
1594	}
1595	nfsmout_if(error);
1596	ss.ss_len = args.addrlen;
1597
1598	/* convert address to universal address string */
1599	if (ss.ss_family == AF_INET)
1600		sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
1601	else if (ss.ss_family == AF_INET6)
1602		sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
1603	else
1604		sinaddr = NULL;
1605	if (!sinaddr || (inet_ntop(ss.ss_family, sinaddr, uaddr, sizeof(uaddr)) != uaddr)) {
1606		error = EINVAL;
1607		goto nfsmout;
1608	}
1609
1610	/* prepare mount flags */
1611	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1612	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1613	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
1614	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
1615	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1616	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
1617	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
1618	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
1619	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
1620	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
1621	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
1622	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
1623	if (args.flags & NFSMNT_SOFT)
1624		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
1625	if (args.flags & NFSMNT_INT)
1626		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
1627	if (args.flags & NFSMNT_RESVPORT)
1628		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1629	if (args.flags & NFSMNT_NOCONN)
1630		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
1631	if (args.flags & NFSMNT_DUMBTIMR)
1632		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
1633	if (args.flags & NFSMNT_CALLUMNT)
1634		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
1635	if (args.flags & NFSMNT_RDIRPLUS)
1636		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
1637	if (args.flags & NFSMNT_NONEGNAMECACHE)
1638		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
1639	if (args.flags & NFSMNT_MUTEJUKEBOX)
1640		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
1641	if (args.flags & NFSMNT_NOQUOTA)
1642		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
1643
1644	/* prepare mount attributes */
1645	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1646	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
1647	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1648	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1649	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1650	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1651	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1652	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1653	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
1654	if (args.flags & NFSMNT_NFSV4)
1655		nfsvers = 4;
1656	else if (args.flags & NFSMNT_NFSV3)
1657		nfsvers = 3;
1658	else
1659		nfsvers = 2;
1660	if ((args.flags & NFSMNT_RSIZE) && (args.rsize > 0))
1661		NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
1662	if ((args.flags & NFSMNT_WSIZE) && (args.wsize > 0))
1663		NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
1664	if ((args.flags & NFSMNT_TIMEO) && (args.timeo > 0))
1665		NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
1666	if ((args.flags & NFSMNT_RETRANS) && (args.retrans > 0))
1667		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
1668	if ((args.flags & NFSMNT_MAXGRPS) && (args.maxgrouplist > 0))
1669		NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
1670	if ((args.flags & NFSMNT_READAHEAD) && (args.readahead > 0))
1671		NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
1672	if ((args.flags & NFSMNT_READDIRSIZE) && (args.readdirsize > 0))
1673		NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
1674	if ((args.flags & NFSMNT_NOLOCKS) ||
1675	    (args.flags & NFSMNT_LOCALLOCKS)) {
1676		NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
1677		if (args.flags & NFSMNT_NOLOCKS)
1678			nfslockmode = NFS_LOCK_MODE_DISABLED;
1679		else if (args.flags & NFSMNT_LOCALLOCKS)
1680			nfslockmode = NFS_LOCK_MODE_LOCAL;
1681		else
1682			nfslockmode = NFS_LOCK_MODE_ENABLED;
1683	}
1684	if (args.version >= 4) {
1685		if ((args.flags & NFSMNT_ACREGMIN) && (args.acregmin > 0))
1686			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
1687		if ((args.flags & NFSMNT_ACREGMAX) && (args.acregmax > 0))
1688			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
1689		if ((args.flags & NFSMNT_ACDIRMIN) && (args.acdirmin > 0))
1690			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
1691		if ((args.flags & NFSMNT_ACDIRMAX) && (args.acdirmax > 0))
1692			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
1693	}
1694	if (args.version >= 5) {
1695		if ((args.flags & NFSMNT_SECFLAVOR) || (args.flags & NFSMNT_SECSYSOK))
1696			NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
1697	}
1698	if (args.version >= 6) {
1699		if ((args.flags & NFSMNT_DEADTIMEOUT) && (args.deadtimeout > 0))
1700			NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
1701	}
1702
1703	/* build xdr buffer */
1704	xb_init_buffer(&xb, NULL, 0);
1705	xb_add_32(error, &xb, args.version);
1706	argslength_offset = xb_offset(&xb);
1707	xb_add_32(error, &xb, 0); // args length
1708	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1709	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1710	attrslength_offset = xb_offset(&xb);
1711	xb_add_32(error, &xb, 0); // attrs length
1712	xb_add_bitmap(error, &xb, mflags_mask, NFS_MFLAG_BITMAP_LEN); /* mask */
1713	xb_add_bitmap(error, &xb, mflags, NFS_MFLAG_BITMAP_LEN); /* value */
1714	xb_add_32(error, &xb, nfsvers);
1715	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
1716		xb_add_32(error, &xb, args.rsize);
1717	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
1718		xb_add_32(error, &xb, args.wsize);
1719	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
1720		xb_add_32(error, &xb, args.readdirsize);
1721	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
1722		xb_add_32(error, &xb, args.readahead);
1723	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
1724		xb_add_32(error, &xb, args.acregmin);
1725		xb_add_32(error, &xb, 0);
1726	}
1727	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
1728		xb_add_32(error, &xb, args.acregmax);
1729		xb_add_32(error, &xb, 0);
1730	}
1731	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
1732		xb_add_32(error, &xb, args.acdirmin);
1733		xb_add_32(error, &xb, 0);
1734	}
1735	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
1736		xb_add_32(error, &xb, args.acdirmax);
1737		xb_add_32(error, &xb, 0);
1738	}
1739	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
1740		xb_add_32(error, &xb, nfslockmode);
1741	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
1742		uint32_t flavors[2], i=0;
1743		if (args.flags & NFSMNT_SECFLAVOR)
1744			flavors[i++] = args.auth;
1745		if ((args.flags & NFSMNT_SECSYSOK) && ((i == 0) || (flavors[0] != RPCAUTH_SYS)))
1746			flavors[i++] = RPCAUTH_SYS;
1747		xb_add_word_array(error, &xb, flavors, i);
1748	}
1749	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
1750		xb_add_32(error, &xb, args.maxgrouplist);
1751	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
1752		xb_add_string(error, &xb, ((args.sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1753	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
1754		xb_add_32(error, &xb, ((ss.ss_family == AF_INET) ?
1755			ntohs(((struct sockaddr_in*)&ss)->sin_port) :
1756			ntohs(((struct sockaddr_in6*)&ss)->sin6_port)));
1757	/* NFS_MATTR_MOUNT_PORT (not available in old args) */
1758	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
1759		/* convert from .1s increments to time */
1760		xb_add_32(error, &xb, args.timeo/10);
1761		xb_add_32(error, &xb, (args.timeo%10)*100000000);
1762	}
1763	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
1764		xb_add_32(error, &xb, args.retrans);
1765	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
1766		xb_add_32(error, &xb, args.deadtimeout);
1767		xb_add_32(error, &xb, 0);
1768	}
1769	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH))
1770		xb_add_fh(error, &xb, &nfh[0], args.fhsize);
1771	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
1772		xb_add_32(error, &xb, 1); /* fs location count */
1773		xb_add_32(error, &xb, 1); /* server count */
1774		xb_add_string(error, &xb, mntfrom, (endserverp - mntfrom)); /* server name */
1775		xb_add_32(error, &xb, 1); /* address count */
1776		xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1777		xb_add_32(error, &xb, 0); /* empty server info */
1778		xb_add_32(error, &xb, numcomps); /* pathname component count */
1779		nfsmout_if(error);
1780		p = frompath;
1781		while (*p && (*p == '/'))
1782			p++;
1783		while (*p) {
1784			cp = p;
1785			while (*p && (*p != '/'))
1786				p++;
1787			xb_add_string(error, &xb, cp, (p - cp)); /* component */
1788			nfsmout_if(error);
1789			while (*p && (*p == '/'))
1790				p++;
1791		}
1792		xb_add_32(error, &xb, 0); /* empty fsl info */
1793	}
1794	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
1795		xb_add_32(error, &xb, (vfs_flags(mp) & MNT_VISFLAGMASK)); /* VFS MNT_* flags */
1796	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM))
1797		xb_add_string(error, &xb, mntfrom, strlen(mntfrom)); /* fixed f_mntfromname */
1798	xb_build_done(error, &xb);
1799
1800	/* update opaque counts */
1801	end_offset = xb_offset(&xb);
1802	error = xb_seek(&xb, argslength_offset);
1803	xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1804	nfsmout_if(error);
1805	error = xb_seek(&xb, attrslength_offset);
1806	xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1807
1808	if (!error) {
1809		/* grab the assembled buffer */
1810		*xdrbufp = xb_buffer_base(&xb);
1811		xb.xb_flags &= ~XB_CLEANUP;
1812	}
1813nfsmout:
1814	xb_cleanup(&xb);
1815	FREE_ZONE(mntfrom, MAXPATHLEN, M_NAMEI);
1816	return (error);
1817}
1818
1819/*
1820 * VFS Operations.
1821 *
1822 * mount system call
1823 */
1824int
1825nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
1826{
1827	int error = 0, inkernel = vfs_iskernelmount(mp);
1828	uint32_t argsversion, argslength;
1829	char *xdrbuf = NULL;
1830
1831	/* read in version */
1832	if (inkernel)
1833		bcopy(CAST_DOWN(void *, data), &argsversion, sizeof(argsversion));
1834	else if ((error = copyin(data, &argsversion, sizeof(argsversion))))
1835		return (error);
1836
1837	/* If we have XDR args, then all values in the buffer are in network order */
1838	if (argsversion == htonl(NFS_ARGSVERSION_XDR))
1839		argsversion = NFS_ARGSVERSION_XDR;
1840
1841	switch (argsversion) {
1842	case 3:
1843	case 4:
1844	case 5:
1845	case 6:
1846		/* convert old-style args to xdr */
1847		error = nfs_convert_old_nfs_args(mp, data, ctx, argsversion, inkernel, &xdrbuf);
1848		break;
1849	case NFS_ARGSVERSION_XDR:
1850		/* copy in xdr buffer */
1851		if (inkernel)
1852			bcopy(CAST_DOWN(void *, (data + XDRWORD)), &argslength, XDRWORD);
1853		else
1854			error = copyin((data + XDRWORD), &argslength, XDRWORD);
1855		if (error)
1856			break;
1857		argslength = ntohl(argslength);
1858		/* put a reasonable limit on the size of the XDR args */
1859		if (argslength > 16*1024) {
1860			error = E2BIG;
1861			break;
1862		}
1863		/* allocate xdr buffer */
1864		xdrbuf = xb_malloc(xdr_rndup(argslength));
1865		if (!xdrbuf) {
1866			error = ENOMEM;
1867			break;
1868		}
1869		if (inkernel)
1870			bcopy(CAST_DOWN(void *, data), xdrbuf, argslength);
1871		else
1872			error = copyin(data, xdrbuf, argslength);
1873		break;
1874	default:
1875		error = EPROGMISMATCH;
1876	}
1877
1878	if (error) {
1879		if (xdrbuf)
1880			xb_free(xdrbuf);
1881		return (error);
1882	}
1883	error = mountnfs(xdrbuf, mp, ctx, &vp);
1884	return (error);
1885}
1886
1887/*
1888 * Common code for mount and mountroot
1889 */
1890
1891/* Set up an NFSv2/v3 mount */
1892int
1893nfs3_mount(
1894	struct nfsmount *nmp,
1895	vfs_context_t ctx,
1896	nfsnode_t *npp)
1897{
1898	int error = 0;
1899	struct nfs_vattr nvattr;
1900	u_int64_t xid;
1901
1902	*npp = NULL;
1903
1904	if (!nmp->nm_fh)
1905		return (EINVAL);
1906
1907	/*
1908	 * Get file attributes for the mountpoint.  These are needed
1909	 * in order to properly create the root vnode.
1910	 */
1911	error = nfs3_getattr_rpc(NULL, nmp->nm_mountp, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len, 0,
1912			ctx, &nvattr, &xid);
1913	if (error)
1914		goto out;
1915
1916	error = nfs_nget(nmp->nm_mountp, NULL, NULL, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len,
1917			&nvattr, &xid, RPCAUTH_UNKNOWN, NG_MARKROOT, npp);
1918	if (*npp)
1919		nfs_node_unlock(*npp);
1920	if (error)
1921		goto out;
1922
1923	/*
1924	 * Try to make sure we have all the general info from the server.
1925	 */
1926	if (nmp->nm_vers == NFS_VER2) {
1927		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
1928		nmp->nm_fsattr.nfsa_maxname = NFS_MAXNAMLEN;
1929	} else if (nmp->nm_vers == NFS_VER3) {
1930		/* get the NFSv3 FSINFO */
1931		error = nfs3_fsinfo(nmp, *npp, ctx);
1932		if (error)
1933			goto out;
1934		/* If the server indicates all pathconf info is */
1935		/* the same, grab a copy of that info now */
1936		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_HOMOGENEOUS) &&
1937		    (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
1938			struct nfs_fsattr nfsa;
1939			if (!nfs3_pathconf_rpc(*npp, &nfsa, ctx)) {
1940				/* cache a copy of the results */
1941				lck_mtx_lock(&nmp->nm_lock);
1942				nfs3_pathconf_cache(nmp, &nfsa);
1943				lck_mtx_unlock(&nmp->nm_lock);
1944			}
1945		}
1946	}
1947out:
1948	if (*npp && error) {
1949		vnode_put(NFSTOV(*npp));
1950		vnode_recycle(NFSTOV(*npp));
1951		*npp = NULL;
1952	}
1953	return (error);
1954}
1955
1956/*
1957 * Update an NFSv4 mount path with the contents of the symlink.
1958 *
1959 * Read the link for the given file handle.
1960 * Insert the link's components into the path.
1961 */
1962int
1963nfs4_mount_update_path_with_symlink(struct nfsmount *nmp, struct nfs_fs_path *nfsp, uint32_t curcomp, fhandle_t *dirfhp, int *depthp, fhandle_t *fhp, vfs_context_t ctx)
1964{
1965	int error = 0, status, numops;
1966	uint32_t len = 0, comp, newcomp, linkcompcount;
1967	u_int64_t xid;
1968	struct nfsm_chain nmreq, nmrep;
1969	struct nfsreq rq, *req = &rq;
1970	struct nfsreq_secinfo_args si;
1971	char *link = NULL, *p, *q, ch;
1972	struct nfs_fs_path nfsp2;
1973
1974	bzero(&nfsp2, sizeof(nfsp2));
1975	if (dirfhp->fh_len)
1976		NFSREQ_SECINFO_SET(&si, NULL, dirfhp->fh_data, dirfhp->fh_len, nfsp->np_components[curcomp], 0);
1977	else
1978		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, nfsp->np_components[curcomp], 0);
1979	nfsm_chain_null(&nmreq);
1980	nfsm_chain_null(&nmrep);
1981
1982	MALLOC_ZONE(link, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1983	if (!link)
1984		error = ENOMEM;
1985
1986	// PUTFH, READLINK
1987	numops = 2;
1988	nfsm_chain_build_alloc_init(error, &nmreq, 12 * NFSX_UNSIGNED);
1989	nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
1990	numops--;
1991	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
1992	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp->fh_data, fhp->fh_len);
1993	numops--;
1994	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
1995	nfsm_chain_build_done(error, &nmreq);
1996	nfsm_assert(error, (numops == 0), EPROTO);
1997	nfsmout_if(error);
1998
1999	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2000			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2001	if (!error)
2002		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2003
2004	nfsm_chain_skip_tag(error, &nmrep);
2005	nfsm_chain_get_32(error, &nmrep, numops);
2006	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
2007	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
2008	nfsm_chain_get_32(error, &nmrep, len);
2009	nfsmout_if(error);
2010	if (len == 0)
2011		error = ENOENT;
2012	else if (len >= MAXPATHLEN)
2013		len = MAXPATHLEN - 1;
2014	nfsm_chain_get_opaque(error, &nmrep, len, link);
2015	nfsmout_if(error);
2016	/* make sure link string is terminated properly */
2017	link[len] = '\0';
2018
2019	/* count the number of components in link */
2020	p = link;
2021	while (*p && (*p == '/'))
2022		p++;
2023	linkcompcount = 0;
2024	while (*p) {
2025		linkcompcount++;
2026		while (*p && (*p != '/'))
2027			p++;
2028		while (*p && (*p == '/'))
2029			p++;
2030	}
2031
2032	/* free up used components */
2033	for (comp=0; comp <= curcomp; comp++) {
2034		if (nfsp->np_components[comp]) {
2035			FREE(nfsp->np_components[comp], M_TEMP);
2036			nfsp->np_components[comp] = NULL;
2037		}
2038	}
2039
2040	/* set up new path */
2041	nfsp2.np_compcount = nfsp->np_compcount - curcomp - 1 + linkcompcount;
2042	MALLOC(nfsp2.np_components, char **, nfsp2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2043	if (!nfsp2.np_components) {
2044		error = ENOMEM;
2045		goto nfsmout;
2046	}
2047
2048	/* add link components */
2049	p = link;
2050	while (*p && (*p == '/'))
2051		p++;
2052	for (newcomp=0; newcomp < linkcompcount; newcomp++) {
2053		/* find end of component */
2054		q = p;
2055		while (*q && (*q != '/'))
2056			q++;
2057		MALLOC(nfsp2.np_components[newcomp], char *, q-p+1, M_TEMP, M_WAITOK|M_ZERO);
2058		if (!nfsp2.np_components[newcomp]) {
2059			error = ENOMEM;
2060			break;
2061		}
2062		ch = *q;
2063		*q = '\0';
2064		strlcpy(nfsp2.np_components[newcomp], p, q-p+1);
2065		*q = ch;
2066		p = q;
2067		while (*p && (*p == '/'))
2068			p++;
2069	}
2070	nfsmout_if(error);
2071
2072	/* add remaining components */
2073	for(comp = curcomp + 1; comp < nfsp->np_compcount; comp++,newcomp++) {
2074		nfsp2.np_components[newcomp] = nfsp->np_components[comp];
2075		nfsp->np_components[comp] = NULL;
2076	}
2077
2078	/* move new path into place */
2079	FREE(nfsp->np_components, M_TEMP);
2080	nfsp->np_components = nfsp2.np_components;
2081	nfsp->np_compcount = nfsp2.np_compcount;
2082	nfsp2.np_components = NULL;
2083
2084	/* for absolute link, let the caller now that the next dirfh is root */
2085	if (link[0] == '/') {
2086		dirfhp->fh_len = 0;
2087		*depthp = 0;
2088	}
2089nfsmout:
2090	if (link)
2091		FREE_ZONE(link, MAXPATHLEN, M_NAMEI);
2092	if (nfsp2.np_components) {
2093		for (comp=0; comp < nfsp2.np_compcount; comp++)
2094			if (nfsp2.np_components[comp])
2095				FREE(nfsp2.np_components[comp], M_TEMP);
2096		FREE(nfsp2.np_components, M_TEMP);
2097	}
2098	nfsm_chain_cleanup(&nmreq);
2099	nfsm_chain_cleanup(&nmrep);
2100	return (error);
2101}
2102
2103/* Set up an NFSv4 mount */
2104int
2105nfs4_mount(
2106	struct nfsmount *nmp,
2107	vfs_context_t ctx,
2108	nfsnode_t *npp)
2109{
2110	struct nfsm_chain nmreq, nmrep;
2111	int error = 0, numops, status, interval, isdotdot, loopcnt = 0, depth = 0;
2112	struct nfs_fs_path fspath, *nfsp, fspath2;
2113	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], comp, comp2;
2114	fhandle_t fh, dirfh;
2115	struct nfs_vattr nvattr;
2116	u_int64_t xid;
2117	struct nfsreq rq, *req = &rq;
2118	struct nfsreq_secinfo_args si;
2119	struct nfs_sec sec;
2120	struct nfs_fs_locations nfsls;
2121
2122	*npp = NULL;
2123	fh.fh_len = dirfh.fh_len = 0;
2124	TAILQ_INIT(&nmp->nm_open_owners);
2125	TAILQ_INIT(&nmp->nm_delegations);
2126	TAILQ_INIT(&nmp->nm_dreturnq);
2127	nmp->nm_stategenid = 1;
2128	NVATTR_INIT(&nvattr);
2129	bzero(&nfsls, sizeof(nfsls));
2130	nfsm_chain_null(&nmreq);
2131	nfsm_chain_null(&nmrep);
2132
2133	/*
2134	 * If no security flavors were specified we'll want to default to the server's
2135	 * preferred flavor.  For NFSv4.0 we need a file handle and name to get that via
2136	 * SECINFO, so we'll do that on the last component of the server path we are
2137	 * mounting.  If we are mounting the server's root, we'll need to defer the
2138	 * SECINFO call to the first successful LOOKUP request.
2139	 */
2140	if (!nmp->nm_sec.count)
2141		nmp->nm_state |= NFSSTA_NEEDSECINFO;
2142
2143	/* make a copy of the current location's path */
2144	nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
2145	bzero(&fspath, sizeof(fspath));
2146	fspath.np_compcount = nfsp->np_compcount;
2147	if (fspath.np_compcount > 0) {
2148		MALLOC(fspath.np_components, char **, fspath.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2149		if (!fspath.np_components) {
2150			error = ENOMEM;
2151			goto nfsmout;
2152		}
2153		for (comp=0; comp < nfsp->np_compcount; comp++) {
2154			int slen = strlen(nfsp->np_components[comp]);
2155			MALLOC(fspath.np_components[comp], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
2156			if (!fspath.np_components[comp]) {
2157				error = ENOMEM;
2158				break;
2159			}
2160			strlcpy(fspath.np_components[comp], nfsp->np_components[comp], slen+1);
2161		}
2162		if (error)
2163			goto nfsmout;
2164	}
2165
2166	/* for mirror mounts, we can just use the file handle passed in */
2167	if (nmp->nm_fh) {
2168		dirfh.fh_len = nmp->nm_fh->fh_len;
2169		bcopy(nmp->nm_fh->fh_data, dirfh.fh_data, dirfh.fh_len);
2170		NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, NULL, 0);
2171		goto gotfh;
2172	}
2173
2174	/* otherwise, we need to get the fh for the directory we are mounting */
2175
2176	/* if no components, just get root */
2177	if (fspath.np_compcount == 0) {
2178nocomponents:
2179		// PUTROOTFH + GETATTR(FH)
2180		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, NULL, 0);
2181		numops = 2;
2182		nfsm_chain_build_alloc_init(error, &nmreq, 9 * NFSX_UNSIGNED);
2183		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2184		numops--;
2185		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
2186		numops--;
2187		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2188		NFS_CLEAR_ATTRIBUTES(bitmap);
2189		NFS4_DEFAULT_ATTRIBUTES(bitmap);
2190		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
2191		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2192		nfsm_chain_build_done(error, &nmreq);
2193		nfsm_assert(error, (numops == 0), EPROTO);
2194		nfsmout_if(error);
2195		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2196				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2197		if (!error)
2198			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2199		nfsm_chain_skip_tag(error, &nmrep);
2200		nfsm_chain_get_32(error, &nmrep, numops);
2201		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTROOTFH);
2202		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2203		nfsmout_if(error);
2204		NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2205		error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, &dirfh, NULL, NULL);
2206		if (!error && !NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
2207			printf("nfs: mount didn't return filehandle?\n");
2208			error = EBADRPC;
2209		}
2210		nfsmout_if(error);
2211		nfsm_chain_cleanup(&nmrep);
2212		nfsm_chain_null(&nmreq);
2213		NVATTR_CLEANUP(&nvattr);
2214		goto gotfh;
2215	}
2216
2217	/* look up each path component */
2218	for (comp=0; comp < fspath.np_compcount; ) {
2219		isdotdot = 0;
2220		if (fspath.np_components[comp][0] == '.') {
2221			if (fspath.np_components[comp][1] == '\0') {
2222				/* skip "." */
2223				comp++;
2224				continue;
2225			}
2226			/* treat ".." specially */
2227			if ((fspath.np_components[comp][1] == '.') &&
2228			    (fspath.np_components[comp][2] == '\0'))
2229			    	isdotdot = 1;
2230			if (isdotdot && (dirfh.fh_len == 0)) {
2231				/* ".." in root directory is same as "." */
2232				comp++;
2233				continue;
2234			}
2235		}
2236		// PUT(ROOT)FH + LOOKUP(P) + GETFH + GETATTR
2237		if (dirfh.fh_len == 0)
2238			NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
2239		else
2240			NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
2241		numops = 4;
2242		nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
2243		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2244		numops--;
2245		if (dirfh.fh_len) {
2246			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
2247			nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
2248		} else {
2249			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
2250		}
2251		numops--;
2252		if (isdotdot) {
2253			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
2254		} else {
2255			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
2256			nfsm_chain_add_name(error, &nmreq,
2257				fspath.np_components[comp], strlen(fspath.np_components[comp]), nmp);
2258		}
2259		numops--;
2260		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
2261		numops--;
2262		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2263		NFS_CLEAR_ATTRIBUTES(bitmap);
2264		NFS4_DEFAULT_ATTRIBUTES(bitmap);
2265		/* if no namedattr support or component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
2266		if (NMFLAG(nmp, NONAMEDATTR) || !strcmp(fspath.np_components[comp], ".zfs"))
2267			NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
2268		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2269		nfsm_chain_build_done(error, &nmreq);
2270		nfsm_assert(error, (numops == 0), EPROTO);
2271		nfsmout_if(error);
2272		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2273				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2274		if (!error)
2275			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2276		nfsm_chain_skip_tag(error, &nmrep);
2277		nfsm_chain_get_32(error, &nmrep, numops);
2278		nfsm_chain_op_check(error, &nmrep, dirfh.fh_len ? NFS_OP_PUTFH : NFS_OP_PUTROOTFH);
2279		nfsm_chain_op_check(error, &nmrep, isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP);
2280		nfsmout_if(error);
2281		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
2282		nfsm_chain_get_32(error, &nmrep, fh.fh_len);
2283		nfsm_chain_get_opaque(error, &nmrep, fh.fh_len, fh.fh_data);
2284		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2285		if (!error) {
2286			NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2287			error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, &nfsls);
2288		}
2289		nfsm_chain_cleanup(&nmrep);
2290		nfsm_chain_null(&nmreq);
2291		if (error) {
2292			/* LOOKUP succeeded but GETATTR failed?  This could be a referral. */
2293			/* Try the lookup again with a getattr for fs_locations. */
2294			nfs_fs_locations_cleanup(&nfsls);
2295			error = nfs4_get_fs_locations(nmp, NULL, dirfh.fh_data, dirfh.fh_len, fspath.np_components[comp], ctx, &nfsls);
2296			if (!error && (nfsls.nl_numlocs < 1))
2297				error = ENOENT;
2298			nfsmout_if(error);
2299			if (++loopcnt > MAXSYMLINKS) {
2300				/* too many symlink/referral redirections */
2301				error = ELOOP;
2302				goto nfsmout;
2303			}
2304			/* tear down the current connection */
2305			nfs_disconnect(nmp);
2306			/* replace fs locations */
2307			nfs_fs_locations_cleanup(&nmp->nm_locations);
2308			nmp->nm_locations = nfsls;
2309			bzero(&nfsls, sizeof(nfsls));
2310			/* initiate a connection using the new fs locations */
2311			error = nfs_mount_connect(nmp);
2312			if (!error && !(nmp->nm_locations.nl_current.nli_flags & NLI_VALID))
2313				error = EIO;
2314			nfsmout_if(error);
2315			/* add new server's remote path to beginning of our path and continue */
2316			nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
2317			bzero(&fspath2, sizeof(fspath2));
2318			fspath2.np_compcount = (fspath.np_compcount - comp - 1) + nfsp->np_compcount;
2319			if (fspath2.np_compcount > 0) {
2320				MALLOC(fspath2.np_components, char **, fspath2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2321				if (!fspath2.np_components) {
2322					error = ENOMEM;
2323					goto nfsmout;
2324				}
2325				for (comp2=0; comp2 < nfsp->np_compcount; comp2++) {
2326					int slen = strlen(nfsp->np_components[comp2]);
2327					MALLOC(fspath2.np_components[comp2], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
2328					if (!fspath2.np_components[comp2]) {
2329						/* clean up fspath2, then error out */
2330						while (comp2 > 0) {
2331							comp2--;
2332							FREE(fspath2.np_components[comp2], M_TEMP);
2333						}
2334						FREE(fspath2.np_components, M_TEMP);
2335						error = ENOMEM;
2336						goto nfsmout;
2337					}
2338					strlcpy(fspath2.np_components[comp2], nfsp->np_components[comp2], slen+1);
2339				}
2340				if ((fspath.np_compcount - comp - 1) > 0)
2341					bcopy(&fspath.np_components[comp+1], &fspath2.np_components[nfsp->np_compcount], (fspath.np_compcount - comp - 1)*sizeof(char*));
2342				/* free up unused parts of old path (prior components and component array) */
2343				do {
2344					FREE(fspath.np_components[comp], M_TEMP);
2345				} while (comp-- > 0);
2346				FREE(fspath.np_components, M_TEMP);
2347				/* put new path in place */
2348				fspath = fspath2;
2349			}
2350			/* reset dirfh and component index */
2351			dirfh.fh_len = 0;
2352			comp = 0;
2353			NVATTR_CLEANUP(&nvattr);
2354			if (fspath.np_compcount == 0)
2355				goto nocomponents;
2356			continue;
2357		}
2358		nfsmout_if(error);
2359		/* if file handle is for a symlink, then update the path with the symlink contents */
2360		if (NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) && (nvattr.nva_type == VLNK)) {
2361			if (++loopcnt > MAXSYMLINKS)
2362				error = ELOOP;
2363			else
2364				error = nfs4_mount_update_path_with_symlink(nmp, &fspath, comp, &dirfh, &depth, &fh, ctx);
2365			nfsmout_if(error);
2366			/* directory file handle is either left the same or reset to root (if link was absolute) */
2367			/* path traversal starts at beginning of the path again */
2368			comp = 0;
2369			NVATTR_CLEANUP(&nvattr);
2370			nfs_fs_locations_cleanup(&nfsls);
2371			continue;
2372		}
2373		NVATTR_CLEANUP(&nvattr);
2374		nfs_fs_locations_cleanup(&nfsls);
2375		/* not a symlink... */
2376		if ((nmp->nm_state & NFSSTA_NEEDSECINFO) && (comp == (fspath.np_compcount-1)) && !isdotdot) {
2377			/* need to get SECINFO for the directory being mounted */
2378			if (dirfh.fh_len == 0)
2379				NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
2380			else
2381				NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
2382			sec.count = NX_MAX_SEC_FLAVORS;
2383			error = nfs4_secinfo_rpc(nmp, &si, vfs_context_ucred(ctx), sec.flavors, &sec.count);
2384			/* [sigh] some implementations return "illegal" error for unsupported ops */
2385			if (error == NFSERR_OP_ILLEGAL)
2386				error = 0;
2387			nfsmout_if(error);
2388			/* set our default security flavor to the first in the list */
2389			if (sec.count)
2390				nmp->nm_auth = sec.flavors[0];
2391			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
2392		}
2393		/* advance directory file handle, component index, & update depth */
2394		dirfh = fh;
2395		comp++;
2396		if (!isdotdot) /* going down the hierarchy */
2397			depth++;
2398		else if (--depth <= 0)  /* going up the hierarchy */
2399			dirfh.fh_len = 0; /* clear dirfh when we hit root */
2400	}
2401
2402gotfh:
2403	/* get attrs for mount point root */
2404	numops = NMFLAG(nmp, NONAMEDATTR) ? 2 : 3; // PUTFH + GETATTR + OPENATTR
2405	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED);
2406	nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2407	numops--;
2408	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
2409	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
2410	numops--;
2411	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2412	NFS_CLEAR_ATTRIBUTES(bitmap);
2413	NFS4_DEFAULT_ATTRIBUTES(bitmap);
2414	/* if no namedattr support or last component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
2415	if (NMFLAG(nmp, NONAMEDATTR) || ((fspath.np_compcount > 0) && !strcmp(fspath.np_components[fspath.np_compcount-1], ".zfs")))
2416		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
2417	nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2418	if (!NMFLAG(nmp, NONAMEDATTR)) {
2419		numops--;
2420		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
2421		nfsm_chain_add_32(error, &nmreq, 0);
2422	}
2423	nfsm_chain_build_done(error, &nmreq);
2424	nfsm_assert(error, (numops == 0), EPROTO);
2425	nfsmout_if(error);
2426	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2427			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2428	if (!error)
2429		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2430	nfsm_chain_skip_tag(error, &nmrep);
2431	nfsm_chain_get_32(error, &nmrep, numops);
2432	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
2433	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2434	nfsmout_if(error);
2435	NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2436	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
2437	nfsmout_if(error);
2438	if (!NMFLAG(nmp, NONAMEDATTR)) {
2439		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
2440		if (error == ENOENT)
2441			error = 0;
2442		/* [sigh] some implementations return "illegal" error for unsupported ops */
2443		if (error || !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_NAMED_ATTR)) {
2444			nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
2445		} else {
2446			nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_NAMED_ATTR;
2447		}
2448	} else {
2449		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
2450	}
2451	if (NMFLAG(nmp, NOACL)) /* make sure ACL support is turned off */
2452		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_ACL;
2453	if (NMFLAG(nmp, ACLONLY) && !(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL))
2454		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
2455	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_FH_EXPIRE_TYPE)) {
2456		uint32_t fhtype = ((nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_FHTYPE_MASK) >> NFS_FSFLAG_FHTYPE_SHIFT);
2457		if (fhtype != NFS_FH_PERSISTENT)
2458			printf("nfs: warning: non-persistent file handles! for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
2459	}
2460
2461	/* make sure it's a directory */
2462	if (!NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) || (nvattr.nva_type != VDIR)) {
2463		error = ENOTDIR;
2464		goto nfsmout;
2465	}
2466
2467	/* save the NFS fsid */
2468	nmp->nm_fsid = nvattr.nva_fsid;
2469
2470	/* create the root node */
2471	error = nfs_nget(nmp->nm_mountp, NULL, NULL, dirfh.fh_data, dirfh.fh_len, &nvattr, &xid, rq.r_auth, NG_MARKROOT, npp);
2472	nfsmout_if(error);
2473
2474	if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
2475		vfs_setextendedsecurity(nmp->nm_mountp);
2476
2477	/* adjust I/O sizes to server limits */
2478	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD) && (nmp->nm_fsattr.nfsa_maxread > 0)) {
2479		if (nmp->nm_fsattr.nfsa_maxread < (uint64_t)nmp->nm_rsize) {
2480			nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread & ~(NFS_FABLKSIZE - 1);
2481			if (nmp->nm_rsize == 0)
2482				nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread;
2483		}
2484	}
2485	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE) && (nmp->nm_fsattr.nfsa_maxwrite > 0)) {
2486		if (nmp->nm_fsattr.nfsa_maxwrite < (uint64_t)nmp->nm_wsize) {
2487			nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite & ~(NFS_FABLKSIZE - 1);
2488			if (nmp->nm_wsize == 0)
2489				nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite;
2490		}
2491	}
2492
2493	/* set up lease renew timer */
2494	nmp->nm_renew_timer = thread_call_allocate(nfs4_renew_timer, nmp);
2495	interval = nmp->nm_fsattr.nfsa_lease / 2;
2496	if (interval < 1)
2497		interval = 1;
2498	nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
2499
2500nfsmout:
2501	if (fspath.np_components) {
2502		for (comp=0; comp < fspath.np_compcount; comp++)
2503			if (fspath.np_components[comp])
2504				FREE(fspath.np_components[comp], M_TEMP);
2505		FREE(fspath.np_components, M_TEMP);
2506	}
2507	NVATTR_CLEANUP(&nvattr);
2508	nfs_fs_locations_cleanup(&nfsls);
2509	if (*npp)
2510		nfs_node_unlock(*npp);
2511	nfsm_chain_cleanup(&nmreq);
2512	nfsm_chain_cleanup(&nmrep);
2513	return (error);
2514}
2515
2516/*
2517 * Thread to handle initial NFS mount connection.
2518 */
2519void
2520nfs_mount_connect_thread(void *arg, __unused wait_result_t wr)
2521{
2522	struct nfsmount *nmp = arg;
2523	int error = 0, savederror = 0, slpflag = (NMFLAG(nmp, INTR) ? PCATCH : 0);
2524	int done = 0, timeo, tries, maxtries;
2525
2526	if (NM_OMFLAG(nmp, MNTQUICK)) {
2527		timeo = 8;
2528		maxtries = 1;
2529	} else {
2530		timeo = 30;
2531		maxtries = 2;
2532	}
2533
2534	for (tries = 0; tries < maxtries; tries++) {
2535		error = nfs_connect(nmp, 1, timeo);
2536		switch (error) {
2537		case ETIMEDOUT:
2538		case EAGAIN:
2539		case EPIPE:
2540		case EADDRNOTAVAIL:
2541		case ENETDOWN:
2542		case ENETUNREACH:
2543		case ENETRESET:
2544		case ECONNABORTED:
2545		case ECONNRESET:
2546		case EISCONN:
2547		case ENOTCONN:
2548		case ESHUTDOWN:
2549		case ECONNREFUSED:
2550		case EHOSTDOWN:
2551		case EHOSTUNREACH:
2552			/* just keep retrying on any of these errors */
2553			break;
2554		case 0:
2555		default:
2556			/* looks like we got an answer... */
2557			done = 1;
2558			break;
2559		}
2560
2561		/* save the best error */
2562		if (nfs_connect_error_class(error) >= nfs_connect_error_class(savederror))
2563			savederror = error;
2564		if (done) {
2565			error = savederror;
2566			break;
2567		}
2568
2569		/* pause before next attempt */
2570		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
2571			break;
2572		error = tsleep(nmp, PSOCK|slpflag, "nfs_mount_connect_retry", 2*hz);
2573		if (error && (error != EWOULDBLOCK))
2574			break;
2575		error = savederror;
2576	}
2577
2578	/* update status of mount connect */
2579	lck_mtx_lock(&nmp->nm_lock);
2580	if (!nmp->nm_mounterror)
2581		nmp->nm_mounterror = error;
2582	nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
2583	lck_mtx_unlock(&nmp->nm_lock);
2584	wakeup(&nmp->nm_nss);
2585}
2586
2587int
2588nfs_mount_connect(struct nfsmount *nmp)
2589{
2590	int error = 0, slpflag;
2591	thread_t thd;
2592	struct timespec ts = { 2, 0 };
2593
2594	/*
2595	 * Set up the socket.  Perform initial search for a location/server/address to
2596	 * connect to and negotiate any unspecified mount parameters.  This work is
2597	 * done on a kernel thread to satisfy reserved port usage needs.
2598	 */
2599	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
2600	lck_mtx_lock(&nmp->nm_lock);
2601	/* set flag that the thread is running */
2602	nmp->nm_state |= NFSSTA_MOUNT_THREAD;
2603	if (kernel_thread_start(nfs_mount_connect_thread, nmp, &thd) != KERN_SUCCESS) {
2604		nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
2605		nmp->nm_mounterror = EIO;
2606		printf("nfs mount %s start socket connect thread failed\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
2607	} else {
2608		thread_deallocate(thd);
2609	}
2610
2611	/* wait until mount connect thread is finished/gone */
2612	while (nmp->nm_state & NFSSTA_MOUNT_THREAD) {
2613		error = msleep(&nmp->nm_nss, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectthread", &ts);
2614		if ((error && (error != EWOULDBLOCK)) || ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))) {
2615			/* record error */
2616			if (!nmp->nm_mounterror)
2617				nmp->nm_mounterror = error;
2618			/* signal the thread that we are aborting */
2619			nmp->nm_sockflags |= NMSOCK_UNMOUNT;
2620			if (nmp->nm_nss)
2621				wakeup(nmp->nm_nss);
2622			/* and continue waiting on it to finish */
2623			slpflag = 0;
2624		}
2625	}
2626	lck_mtx_unlock(&nmp->nm_lock);
2627
2628	/* grab mount connect status */
2629	error = nmp->nm_mounterror;
2630
2631	return (error);
2632}
2633
2634/*
2635 * Common code to mount an NFS file system.
2636 */
2637int
2638mountnfs(
2639	char *xdrbuf,
2640	mount_t mp,
2641	vfs_context_t ctx,
2642	vnode_t *vpp)
2643{
2644	struct nfsmount *nmp;
2645	nfsnode_t np;
2646	int error = 0;
2647	struct vfsstatfs *sbp;
2648	struct xdrbuf xb;
2649	uint32_t i, val, vers = 0, minorvers, maxio, iosize, len;
2650	uint32_t *mattrs;
2651	uint32_t *mflags_mask;
2652	uint32_t *mflags;
2653	uint32_t argslength, attrslength;
2654	struct nfs_location_index firstloc = { NLI_VALID, 0, 0, 0 };
2655
2656	/* make sure mbuf constants are set up */
2657	if (!nfs_mbuf_mhlen)
2658		nfs_mbuf_init();
2659
2660	if (vfs_flags(mp) & MNT_UPDATE) {
2661		nmp = VFSTONFS(mp);
2662		/* update paths, file handles, etc, here	XXX */
2663		xb_free(xdrbuf);
2664		return (0);
2665	} else {
2666		/* allocate an NFS mount structure for this mount */
2667		MALLOC_ZONE(nmp, struct nfsmount *,
2668				sizeof (struct nfsmount), M_NFSMNT, M_WAITOK);
2669		if (!nmp) {
2670			xb_free(xdrbuf);
2671			return (ENOMEM);
2672		}
2673		bzero((caddr_t)nmp, sizeof (struct nfsmount));
2674		lck_mtx_init(&nmp->nm_lock, nfs_mount_grp, LCK_ATTR_NULL);
2675		TAILQ_INIT(&nmp->nm_resendq);
2676		TAILQ_INIT(&nmp->nm_iodq);
2677		TAILQ_INIT(&nmp->nm_gsscl);
2678		TAILQ_INIT(&nmp->nm_gssnccl);
2679		LIST_INIT(&nmp->nm_monlist);
2680		vfs_setfsprivate(mp, nmp);
2681		vfs_getnewfsid(mp);
2682		nmp->nm_mountp = mp;
2683		vfs_setauthopaque(mp);
2684
2685		nfs_nhinit_finish();
2686
2687		nmp->nm_args = xdrbuf;
2688
2689		/* set up defaults */
2690		nmp->nm_ref = 0;
2691		nmp->nm_vers = 0;
2692		nmp->nm_timeo = NFS_TIMEO;
2693		nmp->nm_retry = NFS_RETRANS;
2694		nmp->nm_sotype = 0;
2695		nmp->nm_sofamily = 0;
2696		nmp->nm_nfsport = 0;
2697		nmp->nm_wsize = NFS_WSIZE;
2698		nmp->nm_rsize = NFS_RSIZE;
2699		nmp->nm_readdirsize = NFS_READDIRSIZE;
2700		nmp->nm_numgrps = NFS_MAXGRPS;
2701		nmp->nm_readahead = NFS_DEFRAHEAD;
2702		nmp->nm_tprintf_delay = nfs_tprintf_delay;
2703		if (nmp->nm_tprintf_delay < 0)
2704			nmp->nm_tprintf_delay = 0;
2705		nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
2706		if (nmp->nm_tprintf_initial_delay < 0)
2707			nmp->nm_tprintf_initial_delay = 0;
2708		nmp->nm_acregmin = NFS_MINATTRTIMO;
2709		nmp->nm_acregmax = NFS_MAXATTRTIMO;
2710		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
2711		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
2712		nmp->nm_auth = RPCAUTH_SYS;
2713		nmp->nm_iodlink.tqe_next = NFSNOLIST;
2714		nmp->nm_deadtimeout = 0;
2715		nmp->nm_curdeadtimeout = 0;
2716		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
2717		nmp->nm_realm = NULL;
2718		nmp->nm_principal = NULL;
2719		nmp->nm_sprinc = NULL;
2720	}
2721
2722	mattrs = nmp->nm_mattrs;
2723	mflags = nmp->nm_mflags;
2724	mflags_mask = nmp->nm_mflags_mask;
2725
2726	/* set up NFS mount with args */
2727	xb_init_buffer(&xb, xdrbuf, 2*XDRWORD);
2728	xb_get_32(error, &xb, val); /* version */
2729	xb_get_32(error, &xb, argslength); /* args length */
2730	nfsmerr_if(error);
2731	xb_init_buffer(&xb, xdrbuf, argslength);	/* restart parsing with actual buffer length */
2732	xb_get_32(error, &xb, val); /* version */
2733	xb_get_32(error, &xb, argslength); /* args length */
2734	xb_get_32(error, &xb, val); /* XDR args version */
2735	if (val != NFS_XDRARGS_VERSION_0)
2736		error = EINVAL;
2737	len = NFS_MATTR_BITMAP_LEN;
2738	xb_get_bitmap(error, &xb, mattrs, len); /* mount attribute bitmap */
2739	attrslength = 0;
2740	xb_get_32(error, &xb, attrslength); /* attrs length */
2741	if (!error && (attrslength > (argslength - ((4+NFS_MATTR_BITMAP_LEN+1)*XDRWORD))))
2742		error = EINVAL;
2743	nfsmerr_if(error);
2744	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
2745		len = NFS_MFLAG_BITMAP_LEN;
2746		xb_get_bitmap(error, &xb, mflags_mask, len); /* mount flag mask */
2747		len = NFS_MFLAG_BITMAP_LEN;
2748		xb_get_bitmap(error, &xb, mflags, len); /* mount flag values */
2749		if (!error) {
2750			/* clear all mask bits and OR in all the ones that are set */
2751			nmp->nm_flags[0] &= ~mflags_mask[0];
2752			nmp->nm_flags[0] |= (mflags_mask[0] & mflags[0]);
2753		}
2754	}
2755	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) {
2756		xb_get_32(error, &xb, vers);
2757		if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
2758			xb_get_32(error, &xb, minorvers);
2759		else
2760			minorvers = 0;
2761		nfsmerr_if(error);
2762		switch (vers) {
2763		case 2:
2764			nmp->nm_vers = NFS_VER2;
2765			break;
2766		case 3:
2767			nmp->nm_vers = NFS_VER3;
2768			break;
2769		case 4:
2770			switch (minorvers) {
2771			case 0:
2772				nmp->nm_vers = NFS_VER4;
2773				break;
2774			default:
2775				error = EINVAL;
2776			}
2777			break;
2778		default:
2779			error = EINVAL;
2780		}
2781	}
2782	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) {
2783		/* should have also gotten NFS version (and already gotten minorvers) */
2784		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
2785			error = EINVAL;
2786	}
2787	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
2788		xb_get_32(error, &xb, nmp->nm_rsize);
2789	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
2790		xb_get_32(error, &xb, nmp->nm_wsize);
2791	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
2792		xb_get_32(error, &xb, nmp->nm_readdirsize);
2793	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
2794		xb_get_32(error, &xb, nmp->nm_readahead);
2795	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
2796		xb_get_32(error, &xb, nmp->nm_acregmin);
2797		xb_skip(error, &xb, XDRWORD);
2798	}
2799	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
2800		xb_get_32(error, &xb, nmp->nm_acregmax);
2801		xb_skip(error, &xb, XDRWORD);
2802	}
2803	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
2804		xb_get_32(error, &xb, nmp->nm_acdirmin);
2805		xb_skip(error, &xb, XDRWORD);
2806	}
2807	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
2808		xb_get_32(error, &xb, nmp->nm_acdirmax);
2809		xb_skip(error, &xb, XDRWORD);
2810	}
2811	nfsmerr_if(error);
2812	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE)) {
2813		xb_get_32(error, &xb, val);
2814		switch (val) {
2815		case NFS_LOCK_MODE_DISABLED:
2816		case NFS_LOCK_MODE_LOCAL:
2817			if (nmp->nm_vers >= NFS_VER4) {
2818				/* disabled/local lock mode only allowed on v2/v3 */
2819				error = EINVAL;
2820				break;
2821			}
2822			/* FALLTHROUGH */
2823		case NFS_LOCK_MODE_ENABLED:
2824			nmp->nm_lockmode = val;
2825			break;
2826		default:
2827			error = EINVAL;
2828		}
2829	}
2830	nfsmerr_if(error);
2831	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
2832		uint32_t seccnt;
2833		xb_get_32(error, &xb, seccnt);
2834		if (!error && ((seccnt < 1) || (seccnt > NX_MAX_SEC_FLAVORS)))
2835			error = EINVAL;
2836		nfsmerr_if(error);
2837		nmp->nm_sec.count = seccnt;
2838		for (i=0; i < seccnt; i++) {
2839			xb_get_32(error, &xb, nmp->nm_sec.flavors[i]);
2840			/* Check for valid security flavor */
2841			switch (nmp->nm_sec.flavors[i]) {
2842			case RPCAUTH_NONE:
2843			case RPCAUTH_SYS:
2844			case RPCAUTH_KRB5:
2845			case RPCAUTH_KRB5I:
2846			case RPCAUTH_KRB5P:
2847				break;
2848			default:
2849				error = EINVAL;
2850			}
2851		}
2852		/* start with the first flavor */
2853		nmp->nm_auth = nmp->nm_sec.flavors[0];
2854	}
2855	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
2856		xb_get_32(error, &xb, nmp->nm_numgrps);
2857	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE)) {
2858		char sotype[6];
2859
2860		xb_get_32(error, &xb, val);
2861		if (!error && ((val < 3) || (val > 5)))
2862			error = EINVAL;
2863		nfsmerr_if(error);
2864		error = xb_get_bytes(&xb, sotype, val, 0);
2865		nfsmerr_if(error);
2866		sotype[val] = '\0';
2867		if (!strcmp(sotype, "tcp")) {
2868			nmp->nm_sotype = SOCK_STREAM;
2869		} else if (!strcmp(sotype, "udp")) {
2870			nmp->nm_sotype = SOCK_DGRAM;
2871		} else if (!strcmp(sotype, "tcp4")) {
2872			nmp->nm_sotype = SOCK_STREAM;
2873			nmp->nm_sofamily = AF_INET;
2874		} else if (!strcmp(sotype, "udp4")) {
2875			nmp->nm_sotype = SOCK_DGRAM;
2876			nmp->nm_sofamily = AF_INET;
2877		} else if (!strcmp(sotype, "tcp6")) {
2878			nmp->nm_sotype = SOCK_STREAM;
2879			nmp->nm_sofamily = AF_INET6;
2880		} else if (!strcmp(sotype, "udp6")) {
2881			nmp->nm_sotype = SOCK_DGRAM;
2882			nmp->nm_sofamily = AF_INET6;
2883		} else if (!strcmp(sotype, "inet4")) {
2884			nmp->nm_sofamily = AF_INET;
2885		} else if (!strcmp(sotype, "inet6")) {
2886			nmp->nm_sofamily = AF_INET6;
2887		} else if (!strcmp(sotype, "inet")) {
2888			nmp->nm_sofamily = 0; /* ok */
2889		} else {
2890			error = EINVAL;
2891		}
2892		if (!error && (nmp->nm_vers >= NFS_VER4) && nmp->nm_sotype &&
2893		    (nmp->nm_sotype != SOCK_STREAM))
2894			error = EINVAL;		/* NFSv4 is only allowed over TCP. */
2895		nfsmerr_if(error);
2896	}
2897	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
2898		xb_get_32(error, &xb, nmp->nm_nfsport);
2899	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
2900		xb_get_32(error, &xb, nmp->nm_mountport);
2901	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
2902		/* convert from time to 0.1s units */
2903		xb_get_32(error, &xb, nmp->nm_timeo);
2904		xb_get_32(error, &xb, val);
2905		nfsmerr_if(error);
2906		if (val >= 1000000000)
2907			error = EINVAL;
2908		nfsmerr_if(error);
2909		nmp->nm_timeo *= 10;
2910		nmp->nm_timeo += (val+100000000-1)/100000000;
2911		/* now convert to ticks */
2912		nmp->nm_timeo = (nmp->nm_timeo * NFS_HZ + 5) / 10;
2913	}
2914	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT)) {
2915		xb_get_32(error, &xb, val);
2916		if (!error && (val > 1))
2917			nmp->nm_retry = val;
2918	}
2919	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
2920		xb_get_32(error, &xb, nmp->nm_deadtimeout);
2921		xb_skip(error, &xb, XDRWORD);
2922	}
2923	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
2924		nfsmerr_if(error);
2925		MALLOC(nmp->nm_fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
2926		if (!nmp->nm_fh)
2927			error = ENOMEM;
2928		xb_get_32(error, &xb, nmp->nm_fh->fh_len);
2929		nfsmerr_if(error);
2930		error = xb_get_bytes(&xb, (char*)&nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len, 0);
2931	}
2932	nfsmerr_if(error);
2933	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
2934		uint32_t loc, serv, addr, comp;
2935		struct nfs_fs_location *fsl;
2936		struct nfs_fs_server *fss;
2937		struct nfs_fs_path *fsp;
2938
2939		xb_get_32(error, &xb, nmp->nm_locations.nl_numlocs); /* fs location count */
2940		/* sanity check location count */
2941		if (!error && ((nmp->nm_locations.nl_numlocs < 1) || (nmp->nm_locations.nl_numlocs > 256)))
2942			error = EINVAL;
2943		nfsmerr_if(error);
2944		MALLOC(nmp->nm_locations.nl_locations, struct nfs_fs_location **, nmp->nm_locations.nl_numlocs * sizeof(struct nfs_fs_location*), M_TEMP, M_WAITOK|M_ZERO);
2945		if (!nmp->nm_locations.nl_locations)
2946			error = ENOMEM;
2947		for (loc = 0; loc < nmp->nm_locations.nl_numlocs; loc++) {
2948			nfsmerr_if(error);
2949			MALLOC(fsl, struct nfs_fs_location *, sizeof(struct nfs_fs_location), M_TEMP, M_WAITOK|M_ZERO);
2950			if (!fsl)
2951				error = ENOMEM;
2952			nmp->nm_locations.nl_locations[loc] = fsl;
2953			xb_get_32(error, &xb, fsl->nl_servcount); /* server count */
2954			/* sanity check server count */
2955			if (!error && ((fsl->nl_servcount < 1) || (fsl->nl_servcount > 256)))
2956				error = EINVAL;
2957			nfsmerr_if(error);
2958			MALLOC(fsl->nl_servers, struct nfs_fs_server **, fsl->nl_servcount * sizeof(struct nfs_fs_server*), M_TEMP, M_WAITOK|M_ZERO);
2959			if (!fsl->nl_servers)
2960				error = ENOMEM;
2961			for (serv = 0; serv < fsl->nl_servcount; serv++) {
2962				nfsmerr_if(error);
2963				MALLOC(fss, struct nfs_fs_server *, sizeof(struct nfs_fs_server), M_TEMP, M_WAITOK|M_ZERO);
2964				if (!fss)
2965					error = ENOMEM;
2966				fsl->nl_servers[serv] = fss;
2967				xb_get_32(error, &xb, val); /* server name length */
2968				/* sanity check server name length */
2969				if (!error && ((val < 1) || (val > MAXPATHLEN)))
2970					error = EINVAL;
2971				nfsmerr_if(error);
2972				MALLOC(fss->ns_name, char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
2973				if (!fss->ns_name)
2974					error = ENOMEM;
2975				nfsmerr_if(error);
2976				error = xb_get_bytes(&xb, fss->ns_name, val, 0); /* server name */
2977				xb_get_32(error, &xb, fss->ns_addrcount); /* address count */
2978				/* sanity check address count (OK to be zero) */
2979				if (!error && (fss->ns_addrcount > 256))
2980					error = EINVAL;
2981				nfsmerr_if(error);
2982				if (fss->ns_addrcount > 0) {
2983					MALLOC(fss->ns_addresses, char **, fss->ns_addrcount * sizeof(char *), M_TEMP, M_WAITOK|M_ZERO);
2984					if (!fss->ns_addresses)
2985						error = ENOMEM;
2986					for (addr = 0; addr < fss->ns_addrcount; addr++) {
2987						xb_get_32(error, &xb, val); /* address length */
2988						/* sanity check address length */
2989						if (!error && ((val < 1) || (val > 128)))
2990							error = EINVAL;
2991						nfsmerr_if(error);
2992						MALLOC(fss->ns_addresses[addr], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
2993						if (!fss->ns_addresses[addr])
2994							error = ENOMEM;
2995						nfsmerr_if(error);
2996						error = xb_get_bytes(&xb, fss->ns_addresses[addr], val, 0); /* address */
2997					}
2998				}
2999				xb_get_32(error, &xb, val); /* server info length */
3000				xb_skip(error, &xb, val); /* skip server info */
3001			}
3002			/* get pathname */
3003			fsp = &fsl->nl_path;
3004			xb_get_32(error, &xb, fsp->np_compcount); /* component count */
3005			/* sanity check component count */
3006			if (!error && (fsp->np_compcount > MAXPATHLEN))
3007				error = EINVAL;
3008			nfsmerr_if(error);
3009			if (fsp->np_compcount) {
3010				MALLOC(fsp->np_components, char **, fsp->np_compcount * sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
3011				if (!fsp->np_components)
3012					error = ENOMEM;
3013			}
3014			for (comp = 0; comp < fsp->np_compcount; comp++) {
3015				xb_get_32(error, &xb, val); /* component length */
3016				/* sanity check component length */
3017				if (!error && (val == 0)) {
3018					/*
3019					 * Apparently some people think a path with zero components should
3020					 * be encoded with one zero-length component.  So, just ignore any
3021					 * zero length components.
3022					 */
3023					comp--;
3024					fsp->np_compcount--;
3025					if (fsp->np_compcount == 0) {
3026						FREE(fsp->np_components, M_TEMP);
3027						fsp->np_components = NULL;
3028					}
3029					continue;
3030				}
3031				if (!error && ((val < 1) || (val > MAXPATHLEN)))
3032					error = EINVAL;
3033				nfsmerr_if(error);
3034				MALLOC(fsp->np_components[comp], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
3035				if (!fsp->np_components[comp])
3036					error = ENOMEM;
3037				nfsmerr_if(error);
3038				error = xb_get_bytes(&xb, fsp->np_components[comp], val, 0); /* component */
3039			}
3040			xb_get_32(error, &xb, val); /* fs location info length */
3041			xb_skip(error, &xb, val); /* skip fs location info */
3042		}
3043	}
3044	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
3045		xb_skip(error, &xb, XDRWORD);
3046	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
3047		xb_get_32(error, &xb, len);
3048		nfsmerr_if(error);
3049		val = len;
3050		if (val >= sizeof(vfs_statfs(mp)->f_mntfromname))
3051			val = sizeof(vfs_statfs(mp)->f_mntfromname) - 1;
3052		error = xb_get_bytes(&xb, vfs_statfs(mp)->f_mntfromname, val, 0);
3053		if ((len - val) > 0)
3054			xb_skip(error, &xb, len - val);
3055		nfsmerr_if(error);
3056		vfs_statfs(mp)->f_mntfromname[val] = '\0';
3057	}
3058	nfsmerr_if(error);
3059
3060	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REALM)) {
3061		xb_get_32(error, &xb, len);
3062		if (!error && ((len < 1) || (len > MAXPATHLEN)))
3063			error=EINVAL;
3064		nfsmerr_if(error);
3065		/* allocate an extra byte for a leading '@' if its not already prepended to the realm */
3066		MALLOC(nmp->nm_realm, char *, len+2, M_TEMP, M_WAITOK|M_ZERO);
3067		if (!nmp->nm_realm)
3068			error = ENOMEM;
3069		nfsmerr_if(error);
3070		error = xb_get_bytes(&xb, nmp->nm_realm, len, 0);
3071		if (error == 0 && *nmp->nm_realm != '@') {
3072			bcopy(nmp->nm_realm, &nmp->nm_realm[1], len);
3073			nmp->nm_realm[0] = '@';
3074		}
3075	}
3076	nfsmerr_if(error);
3077
3078	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_PRINCIPAL)) {
3079		xb_get_32(error, &xb, len);
3080		if (!error && ((len < 1) || (len > MAXPATHLEN)))
3081			error=EINVAL;
3082		nfsmerr_if(error);
3083		MALLOC(nmp->nm_principal, char *, len+1, M_TEMP, M_WAITOK|M_ZERO);
3084		if (!nmp->nm_principal)
3085			error = ENOMEM;
3086		nfsmerr_if(error);
3087		error = xb_get_bytes(&xb, nmp->nm_principal, len, 0);
3088	}
3089	nfsmerr_if(error);
3090
3091	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SVCPRINCIPAL)) {
3092		xb_get_32(error, &xb, len);
3093		if (!error && ((len < 1) || (len > MAXPATHLEN)))
3094			error=EINVAL;
3095		nfsmerr_if(error);
3096		MALLOC(nmp->nm_sprinc, char *, len+1, M_TEMP, M_WAITOK|M_ZERO);
3097		if (!nmp->nm_sprinc)
3098			error = ENOMEM;
3099		nfsmerr_if(error);
3100		error = xb_get_bytes(&xb, nmp->nm_sprinc, len, 0);
3101	}
3102	nfsmerr_if(error);
3103
3104	/*
3105	 * Sanity check/finalize settings.
3106	 */
3107
3108	if (nmp->nm_timeo < NFS_MINTIMEO)
3109		nmp->nm_timeo = NFS_MINTIMEO;
3110	else if (nmp->nm_timeo > NFS_MAXTIMEO)
3111		nmp->nm_timeo = NFS_MAXTIMEO;
3112	if (nmp->nm_retry > NFS_MAXREXMIT)
3113		nmp->nm_retry = NFS_MAXREXMIT;
3114
3115	if (nmp->nm_numgrps > NFS_MAXGRPS)
3116		nmp->nm_numgrps = NFS_MAXGRPS;
3117	if (nmp->nm_readahead > NFS_MAXRAHEAD)
3118		nmp->nm_readahead = NFS_MAXRAHEAD;
3119	if (nmp->nm_acregmin > nmp->nm_acregmax)
3120		nmp->nm_acregmin = nmp->nm_acregmax;
3121	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
3122		nmp->nm_acdirmin = nmp->nm_acdirmax;
3123
3124	/* need at least one fs location */
3125	if (nmp->nm_locations.nl_numlocs < 1)
3126		error = EINVAL;
3127	nfsmerr_if(error);
3128
3129	/* init mount's mntfromname to first location */
3130	if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
3131		nfs_location_mntfromname(&nmp->nm_locations, firstloc,
3132			vfs_statfs(mp)->f_mntfromname, sizeof(vfs_statfs(mp)->f_mntfromname), 0);
3133
3134	/* Need to save the mounting credential for v4. */
3135	nmp->nm_mcred = vfs_context_ucred(ctx);
3136	if (IS_VALID_CRED(nmp->nm_mcred))
3137		kauth_cred_ref(nmp->nm_mcred);
3138
3139	/*
3140	 * If a reserved port is required, check for that privilege.
3141	 * (Note that mirror mounts are exempt because the privilege was
3142	 * already checked for the original mount.)
3143	 */
3144	if (NMFLAG(nmp, RESVPORT) && !vfs_iskernelmount(mp))
3145		error = priv_check_cred(nmp->nm_mcred, PRIV_NETINET_RESERVEDPORT, 0);
3146	nfsmerr_if(error);
3147
3148	/* do mount's initial socket connection */
3149	error = nfs_mount_connect(nmp);
3150	nfsmerr_if(error);
3151
3152	/* set up the version-specific function tables */
3153	if (nmp->nm_vers < NFS_VER4)
3154		nmp->nm_funcs = &nfs3_funcs;
3155	else
3156		nmp->nm_funcs = &nfs4_funcs;
3157
3158	/* sanity check settings now that version/connection is set */
3159	if (nmp->nm_vers == NFS_VER2)		/* ignore RDIRPLUS on NFSv2 */
3160		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
3161	if (nmp->nm_vers >= NFS_VER4) {
3162		if (NFS_BITMAP_ISSET(nmp->nm_flags, NFS_MFLAG_ACLONLY)) /* aclonly trumps noacl */
3163			NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
3164		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
3165		if (nmp->nm_lockmode != NFS_LOCK_MODE_ENABLED)
3166			error = EINVAL; /* disabled/local lock mode only allowed on v2/v3 */
3167	} else {
3168		/* ignore these if not v4 */
3169		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOCALLBACK);
3170		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NONAMEDATTR);
3171		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
3172		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
3173	}
3174	nfsmerr_if(error);
3175
3176	if (nmp->nm_sotype == SOCK_DGRAM) {
3177		/* I/O size defaults for UDP are different */
3178		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
3179			nmp->nm_rsize = NFS_DGRAM_RSIZE;
3180		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
3181			nmp->nm_wsize = NFS_DGRAM_WSIZE;
3182	}
3183
3184	/* round down I/O sizes to multiple of NFS_FABLKSIZE */
3185	nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
3186	if (nmp->nm_rsize <= 0)
3187		nmp->nm_rsize = NFS_FABLKSIZE;
3188	nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
3189	if (nmp->nm_wsize <= 0)
3190		nmp->nm_wsize = NFS_FABLKSIZE;
3191
3192	/* and limit I/O sizes to maximum allowed */
3193	maxio = (nmp->nm_vers == NFS_VER2) ? NFS_V2MAXDATA :
3194		(nmp->nm_sotype == SOCK_DGRAM) ? NFS_MAXDGRAMDATA : NFS_MAXDATA;
3195	if (maxio > NFS_MAXBSIZE)
3196		maxio = NFS_MAXBSIZE;
3197	if (nmp->nm_rsize > maxio)
3198		nmp->nm_rsize = maxio;
3199	if (nmp->nm_wsize > maxio)
3200		nmp->nm_wsize = maxio;
3201
3202	if (nmp->nm_readdirsize > maxio)
3203		nmp->nm_readdirsize = maxio;
3204	if (nmp->nm_readdirsize > nmp->nm_rsize)
3205		nmp->nm_readdirsize = nmp->nm_rsize;
3206
3207	/* Set up the sockets and related info */
3208	if (nmp->nm_sotype == SOCK_DGRAM)
3209		TAILQ_INIT(&nmp->nm_cwndq);
3210
3211	/*
3212	 * Get the root node/attributes from the NFS server and
3213	 * do any basic, version-specific setup.
3214	 */
3215	error = nmp->nm_funcs->nf_mount(nmp, ctx, &np);
3216	nfsmerr_if(error);
3217
3218	/*
3219	 * A reference count is needed on the node representing the
3220	 * remote root.  If this object is not persistent, then backward
3221	 * traversals of the mount point (i.e. "..") will not work if
3222	 * the node gets flushed out of the cache.
3223	 */
3224	nmp->nm_dnp = np;
3225	*vpp = NFSTOV(np);
3226	/* get usecount and drop iocount */
3227	error = vnode_ref(*vpp);
3228	vnode_put(*vpp);
3229	if (error) {
3230		vnode_recycle(*vpp);
3231		goto nfsmerr;
3232	}
3233
3234	/*
3235	 * Do statfs to ensure static info gets set to reasonable values.
3236	 */
3237	if ((error = nmp->nm_funcs->nf_update_statfs(nmp, ctx))) {
3238		int error2 = vnode_getwithref(*vpp);
3239		vnode_rele(*vpp);
3240		if (!error2)
3241			vnode_put(*vpp);
3242		vnode_recycle(*vpp);
3243		goto nfsmerr;
3244	}
3245	sbp = vfs_statfs(mp);
3246	sbp->f_bsize = nmp->nm_fsattr.nfsa_bsize;
3247	sbp->f_blocks = nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize;
3248	sbp->f_bfree = nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize;
3249	sbp->f_bavail = nmp->nm_fsattr.nfsa_space_avail / sbp->f_bsize;
3250	sbp->f_bused = (nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize) -
3251			(nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize);
3252	sbp->f_files = nmp->nm_fsattr.nfsa_files_total;
3253	sbp->f_ffree = nmp->nm_fsattr.nfsa_files_free;
3254	sbp->f_iosize = nfs_iosize;
3255
3256	/*
3257	 * Calculate the size used for I/O buffers.  Use the larger
3258	 * of the two sizes to minimise NFS requests but make sure
3259	 * that it is at least one VM page to avoid wasting buffer
3260	 * space and to allow easy mmapping of I/O buffers.
3261	 * The read/write RPC calls handle the splitting up of
3262	 * buffers into multiple requests if the buffer size is
3263	 * larger than the I/O size.
3264	 */
3265	iosize = max(nmp->nm_rsize, nmp->nm_wsize);
3266	if (iosize < PAGE_SIZE)
3267		iosize = PAGE_SIZE;
3268	nmp->nm_biosize = trunc_page_32(iosize);
3269
3270	/* For NFSv3 and greater, there is a (relatively) reliable ACCESS call. */
3271	if (nmp->nm_vers > NFS_VER2)
3272		vfs_setauthopaqueaccess(mp);
3273
3274	switch (nmp->nm_lockmode) {
3275	case NFS_LOCK_MODE_DISABLED:
3276		break;
3277	case NFS_LOCK_MODE_LOCAL:
3278		vfs_setlocklocal(nmp->nm_mountp);
3279		break;
3280	case NFS_LOCK_MODE_ENABLED:
3281	default:
3282		if (nmp->nm_vers <= NFS_VER3)
3283			nfs_lockd_mount_register(nmp);
3284		break;
3285	}
3286
3287	/* success! */
3288	lck_mtx_lock(&nmp->nm_lock);
3289	nmp->nm_state |= NFSSTA_MOUNTED;
3290	lck_mtx_unlock(&nmp->nm_lock);
3291	return (0);
3292nfsmerr:
3293	nfs_mount_cleanup(nmp);
3294	return (error);
3295}
3296
3297#if CONFIG_TRIGGERS
3298
3299/*
3300 * We've detected a file system boundary on the server and
3301 * need to mount a new file system so that our file systems
3302 * MIRROR the file systems on the server.
3303 *
3304 * Build the mount arguments for the new mount and call kernel_mount().
3305 */
3306int
3307nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx)
3308{
3309	nfsnode_t np = VTONFS(vp);
3310	nfsnode_t dnp = VTONFS(dvp);
3311	struct nfsmount *nmp = NFSTONMP(np);
3312	char fstype[MFSTYPENAMELEN], *mntfromname = NULL, *path = NULL, *relpath, *p, *cp;
3313	int error = 0, pathbuflen = MAXPATHLEN, i, mntflags = 0, referral, skipcopy = 0;
3314	size_t nlen;
3315	struct xdrbuf xb, xbnew;
3316	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
3317	uint32_t newmattrs[NFS_MATTR_BITMAP_LEN];
3318	uint32_t newmflags[NFS_MFLAG_BITMAP_LEN];
3319	uint32_t newmflags_mask[NFS_MFLAG_BITMAP_LEN];
3320	uint32_t argslength = 0, val, count, mlen, mlen2, rlen, relpathcomps;
3321	uint32_t argslength_offset, attrslength_offset, end_offset;
3322	uint32_t numlocs, loc, numserv, serv, numaddr, addr, numcomp, comp;
3323	char buf[XDRWORD];
3324	struct nfs_fs_locations nfsls;
3325
3326	referral = (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL);
3327	if (referral)
3328		bzero(&nfsls, sizeof(nfsls));
3329
3330	xb_init(&xbnew, 0);
3331
3332	if (!nmp || (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)))
3333		return (ENXIO);
3334
3335	/* allocate a couple path buffers we need */
3336	MALLOC_ZONE(mntfromname, char *, pathbuflen, M_NAMEI, M_WAITOK);
3337	if (!mntfromname) {
3338		error = ENOMEM;
3339		goto nfsmerr;
3340	}
3341	MALLOC_ZONE(path, char *, pathbuflen, M_NAMEI, M_WAITOK);
3342	if (!path) {
3343		error = ENOMEM;
3344		goto nfsmerr;
3345	}
3346
3347	/* get the path for the directory being mounted on */
3348	error = vn_getpath(vp, path, &pathbuflen);
3349	if (error) {
3350		error = ENOMEM;
3351		goto nfsmerr;
3352	}
3353
3354	/*
3355	 * Set up the mntfromname for the new mount based on the
3356	 * current mount's mntfromname and the directory's path
3357	 * relative to the current mount's mntonname.
3358	 * Set up relpath to point at the relative path on the current mount.
3359	 * Also, count the number of components in relpath.
3360	 * We'll be adding those to each fs location path in the new args.
3361	 */
3362	nlen = strlcpy(mntfromname, vfs_statfs(nmp->nm_mountp)->f_mntfromname, MAXPATHLEN);
3363	if ((nlen > 0) && (mntfromname[nlen-1] == '/')) { /* avoid double '/' in new name */
3364		mntfromname[nlen-1] = '\0';
3365		nlen--;
3366	}
3367	relpath = mntfromname + nlen;
3368	nlen = strlcat(mntfromname, path + strlen(vfs_statfs(nmp->nm_mountp)->f_mntonname), MAXPATHLEN);
3369	if (nlen >= MAXPATHLEN) {
3370		error = ENAMETOOLONG;
3371		goto nfsmerr;
3372	}
3373	/* count the number of components in relpath */
3374	p = relpath;
3375	while (*p && (*p == '/'))
3376		p++;
3377	relpathcomps = 0;
3378	while (*p) {
3379		relpathcomps++;
3380		while (*p && (*p != '/'))
3381			p++;
3382		while (*p && (*p == '/'))
3383			p++;
3384	}
3385
3386	/* grab a copy of the file system type */
3387	vfs_name(vnode_mount(vp), fstype);
3388
3389	/* for referrals, fetch the fs locations */
3390	if (referral) {
3391		const char *vname = vnode_getname(NFSTOV(np));
3392		if (!vname) {
3393			error = ENOENT;
3394		} else {
3395			error = nfs4_get_fs_locations(nmp, dnp, NULL, 0, vname, ctx, &nfsls);
3396			vnode_putname(vname);
3397			if (!error && (nfsls.nl_numlocs < 1))
3398				error = ENOENT;
3399		}
3400		nfsmerr_if(error);
3401	}
3402
3403	/* set up NFS mount args based on current mount args */
3404
3405#define xb_copy_32(E, XBSRC, XBDST, V) \
3406	do { \
3407		if (E) break; \
3408		xb_get_32((E), (XBSRC), (V)); \
3409		if (skipcopy) break; \
3410		xb_add_32((E), (XBDST), (V)); \
3411	} while (0)
3412#define xb_copy_opaque(E, XBSRC, XBDST) \
3413	do { \
3414		uint32_t __count, __val; \
3415		xb_copy_32((E), (XBSRC), (XBDST), __count); \
3416		if (E) break; \
3417		__count = nfsm_rndup(__count); \
3418		__count /= XDRWORD; \
3419		while (__count-- > 0) \
3420			xb_copy_32((E), (XBSRC), (XBDST), __val); \
3421	} while (0)
3422
3423	xb_init_buffer(&xb, nmp->nm_args, 2*XDRWORD);
3424	xb_get_32(error, &xb, val); /* version */
3425	xb_get_32(error, &xb, argslength); /* args length */
3426	xb_init_buffer(&xb, nmp->nm_args, argslength);
3427
3428	xb_init_buffer(&xbnew, NULL, 0);
3429	xb_copy_32(error, &xb, &xbnew, val); /* version */
3430	argslength_offset = xb_offset(&xbnew);
3431	xb_copy_32(error, &xb, &xbnew, val); /* args length */
3432	xb_copy_32(error, &xb, &xbnew, val); /* XDR args version */
3433	count = NFS_MATTR_BITMAP_LEN;
3434	xb_get_bitmap(error, &xb, mattrs, count); /* mount attribute bitmap */
3435	nfsmerr_if(error);
3436	for (i = 0; i < NFS_MATTR_BITMAP_LEN; i++)
3437		newmattrs[i] = mattrs[i];
3438	if (referral)
3439		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FS_LOCATIONS);
3440	else
3441		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FH);
3442	NFS_BITMAP_SET(newmattrs, NFS_MATTR_FLAGS);
3443	NFS_BITMAP_SET(newmattrs, NFS_MATTR_MNTFLAGS);
3444	NFS_BITMAP_CLR(newmattrs, NFS_MATTR_MNTFROM);
3445	xb_add_bitmap(error, &xbnew, newmattrs, NFS_MATTR_BITMAP_LEN);
3446	attrslength_offset = xb_offset(&xbnew);
3447	xb_copy_32(error, &xb, &xbnew, val); /* attrs length */
3448	NFS_BITMAP_ZERO(newmflags_mask, NFS_MFLAG_BITMAP_LEN);
3449	NFS_BITMAP_ZERO(newmflags, NFS_MFLAG_BITMAP_LEN);
3450	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
3451		count = NFS_MFLAG_BITMAP_LEN;
3452		xb_get_bitmap(error, &xb, newmflags_mask, count); /* mount flag mask bitmap */
3453		count = NFS_MFLAG_BITMAP_LEN;
3454		xb_get_bitmap(error, &xb, newmflags, count); /* mount flag bitmap */
3455	}
3456	NFS_BITMAP_SET(newmflags_mask, NFS_MFLAG_EPHEMERAL);
3457	NFS_BITMAP_SET(newmflags, NFS_MFLAG_EPHEMERAL);
3458	xb_add_bitmap(error, &xbnew, newmflags_mask, NFS_MFLAG_BITMAP_LEN);
3459	xb_add_bitmap(error, &xbnew, newmflags, NFS_MFLAG_BITMAP_LEN);
3460	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
3461		xb_copy_32(error, &xb, &xbnew, val);
3462	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
3463		xb_copy_32(error, &xb, &xbnew, val);
3464	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
3465		xb_copy_32(error, &xb, &xbnew, val);
3466	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
3467		xb_copy_32(error, &xb, &xbnew, val);
3468	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
3469		xb_copy_32(error, &xb, &xbnew, val);
3470	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
3471		xb_copy_32(error, &xb, &xbnew, val);
3472	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
3473		xb_copy_32(error, &xb, &xbnew, val);
3474		xb_copy_32(error, &xb, &xbnew, val);
3475	}
3476	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
3477		xb_copy_32(error, &xb, &xbnew, val);
3478		xb_copy_32(error, &xb, &xbnew, val);
3479	}
3480	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
3481		xb_copy_32(error, &xb, &xbnew, val);
3482		xb_copy_32(error, &xb, &xbnew, val);
3483	}
3484	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
3485		xb_copy_32(error, &xb, &xbnew, val);
3486		xb_copy_32(error, &xb, &xbnew, val);
3487	}
3488	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
3489		xb_copy_32(error, &xb, &xbnew, val);
3490	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
3491		xb_copy_32(error, &xb, &xbnew, count);
3492		while (!error && (count-- > 0))
3493			xb_copy_32(error, &xb, &xbnew, val);
3494	}
3495	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
3496		xb_copy_32(error, &xb, &xbnew, val);
3497	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
3498		xb_copy_opaque(error, &xb, &xbnew);
3499	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
3500		xb_copy_32(error, &xb, &xbnew, val);
3501	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
3502		xb_copy_32(error, &xb, &xbnew, val);
3503	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
3504		xb_copy_32(error, &xb, &xbnew, val);
3505		xb_copy_32(error, &xb, &xbnew, val);
3506	}
3507	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
3508		xb_copy_32(error, &xb, &xbnew, val);
3509	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
3510		xb_copy_32(error, &xb, &xbnew, val);
3511		xb_copy_32(error, &xb, &xbnew, val);
3512	}
3513	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
3514		xb_get_32(error, &xb, count);
3515		xb_skip(error, &xb, count);
3516	}
3517	if (!referral) {
3518		/* set the initial file handle to the directory's file handle */
3519		xb_add_fh(error, &xbnew, np->n_fhp, np->n_fhsize);
3520	}
3521	/* copy/extend/skip fs locations */
3522	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
3523		numlocs = numserv = numaddr = numcomp = 0;
3524		if (referral) /* don't copy the fs locations for a referral */
3525			skipcopy = 1;
3526		xb_copy_32(error, &xb, &xbnew, numlocs); /* location count */
3527		for (loc = 0; !error && (loc < numlocs); loc++) {
3528			xb_copy_32(error, &xb, &xbnew, numserv); /* server count */
3529			for (serv = 0; !error && (serv < numserv); serv++) {
3530				xb_copy_opaque(error, &xb, &xbnew); /* server name */
3531				xb_copy_32(error, &xb, &xbnew, numaddr); /* address count */
3532				for (addr = 0; !error && (addr < numaddr); addr++)
3533					xb_copy_opaque(error, &xb, &xbnew); /* address */
3534				xb_copy_opaque(error, &xb, &xbnew); /* server info */
3535			}
3536			/* pathname */
3537			xb_get_32(error, &xb, numcomp); /* component count */
3538			if (!skipcopy)
3539				xb_add_32(error, &xbnew, numcomp+relpathcomps); /* new component count */
3540			for (comp = 0; !error && (comp < numcomp); comp++)
3541				xb_copy_opaque(error, &xb, &xbnew); /* component */
3542			/* add additional components */
3543			for (comp = 0; !skipcopy && !error && (comp < relpathcomps); comp++) {
3544				p = relpath;
3545				while (*p && (*p == '/'))
3546					p++;
3547				while (*p && !error) {
3548					cp = p;
3549					while (*p && (*p != '/'))
3550						p++;
3551					xb_add_string(error, &xbnew, cp, (p - cp)); /* component */
3552					while (*p && (*p == '/'))
3553						p++;
3554				}
3555			}
3556			xb_copy_opaque(error, &xb, &xbnew); /* fs location info */
3557		}
3558		if (referral)
3559			skipcopy = 0;
3560	}
3561	if (referral) {
3562		/* add referral's fs locations */
3563		xb_add_32(error, &xbnew, nfsls.nl_numlocs);			/* FS_LOCATIONS */
3564		for (loc = 0; !error && (loc < nfsls.nl_numlocs); loc++) {
3565			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servcount);
3566			for (serv = 0; !error && (serv < nfsls.nl_locations[loc]->nl_servcount); serv++) {
3567				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_name,
3568					strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_name));
3569				xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
3570				for (addr = 0; !error && (addr < nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
3571					xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
3572						strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
3573				xb_add_32(error, &xbnew, 0); /* empty server info */
3574			}
3575			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_compcount);
3576			for (comp = 0; !error && (comp < nfsls.nl_locations[loc]->nl_path.np_compcount); comp++)
3577				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_components[comp],
3578					strlen(nfsls.nl_locations[loc]->nl_path.np_components[comp]));
3579			xb_add_32(error, &xbnew, 0); /* empty fs location info */
3580		}
3581	}
3582	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
3583		xb_get_32(error, &xb, mntflags);
3584	/*
3585	 * We add the following mount flags to the ones for the mounted-on mount:
3586	 * MNT_DONTBROWSE - to keep the mount from showing up as a separate volume
3587	 * MNT_AUTOMOUNTED - to keep DiskArb from retriggering the mount after
3588	 *                   an unmount (looking for /.autodiskmounted)
3589	 */
3590	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
3591	xb_add_32(error, &xbnew, mntflags);
3592	if (!referral && NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
3593		/* copy mntfrom string and add relpath */
3594		rlen = strlen(relpath);
3595		xb_get_32(error, &xb, mlen);
3596		nfsmerr_if(error);
3597		mlen2 = mlen + ((relpath[0] != '/') ? 1 : 0) + rlen;
3598		xb_add_32(error, &xbnew, mlen2);
3599		count = mlen/XDRWORD;
3600		/* copy the original string */
3601		while (count-- > 0)
3602			xb_copy_32(error, &xb, &xbnew, val);
3603		if (!error && (mlen % XDRWORD)) {
3604			error = xb_get_bytes(&xb, buf, mlen%XDRWORD, 0);
3605			if (!error)
3606				error = xb_add_bytes(&xbnew, buf, mlen%XDRWORD, 1);
3607		}
3608		/* insert a '/' if the relative path doesn't start with one */
3609		if (!error && (relpath[0] != '/')) {
3610			buf[0] = '/';
3611			error = xb_add_bytes(&xbnew, buf, 1, 1);
3612		}
3613		/* add the additional relative path */
3614		if (!error)
3615			error = xb_add_bytes(&xbnew, relpath, rlen, 1);
3616		/* make sure the resulting string has the right number of pad bytes */
3617		if (!error && (mlen2 != nfsm_rndup(mlen2))) {
3618			bzero(buf, sizeof(buf));
3619			count = nfsm_rndup(mlen2) - mlen2;
3620			error = xb_add_bytes(&xbnew, buf, count, 1);
3621		}
3622	}
3623	xb_build_done(error, &xbnew);
3624
3625	/* update opaque counts */
3626	end_offset = xb_offset(&xbnew);
3627	if (!error) {
3628		error = xb_seek(&xbnew, argslength_offset);
3629		argslength = end_offset - argslength_offset + XDRWORD/*version*/;
3630		xb_add_32(error, &xbnew, argslength);
3631	}
3632	if (!error) {
3633		error = xb_seek(&xbnew, attrslength_offset);
3634		xb_add_32(error, &xbnew, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
3635	}
3636	nfsmerr_if(error);
3637
3638	/*
3639	 * For kernel_mount() call, use the existing mount flags (instead of the
3640	 * original flags) because flags like MNT_NOSUID and MNT_NODEV may have
3641	 * been silently enforced.
3642	 */
3643	mntflags = vnode_vfsvisflags(vp);
3644	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
3645
3646	/* do the mount */
3647	error = kernel_mount(fstype, dvp, vp, path, xb_buffer_base(&xbnew), argslength,
3648			mntflags, KERNEL_MOUNT_PERMIT_UNMOUNT | KERNEL_MOUNT_NOAUTH, ctx);
3649
3650nfsmerr:
3651	if (error)
3652		printf("nfs: mirror mount of %s on %s failed (%d)\n",
3653			mntfromname, path, error);
3654	/* clean up */
3655	xb_cleanup(&xbnew);
3656	if (referral)
3657		nfs_fs_locations_cleanup(&nfsls);
3658	if (path)
3659		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3660	if (mntfromname)
3661		FREE_ZONE(mntfromname, MAXPATHLEN, M_NAMEI);
3662	if (!error)
3663		nfs_ephemeral_mount_harvester_start();
3664	return (error);
3665}
3666
3667/*
3668 * trigger vnode functions
3669 */
3670
3671resolver_result_t
3672nfs_mirror_mount_trigger_resolve(
3673	vnode_t vp,
3674	const struct componentname *cnp,
3675	enum path_operation pop,
3676	__unused int flags,
3677	__unused void *data,
3678	vfs_context_t ctx)
3679{
3680	nfsnode_t np = VTONFS(vp);
3681	vnode_t pvp = NULLVP;
3682	int error = 0;
3683	resolver_result_t result;
3684
3685	/*
3686	 * We have a trigger node that doesn't have anything mounted on it yet.
3687	 * We'll do the mount if either:
3688	 * (a) this isn't the last component of the path OR
3689	 * (b) this is an op that looks like it should trigger the mount.
3690	 */
3691	if (cnp->cn_flags & ISLASTCN) {
3692		switch (pop) {
3693		case OP_MOUNT:
3694		case OP_UNMOUNT:
3695		case OP_STATFS:
3696		case OP_LINK:
3697		case OP_UNLINK:
3698		case OP_RENAME:
3699		case OP_MKNOD:
3700		case OP_MKFIFO:
3701		case OP_SYMLINK:
3702		case OP_ACCESS:
3703		case OP_GETATTR:
3704		case OP_MKDIR:
3705		case OP_RMDIR:
3706		case OP_REVOKE:
3707		case OP_GETXATTR:
3708		case OP_LISTXATTR:
3709			/* don't perform the mount for these operations */
3710			result = vfs_resolver_result(np->n_trigseq, RESOLVER_NOCHANGE, 0);
3711#ifdef NFS_TRIGGER_DEBUG
3712			NP(np, "nfs trigger RESOLVE: no change, last %d nameiop %d, seq %d",
3713				(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3714#endif
3715			return (result);
3716		case OP_OPEN:
3717		case OP_CHDIR:
3718		case OP_CHROOT:
3719		case OP_TRUNCATE:
3720		case OP_COPYFILE:
3721		case OP_PATHCONF:
3722		case OP_READLINK:
3723		case OP_SETATTR:
3724		case OP_EXCHANGEDATA:
3725		case OP_SEARCHFS:
3726		case OP_FSCTL:
3727		case OP_SETXATTR:
3728		case OP_REMOVEXATTR:
3729		default:
3730			/* go ahead and do the mount */
3731			break;
3732		}
3733	}
3734
3735	if (vnode_mountedhere(vp) != NULL) {
3736		/*
3737		 * Um... there's already something mounted.
3738		 * Been there.  Done that.  Let's just say it succeeded.
3739		 */
3740		error = 0;
3741		goto skipmount;
3742	}
3743
3744	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3745		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3746#ifdef NFS_TRIGGER_DEBUG
3747		NP(np, "nfs trigger RESOLVE: busy error %d, last %d nameiop %d, seq %d",
3748			error, (cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3749#endif
3750		return (result);
3751	}
3752
3753	pvp = vnode_getparent(vp);
3754	if (pvp == NULLVP)
3755		error = EINVAL;
3756	if (!error)
3757		error = nfs_mirror_mount_domount(pvp, vp, ctx);
3758skipmount:
3759	if (!error)
3760		np->n_trigseq++;
3761	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_RESOLVED, error);
3762#ifdef NFS_TRIGGER_DEBUG
3763	NP(np, "nfs trigger RESOLVE: %s %d, last %d nameiop %d, seq %d",
3764		error ? "error" : "resolved", error,
3765		(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3766#endif
3767
3768	if (pvp != NULLVP)
3769		vnode_put(pvp);
3770	nfs_node_clear_busy(np);
3771	return (result);
3772}
3773
3774resolver_result_t
3775nfs_mirror_mount_trigger_unresolve(
3776	vnode_t vp,
3777	int flags,
3778	__unused void *data,
3779	vfs_context_t ctx)
3780{
3781	nfsnode_t np = VTONFS(vp);
3782	mount_t mp;
3783	int error;
3784	resolver_result_t result;
3785
3786	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3787		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3788#ifdef NFS_TRIGGER_DEBUG
3789		NP(np, "nfs trigger UNRESOLVE: busy error %d, seq %d", error, np->n_trigseq);
3790#endif
3791		return (result);
3792	}
3793
3794	mp = vnode_mountedhere(vp);
3795	if (!mp)
3796		error = EINVAL;
3797	if (!error)
3798		error = vfs_unmountbyfsid(&(vfs_statfs(mp)->f_fsid), flags, ctx);
3799	if (!error)
3800		np->n_trigseq++;
3801	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_UNRESOLVED, error);
3802#ifdef NFS_TRIGGER_DEBUG
3803	NP(np, "nfs trigger UNRESOLVE: %s %d, seq %d",
3804		error ? "error" : "unresolved", error, np->n_trigseq);
3805#endif
3806	nfs_node_clear_busy(np);
3807	return (result);
3808}
3809
3810resolver_result_t
3811nfs_mirror_mount_trigger_rearm(
3812	vnode_t vp,
3813	__unused int flags,
3814	__unused void *data,
3815	vfs_context_t ctx)
3816{
3817	nfsnode_t np = VTONFS(vp);
3818	int error;
3819	resolver_result_t result;
3820
3821	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3822		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3823#ifdef NFS_TRIGGER_DEBUG
3824		NP(np, "nfs trigger REARM: busy error %d, seq %d", error, np->n_trigseq);
3825#endif
3826		return (result);
3827	}
3828
3829	np->n_trigseq++;
3830	result = vfs_resolver_result(np->n_trigseq,
3831			vnode_mountedhere(vp) ? RESOLVER_RESOLVED : RESOLVER_UNRESOLVED, 0);
3832#ifdef NFS_TRIGGER_DEBUG
3833	NP(np, "nfs trigger REARM: %s, seq %d",
3834		vnode_mountedhere(vp) ? "resolved" : "unresolved", np->n_trigseq);
3835#endif
3836	nfs_node_clear_busy(np);
3837	return (result);
3838}
3839
3840/*
3841 * Periodically attempt to unmount ephemeral (mirror) mounts in an attempt to limit
3842 * the number of unused mounts.
3843 */
3844
/*
 * Harvest interval, in units of NSEC_PER_SEC (i.e. seconds) when used to arm
 * the timer.  The scan callback also subtracts it from the current time to
 * skip mounts that are too recent to unmount.
 */
#define NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL	120	/* how often the harvester runs */
/* Per-scan state handed to the mount iteration callback. */
struct nfs_ephemeral_mount_harvester_info {
	fsid_t		fsid;		/* FSID that we need to try to unmount */
	uint32_t	mountcount;	/* count of ephemeral mounts seen in scan */
 };
/* various globals for the harvester */
/* timer call; allocated the first time the harvester is started */
static thread_call_t nfs_ephemeral_mount_harvester_timer = NULL;
/* nonzero while the timer is armed; read and written under nfs_global_mutex */
static int nfs_ephemeral_mount_harvester_on = 0;

/* local prototype; the harvester thread calls this on itself when it is done */
kern_return_t thread_terminate(thread_t);
3855
3856static int
3857nfs_ephemeral_mount_harvester_callback(mount_t mp, void *arg)
3858{
3859	struct nfs_ephemeral_mount_harvester_info *hinfo = arg;
3860	struct nfsmount *nmp;
3861	struct timeval now;
3862
3863	if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
3864		return (VFS_RETURNED);
3865	nmp = VFSTONFS(mp);
3866	if (!nmp || !NMFLAG(nmp, EPHEMERAL))
3867		return (VFS_RETURNED);
3868	hinfo->mountcount++;
3869
3870	/* avoid unmounting mounts that have been triggered within the last harvest interval */
3871	microtime(&now);
3872	if ((nmp->nm_mounttime >> 32) > ((uint32_t)now.tv_sec - NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL))
3873		return (VFS_RETURNED);
3874
3875	if (hinfo->fsid.val[0] || hinfo->fsid.val[1]) {
3876		/* attempt to unmount previously-found ephemeral mount */
3877		vfs_unmountbyfsid(&hinfo->fsid, 0, vfs_context_kernel());
3878		hinfo->fsid.val[0] = hinfo->fsid.val[1] = 0;
3879	}
3880
3881	/*
3882	 * We can't call unmount here since we hold a mount iter ref
3883	 * on mp so save its fsid for the next call iteration to unmount.
3884	 */
3885	hinfo->fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
3886	hinfo->fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
3887
3888	return (VFS_RETURNED);
3889}
3890
3891/*
3892 * Spawn a thread to do the ephemeral mount harvesting.
3893 */
3894static void
3895nfs_ephemeral_mount_harvester_timer_func(void)
3896{
3897	thread_t thd;
3898
3899	if (kernel_thread_start(nfs_ephemeral_mount_harvester, NULL, &thd) == KERN_SUCCESS)
3900		thread_deallocate(thd);
3901}
3902
3903/*
3904 * Iterate all mounts looking for NFS ephemeral mounts to try to unmount.
3905 */
3906void
3907nfs_ephemeral_mount_harvester(__unused void *arg, __unused wait_result_t wr)
3908{
3909	struct nfs_ephemeral_mount_harvester_info hinfo;
3910	uint64_t deadline;
3911
3912	hinfo.mountcount = 0;
3913	hinfo.fsid.val[0] = hinfo.fsid.val[1] = 0;
3914	vfs_iterate(VFS_ITERATE_TAIL_FIRST, nfs_ephemeral_mount_harvester_callback, &hinfo);
3915	if (hinfo.fsid.val[0] || hinfo.fsid.val[1]) {
3916		/* attempt to unmount last found ephemeral mount */
3917		vfs_unmountbyfsid(&hinfo.fsid, 0, vfs_context_kernel());
3918	}
3919
3920	lck_mtx_lock(nfs_global_mutex);
3921	if (!hinfo.mountcount) {
3922		/* no more ephemeral mounts - don't need timer */
3923		nfs_ephemeral_mount_harvester_on = 0;
3924	} else {
3925		/* re-arm the timer */
3926		clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
3927		thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
3928		nfs_ephemeral_mount_harvester_on = 1;
3929	}
3930	lck_mtx_unlock(nfs_global_mutex);
3931
3932	/* thread done */
3933	thread_terminate(current_thread());
3934}
3935
3936/*
3937 * Make sure the NFS ephemeral mount harvester timer is running.
3938 */
3939void
3940nfs_ephemeral_mount_harvester_start(void)
3941{
3942	uint64_t deadline;
3943
3944	lck_mtx_lock(nfs_global_mutex);
3945	if (nfs_ephemeral_mount_harvester_on) {
3946		lck_mtx_unlock(nfs_global_mutex);
3947		return;
3948	}
3949	if (nfs_ephemeral_mount_harvester_timer == NULL)
3950		nfs_ephemeral_mount_harvester_timer = thread_call_allocate((thread_call_func_t)nfs_ephemeral_mount_harvester_timer_func, NULL);
3951	clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
3952	thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
3953	nfs_ephemeral_mount_harvester_on = 1;
3954	lck_mtx_unlock(nfs_global_mutex);
3955}
3956
3957#endif
3958
/*
 * Send a MOUNT protocol MOUNT request to the server to get the initial file handle (and security).
 *
 * nmp     - mount whose options are consulted (MNTUDP flag, nm_mountport) and
 *           which is handed to the portmap/RPC helpers
 * sa      - server address; copied into local storage so the port can be
 *           rewritten without modifying the caller's copy
 * sotype  - socket type; SOCK_DGRAM (or the MNTUDP flag) selects UDP
 * nfsvers - NFS version; NFS_VER2 selects MOUNT v1, anything else MOUNT v3
 * path    - NUL-terminated path sent as the request's argument
 * ctx     - supplies the thread and credential used for the request
 * timeo   - timeout passed to the portmap lookup and to the request
 * fh      - passed to nfsm_chain_get_fh() to be filled in from the reply
 * sec     - security flavor list; count is reset to 0 on entry and the list
 *           is only parsed from the reply when nfsvers > NFS_VER2
 *
 * Returns 0 on success.  Otherwise returns an error from one of the helpers,
 * EPROGUNAVAIL if no mount port could be found even over UDP, or the
 * non-zero status word at the start of the reply.
 */
int
nfs3_mount_rpc(struct nfsmount *nmp, struct sockaddr *sa, int sotype, int nfsvers, char *path, vfs_context_t ctx, int timeo, fhandle_t *fh, struct nfs_sec *sec)
{
	int error = 0, slen, mntproto;
	thread_t thd = vfs_context_thread(ctx);
	kauth_cred_t cred = vfs_context_ucred(ctx);
	uint64_t xid = 0;
	struct nfsm_chain nmreq, nmrep;
	mbuf_t mreq;
	uint32_t mntvers, mntport, val;
	struct sockaddr_storage ss;
	struct sockaddr *saddr = (struct sockaddr*)&ss;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
	sec->count = 0;

	/* work on a private copy of the address; an explicit mount port overrides its port */
	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
	if (saddr->sa_family == AF_INET) {
		if (nmp->nm_mountport)
			((struct sockaddr_in*)saddr)->sin_port = htons(nmp->nm_mountport);
		mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
	} else {
		/* any family other than AF_INET is treated as IPv6 */
		if (nmp->nm_mountport)
			((struct sockaddr_in6*)saddr)->sin6_port = htons(nmp->nm_mountport);
		mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
	}

	/* no port yet: look it up, falling back from TCP to UDP once */
	while (!mntport) {
		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
		nfsmout_if(error);
		if (saddr->sa_family == AF_INET)
			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
		else
			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
		if (!mntport) {
			/* if not found and TCP, then retry with UDP */
			if (mntproto == IPPROTO_UDP) {
				error = EPROGUNAVAIL;
				break;
			}
			mntproto = IPPROTO_UDP;
			/* start the retry from a fresh copy of the caller's address */
			bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
		}
	}
	nfsmout_if(error || !mntport);

	/* MOUNT protocol MOUNT request */
	slen = strlen(path);
	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
			RPCPROG_MNT, mntvers, RPCMNT_MOUNT,
			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
	nfsmout_if(error);
	/* presumably the request chain now belongs to mreq; keep the cleanup below from touching it */
	nmreq.nmc_mhead = NULL;
	error = nfs_aux_request(nmp, thd, saddr, NULL,
			((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
			mreq, R_XID32(xid), 1, timeo, &nmrep);
	nfsmout_if(error);
	/* the first word of the reply is a status; non-zero is returned as the error */
	nfsm_chain_get_32(error, &nmrep, val);
	if (!error && val)
		error = val;
	nfsm_chain_get_fh(error, &nmrep, nfsvers, fh);
	if (!error && (nfsvers > NFS_VER2)) {
		/* count goes in as the capacity of flavors[] */
		sec->count = NX_MAX_SEC_FLAVORS;
		error = nfsm_chain_get_secinfo(&nmrep, &sec->flavors[0], &sec->count);
	}
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
4040
4041
/*
 * Send a MOUNT protocol UNMOUNT request to tell the server we've unmounted it.
 *
 * nmp   - the mount; supplies the server address (nm_saddr), NFS version,
 *         socket type, mount port and the "mounted from" name
 * ctx   - supplies the thread and credential used for the request
 * timeo - timeout passed to the portmap lookup and to the request
 *
 * Best effort: nothing is returned, and any error (including the result of
 * the request itself) is dropped.  Does nothing if the mount has no server
 * address.
 */
void
nfs3_umount_rpc(struct nfsmount *nmp, vfs_context_t ctx, int timeo)
{
	int error = 0, slen, mntproto;
	thread_t thd = vfs_context_thread(ctx);
	kauth_cred_t cred = vfs_context_ucred(ctx);
	char *path;
	uint64_t xid = 0;
	struct nfsm_chain nmreq, nmrep;
	mbuf_t mreq;
	uint32_t mntvers, mntport;
	struct sockaddr_storage ss;
	struct sockaddr *saddr = (struct sockaddr*)&ss;

	if (!nmp->nm_saddr)
		return;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nmp->nm_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
	mntport = nmp->nm_mountport;

	/* private copy of the server address with the mount port (possibly 0) filled in */
	bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
	if (saddr->sa_family == AF_INET)
		((struct sockaddr_in*)saddr)->sin_port = htons(mntport);
	else
		((struct sockaddr_in6*)saddr)->sin6_port = htons(mntport);

	/*
	 * No port known: look it up.  Lookups that come back empty are retried
	 * with MOUNT v1, then with UDP (back at the original MOUNT version, and
	 * v1 again after that) before giving up.
	 */
	while (!mntport) {
		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
  		nfsmout_if(error);
		if (saddr->sa_family == AF_INET)
			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
		else
			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
		/* if not found and mntvers > VER1, then retry with VER1 */
		if (!mntport) {
			if (mntvers > RPCMNT_VER1) {
				mntvers = RPCMNT_VER1;
			} else if (mntproto == IPPROTO_TCP) {
				mntproto = IPPROTO_UDP;
				mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
			} else {
				break;
			}
			/* start the retry from a fresh copy of the server address */
			bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
		}
	}
	nfsmout_if(!mntport);

	/* MOUNT protocol UNMOUNT request */
	/* the path sent is the "mounted from" name starting at its first '/' */
	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
	while (*path && (*path != '/'))
		path++;
	slen = strlen(path);
	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	/* NOTE(review): the header always says RPCMNT_VER1, whatever mntvers the lookup settled on */
	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
			RPCPROG_MNT, RPCMNT_VER1, RPCMNT_UMOUNT,
			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
	nfsmout_if(error);
	/* presumably the request chain now belongs to mreq; keep the cleanup below from touching it */
	nmreq.nmc_mhead = NULL;
	/* the result is stored but never examined; the reply is just cleaned up */
	error = nfs_aux_request(nmp, thd, saddr, NULL,
		((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
		mreq, R_XID32(xid), 1, timeo, &nmrep);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
}
4118
4119/*
4120 * unmount system call
4121 */
4122int
4123nfs_vfs_unmount(
4124	mount_t mp,
4125	int mntflags,
4126	__unused vfs_context_t ctx)
4127{
4128	struct nfsmount *nmp;
4129	vnode_t vp;
4130	int error, flags = 0;
4131	struct timespec ts = { 1, 0 };
4132
4133	nmp = VFSTONFS(mp);
4134	lck_mtx_lock(&nmp->nm_lock);
4135	/*
4136	 * Set the flag indicating that an unmount attempt is in progress.
4137	 */
4138	nmp->nm_state |= NFSSTA_UNMOUNTING;
4139	/*
4140	 * During a force unmount we want to...
4141	 *   Mark that we are doing a force unmount.
4142	 *   Make the mountpoint soft.
4143	 */
4144	if (mntflags & MNT_FORCE) {
4145		flags |= FORCECLOSE;
4146		nmp->nm_state |= NFSSTA_FORCE;
4147		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
4148	}
4149	/*
4150	 * Wait for any in-progress monitored node scan to complete.
4151	 */
4152	while (nmp->nm_state & NFSSTA_MONITOR_SCAN)
4153		msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
4154	/*
4155	 * Goes something like this..
4156	 * - Call vflush() to clear out vnodes for this file system,
4157	 *   except for the swap files. Deal with them in 2nd pass.
4158	 * - Decrement reference on the vnode representing remote root.
4159	 * - Clean up the NFS mount structure.
4160	 */
4161	vp = NFSTOV(nmp->nm_dnp);
4162	lck_mtx_unlock(&nmp->nm_lock);
4163
4164	/*
4165	 * vflush will check for busy vnodes on mountpoint.
4166	 * Will do the right thing for MNT_FORCE. That is, we should
4167	 * not get EBUSY back.
4168	 */
4169	error = vflush(mp, vp, SKIPSWAP | flags);
4170	if (mntflags & MNT_FORCE) {
4171		error = vflush(mp, NULLVP, flags); /* locks vp in the process */
4172	} else {
4173		if (vnode_isinuse(vp, 1))
4174			error = EBUSY;
4175		else
4176			error = vflush(mp, vp, flags);
4177	}
4178	if (error) {
4179		lck_mtx_lock(&nmp->nm_lock);
4180		nmp->nm_state &= ~NFSSTA_UNMOUNTING;
4181		lck_mtx_unlock(&nmp->nm_lock);
4182		return (error);
4183	}
4184
4185	lck_mtx_lock(&nmp->nm_lock);
4186	nmp->nm_dnp = NULL;
4187	lck_mtx_unlock(&nmp->nm_lock);
4188
4189	/*
4190	 * Release the root vnode reference held by mountnfs()
4191	 */
4192	error = vnode_get(vp);
4193	vnode_rele(vp);
4194	if (!error)
4195		vnode_put(vp);
4196
4197	vflush(mp, NULLVP, FORCECLOSE);
4198
4199	/* Wait for all other references to be released and free the mount */
4200	nfs_mount_drain_and_cleanup(nmp);
4201
4202	return (0);
4203}
4204
4205/*
4206 * cleanup/destroy NFS fs locations structure
4207 */
4208void
4209nfs_fs_locations_cleanup(struct nfs_fs_locations *nfslsp)
4210{
4211	struct nfs_fs_location *fsl;
4212	struct nfs_fs_server *fss;
4213	struct nfs_fs_path *fsp;
4214	uint32_t loc, serv, addr, comp;
4215
4216	/* free up fs locations */
4217	if (!nfslsp->nl_numlocs || !nfslsp->nl_locations)
4218		return;
4219
4220	for (loc = 0; loc < nfslsp->nl_numlocs; loc++) {
4221		fsl = nfslsp->nl_locations[loc];
4222		if (!fsl)
4223			continue;
4224		if ((fsl->nl_servcount > 0) && fsl->nl_servers) {
4225			for (serv = 0; serv < fsl->nl_servcount; serv++) {
4226				fss = fsl->nl_servers[serv];
4227				if (!fss)
4228					continue;
4229				if ((fss->ns_addrcount > 0) && fss->ns_addresses) {
4230					for (addr = 0; addr < fss->ns_addrcount; addr++)
4231						FREE(fss->ns_addresses[addr], M_TEMP);
4232					FREE(fss->ns_addresses, M_TEMP);
4233				}
4234				FREE(fss->ns_name, M_TEMP);
4235				FREE(fss, M_TEMP);
4236			}
4237			FREE(fsl->nl_servers, M_TEMP);
4238		}
4239		fsp = &fsl->nl_path;
4240		if (fsp->np_compcount && fsp->np_components) {
4241			for (comp = 0; comp < fsp->np_compcount; comp++)
4242				if (fsp->np_components[comp])
4243					FREE(fsp->np_components[comp], M_TEMP);
4244			FREE(fsp->np_components, M_TEMP);
4245		}
4246		FREE(fsl, M_TEMP);
4247	}
4248	FREE(nfslsp->nl_locations, M_TEMP);
4249	nfslsp->nl_numlocs = 0;
4250	nfslsp->nl_locations = NULL;
4251}
4252
4253void
4254nfs_mount_rele(struct nfsmount *nmp)
4255{
4256	int wup = 0;
4257
4258	lck_mtx_lock(&nmp->nm_lock);
4259	if (nmp->nm_ref < 1)
4260		panic("nfs zombie mount underflow\n");
4261	nmp->nm_ref--;
4262	if (nmp->nm_ref == 0)
4263		wup = nmp->nm_state & NFSSTA_MOUNT_DRAIN;
4264	lck_mtx_unlock(&nmp->nm_lock);
4265	if (wup)
4266		wakeup(&nmp->nm_ref);
4267}
4268
4269void
4270nfs_mount_drain_and_cleanup(struct nfsmount *nmp)
4271{
4272	lck_mtx_lock(&nmp->nm_lock);
4273	nmp->nm_state |= NFSSTA_MOUNT_DRAIN;
4274	while (nmp->nm_ref > 0) {
4275		msleep(&nmp->nm_ref, &nmp->nm_lock, PZERO-1, "nfs_mount_drain", NULL);
4276	}
4277	assert(nmp->nm_ref == 0);
4278	lck_mtx_unlock(&nmp->nm_lock);
4279	nfs_mount_cleanup(nmp);
4280}
4281
4282/*
4283 * nfs_mount_zombie
4284 */
4285void
4286nfs_mount_zombie(struct nfsmount *nmp, int nm_state_flags)
4287{
4288	struct nfsreq *req, *treq;
4289	struct nfs_reqqhead iodq;
4290	struct timespec ts = { 1, 0 };
4291	struct nfs_open_owner *noop, *nextnoop;
4292	nfsnode_t np;
4293	int docallback;
4294
4295	lck_mtx_lock(&nmp->nm_lock);
4296	nmp->nm_state |= nm_state_flags;
4297	nmp->nm_ref++;
4298	lck_mtx_unlock(&nmp->nm_lock);
4299
4300	/* stop callbacks */
4301	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid)
4302		nfs4_mount_callback_shutdown(nmp);
4303
4304	/* Destroy any RPCSEC_GSS contexts */
4305	if (!TAILQ_EMPTY(&nmp->nm_gsscl))
4306		nfs_gss_clnt_ctx_unmount(nmp);
4307
4308	/* mark the socket for termination */
4309	lck_mtx_lock(&nmp->nm_lock);
4310	nmp->nm_sockflags |= NMSOCK_UNMOUNT;
4311
4312	/* Have the socket thread send the unmount RPC, if requested/appropriate. */
4313	if ((nmp->nm_vers < NFS_VER4) && (nmp->nm_state & NFSSTA_MOUNTED) &&
4314	    !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && NMFLAG(nmp, CALLUMNT))
4315		nfs_mount_sock_thread_wake(nmp);
4316
4317	/* wait for the socket thread to terminate */
4318	while (nmp->nm_sockthd && current_thread() != nmp->nm_sockthd) {
4319		wakeup(&nmp->nm_sockthd);
4320		msleep(&nmp->nm_sockthd, &nmp->nm_lock, PZERO-1, "nfswaitsockthd", &ts);
4321	}
4322	lck_mtx_unlock(&nmp->nm_lock);
4323
4324	/* tear down the socket */
4325	nfs_disconnect(nmp);
4326
4327	lck_mtx_lock(&nmp->nm_lock);
4328
4329	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid) {
4330		/* clear out any pending delegation return requests */
4331		while ((np = TAILQ_FIRST(&nmp->nm_dreturnq))) {
4332			TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
4333			np->n_dreturn.tqe_next = NFSNOLIST;
4334		}
4335	}
4336
4337	/* cancel any renew timer */
4338	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_renew_timer) {
4339		thread_call_cancel(nmp->nm_renew_timer);
4340		thread_call_free(nmp->nm_renew_timer);
4341	}
4342
4343	lck_mtx_unlock(&nmp->nm_lock);
4344
4345	if (nmp->nm_state & NFSSTA_MOUNTED)
4346		switch (nmp->nm_lockmode) {
4347		case NFS_LOCK_MODE_DISABLED:
4348		case NFS_LOCK_MODE_LOCAL:
4349			break;
4350		case NFS_LOCK_MODE_ENABLED:
4351		default:
4352			if (nmp->nm_vers <= NFS_VER3) {
4353				nfs_lockd_mount_unregister(nmp);
4354				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
4355			}
4356			break;
4357		}
4358
4359	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_longid) {
4360		/* remove/deallocate the client ID data */
4361		lck_mtx_lock(nfs_global_mutex);
4362		TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link);
4363		if (nmp->nm_longid->nci_id)
4364			FREE(nmp->nm_longid->nci_id, M_TEMP);
4365		FREE(nmp->nm_longid, M_TEMP);
4366		lck_mtx_unlock(nfs_global_mutex);
4367	}
4368
4369	/*
4370	 * Loop through outstanding request list and remove dangling
4371	 * references to defunct nfsmount struct
4372	 */
4373	TAILQ_INIT(&iodq);
4374	lck_mtx_lock(nfs_request_mutex);
4375	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
4376		if (req->r_nmp == nmp) {
4377			if (req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT)) {
4378				/* async I/O RPC needs to be finished */
4379				lck_mtx_lock(nfsiod_mutex);
4380				if (req->r_achain.tqe_next == NFSREQNOLIST) {
4381					TAILQ_INSERT_TAIL(&iodq, req, r_achain);
4382				}
4383				lck_mtx_unlock(nfsiod_mutex);
4384			}
4385			wakeup(req);
4386		}
4387	}
4388	lck_mtx_unlock(nfs_request_mutex);
4389
4390	/* finish any async I/O RPCs queued up */
4391	lck_mtx_lock(nfsiod_mutex);
4392	if (nmp->nm_iodlink.tqe_next != NFSNOLIST)
4393		TAILQ_REMOVE(&nfsiodmounts, nmp, nm_iodlink);
4394	TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
4395	lck_mtx_unlock(nfsiod_mutex);
4396	TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
4397		TAILQ_REMOVE(&iodq, req, r_achain);
4398		lck_mtx_lock(nfsiod_mutex);
4399		req->r_achain.tqe_next = NFSIODCOMPLETING;
4400		lck_mtx_unlock(nfsiod_mutex);
4401		lck_mtx_lock(&req->r_mtx);
4402		req->r_error = ENXIO;
4403		docallback = !(req->r_flags & R_WAITSENT);
4404		lck_mtx_unlock(&req->r_mtx);
4405		if (docallback)
4406			req->r_callback.rcb_func(req);
4407	}
4408
4409	/* clean up common state */
4410	lck_mtx_lock(&nmp->nm_lock);
4411 	while ((np = LIST_FIRST(&nmp->nm_monlist))) {
4412 		LIST_REMOVE(np, n_monlink);
4413 		np->n_monlink.le_next = NFSNOLIST;
4414 	}
4415	TAILQ_FOREACH_SAFE(noop, &nmp->nm_open_owners, noo_link, nextnoop) {
4416		TAILQ_REMOVE(&nmp->nm_open_owners, noop, noo_link);
4417		noop->noo_flags &= ~NFS_OPEN_OWNER_LINK;
4418		if (noop->noo_refcnt)
4419			continue;
4420		nfs_open_owner_destroy(noop);
4421	}
4422	lck_mtx_unlock(&nmp->nm_lock);
4423
4424	/* clean up NFSv4 state */
4425	if (nmp->nm_vers >= NFS_VER4) {
4426		lck_mtx_lock(&nmp->nm_lock);
4427		while ((np = TAILQ_FIRST(&nmp->nm_delegations))) {
4428			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
4429			np->n_dlink.tqe_next = NFSNOLIST;
4430		}
4431		lck_mtx_unlock(&nmp->nm_lock);
4432	}
4433
4434	nfs_mount_rele(nmp);
4435}
4436
4437/*
4438 * cleanup/destroy an nfsmount
4439 */
4440void
4441nfs_mount_cleanup(struct nfsmount *nmp)
4442{
4443	if (!nmp)
4444		return;
4445
4446	nfs_mount_zombie(nmp, 0);
4447
4448	NFS_VFS_DBG("Unmounting %s from %s\n",
4449		    vfs_statfs(nmp->nm_mountp)->f_mntfromname,
4450		    vfs_statfs(nmp->nm_mountp)->f_mntonname);
4451	NFS_VFS_DBG("nfs state = %x\n", nmp->nm_state);
4452	NFS_VFS_DBG("nfs socket flags = %x\n", nmp->nm_sockflags);
4453	NFS_VFS_DBG("nfs mount ref count is %d\n", nmp->nm_ref);
4454	NFS_VFS_DBG("mount ref count is %d\n", nmp->nm_mountp->mnt_count);
4455
4456	if (nmp->nm_mountp)
4457		vfs_setfsprivate(nmp->nm_mountp, NULL);
4458
4459	lck_mtx_lock(&nmp->nm_lock);
4460	if (nmp->nm_ref)
4461		panic("Some one has grabbed a ref %d\n", nmp->nm_ref);
4462
4463	if (nmp->nm_saddr)
4464		FREE(nmp->nm_saddr, M_SONAME);
4465	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_rqsaddr)
4466		FREE(nmp->nm_rqsaddr, M_SONAME);
4467
4468	if (IS_VALID_CRED(nmp->nm_mcred))
4469		kauth_cred_unref(&nmp->nm_mcred);
4470
4471	nfs_fs_locations_cleanup(&nmp->nm_locations);
4472
4473	if (nmp->nm_realm)
4474		FREE(nmp->nm_realm, M_TEMP);
4475	if (nmp->nm_principal)
4476		FREE(nmp->nm_principal, M_TEMP);
4477	if (nmp->nm_sprinc)
4478		FREE(nmp->nm_sprinc, M_TEMP);
4479
4480	if (nmp->nm_args)
4481		xb_free(nmp->nm_args);
4482
4483	lck_mtx_unlock(&nmp->nm_lock);
4484
4485	lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp);
4486	if (nmp->nm_fh)
4487		FREE(nmp->nm_fh, M_TEMP);
4488	FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
4489}
4490
4491/*
4492 * Return root of a filesystem
4493 */
4494int
4495nfs_vfs_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t ctx)
4496{
4497	vnode_t vp;
4498	struct nfsmount *nmp;
4499	int error;
4500	u_int32_t vpid;
4501
4502	nmp = VFSTONFS(mp);
4503	if (!nmp || !nmp->nm_dnp)
4504		return (ENXIO);
4505	vp = NFSTOV(nmp->nm_dnp);
4506	vpid = vnode_vid(vp);
4507	while ((error = vnode_getwithvid(vp, vpid))) {
4508		/* vnode_get() may return ENOENT if the dir changes. */
4509		/* If that happens, just try it again, else return the error. */
4510		if ((error != ENOENT) || (vnode_vid(vp) == vpid))
4511			return (error);
4512		vpid = vnode_vid(vp);
4513	}
4514	*vpp = vp;
4515	return (0);
4516}
4517
4518/*
4519 * Do operations associated with quotas
4520 */
4521#if !QUOTA
4522int
4523nfs_vfs_quotactl(
4524	__unused mount_t mp,
4525	__unused int cmds,
4526	__unused uid_t uid,
4527	__unused caddr_t datap,
4528	__unused vfs_context_t context)
4529{
4530	return (ENOTSUP);
4531}
4532#else
4533
4534int
4535nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
4536{
4537	int error = 0, slen, timeo;
4538	int rqport = 0, rqproto, rqvers = (type == GRPQUOTA) ? RPCRQUOTA_EXT_VER : RPCRQUOTA_VER;
4539	thread_t thd = vfs_context_thread(ctx);
4540	kauth_cred_t cred = vfs_context_ucred(ctx);
4541	char *path;
4542	uint64_t xid = 0;
4543	struct nfsm_chain nmreq, nmrep;
4544	mbuf_t mreq;
4545	uint32_t val = 0, bsize = 0;
4546	struct sockaddr *rqsaddr;
4547	struct timeval now;
4548
4549	if (!nmp->nm_saddr)
4550		return (ENXIO);
4551
4552	if (NMFLAG(nmp, NOQUOTA))
4553		return (ENOTSUP);
4554
4555	if (!nmp->nm_rqsaddr)
4556		MALLOC(nmp->nm_rqsaddr, struct sockaddr *, sizeof(struct sockaddr_storage), M_SONAME, M_WAITOK|M_ZERO);
4557	if (!nmp->nm_rqsaddr)
4558		return (ENOMEM);
4559	rqsaddr = nmp->nm_rqsaddr;
4560	if (rqsaddr->sa_family == AF_INET6)
4561		rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
4562	else if (rqsaddr->sa_family == AF_INET)
4563		rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
4564
4565	timeo = NMFLAG(nmp, SOFT) ? 10 : 60;
4566	rqproto = IPPROTO_UDP; /* XXX should prefer TCP if mount is TCP */
4567
4568	/* check if we have a recently cached rquota port */
4569	microuptime(&now);
4570	if (!rqport || ((nmp->nm_rqsaddrstamp + 60) >= (uint32_t)now.tv_sec)) {
4571		/* send portmap request to get rquota port */
4572		bcopy(nmp->nm_saddr, rqsaddr, min(sizeof(struct sockaddr_storage), nmp->nm_saddr->sa_len));
4573		error = nfs_portmap_lookup(nmp, ctx, rqsaddr, NULL, RPCPROG_RQUOTA, rqvers, rqproto, timeo);
4574		if (error)
4575			return (error);
4576		if (rqsaddr->sa_family == AF_INET6)
4577			rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
4578		else if (rqsaddr->sa_family == AF_INET)
4579			rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
4580		else
4581			return (EIO);
4582		if (!rqport)
4583			return (ENOTSUP);
4584		microuptime(&now);
4585		nmp->nm_rqsaddrstamp = now.tv_sec;
4586	}
4587
4588	/* rquota request */
4589	nfsm_chain_null(&nmreq);
4590	nfsm_chain_null(&nmrep);
4591	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
4592	while (*path && (*path != '/'))
4593		path++;
4594	slen = strlen(path);
4595	nfsm_chain_build_alloc_init(error, &nmreq, 3 * NFSX_UNSIGNED + nfsm_rndup(slen));
4596	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
4597	if (type == GRPQUOTA)
4598		nfsm_chain_add_32(error, &nmreq, type);
4599	nfsm_chain_add_32(error, &nmreq, id);
4600	nfsm_chain_build_done(error, &nmreq);
4601	nfsmout_if(error);
4602	error = nfsm_rpchead2(nmp, (rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4603			RPCPROG_RQUOTA, rqvers, RPCRQUOTA_GET,
4604			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
4605	nfsmout_if(error);
4606	nmreq.nmc_mhead = NULL;
4607	error = nfs_aux_request(nmp, thd, rqsaddr, NULL,
4608			(rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4609			mreq, R_XID32(xid), 0, timeo, &nmrep);
4610	nfsmout_if(error);
4611
4612	/* parse rquota response */
4613	nfsm_chain_get_32(error, &nmrep, val);
4614	if (!error && (val != RQUOTA_STAT_OK)) {
4615		if (val == RQUOTA_STAT_NOQUOTA)
4616			error = ENOENT;
4617		else if (val == RQUOTA_STAT_EPERM)
4618			error = EPERM;
4619		else
4620			error = EIO;
4621	}
4622	nfsm_chain_get_32(error, &nmrep, bsize);
4623	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
4624	nfsm_chain_get_32(error, &nmrep, val);
4625	nfsmout_if(error);
4626	dqb->dqb_bhardlimit = (uint64_t)val * bsize;
4627	nfsm_chain_get_32(error, &nmrep, val);
4628	nfsmout_if(error);
4629	dqb->dqb_bsoftlimit = (uint64_t)val * bsize;
4630	nfsm_chain_get_32(error, &nmrep, val);
4631	nfsmout_if(error);
4632	dqb->dqb_curbytes = (uint64_t)val * bsize;
4633	nfsm_chain_get_32(error, &nmrep, dqb->dqb_ihardlimit);
4634	nfsm_chain_get_32(error, &nmrep, dqb->dqb_isoftlimit);
4635	nfsm_chain_get_32(error, &nmrep, dqb->dqb_curinodes);
4636	nfsm_chain_get_32(error, &nmrep, dqb->dqb_btime);
4637	nfsm_chain_get_32(error, &nmrep, dqb->dqb_itime);
4638	nfsmout_if(error);
4639	dqb->dqb_id = id;
4640nfsmout:
4641	nfsm_chain_cleanup(&nmreq);
4642	nfsm_chain_cleanup(&nmrep);
4643	return (error);
4644}
4645
4646int
4647nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
4648{
4649	nfsnode_t np;
4650	int error = 0, status, nfsvers, numops;
4651	u_int64_t xid;
4652	struct nfsm_chain nmreq, nmrep;
4653	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
4654	thread_t thd = vfs_context_thread(ctx);
4655	kauth_cred_t cred = vfs_context_ucred(ctx);
4656	struct nfsreq_secinfo_args si;
4657
4658	if (type != USRQUOTA)  /* NFSv4 only supports user quotas */
4659		return (ENOTSUP);
4660
4661	/* first check that the server supports any of the quota attributes */
4662	if (!NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
4663	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
4664	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED))
4665		return (ENOTSUP);
4666
4667	/*
4668	 * The credential passed to the server needs to have
4669	 * an effective uid that matches the given uid.
4670	 */
4671	if (id != kauth_cred_getuid(cred)) {
4672		struct posix_cred temp_pcred;
4673		posix_cred_t pcred = posix_cred_get(cred);
4674		bzero(&temp_pcred, sizeof(temp_pcred));
4675		temp_pcred.cr_uid = id;
4676		temp_pcred.cr_ngroups = pcred->cr_ngroups;
4677		bcopy(pcred->cr_groups, temp_pcred.cr_groups, sizeof(temp_pcred.cr_groups));
4678		cred = posix_cred_create(&temp_pcred);
4679		if (!IS_VALID_CRED(cred))
4680			return (ENOMEM);
4681	} else {
4682		kauth_cred_ref(cred);
4683	}
4684
4685	nfsvers = nmp->nm_vers;
4686	np = nmp->nm_dnp;
4687	if (!np)
4688		error = ENXIO;
4689	if (error || ((error = vnode_get(NFSTOV(np))))) {
4690		kauth_cred_unref(&cred);
4691		return(error);
4692	}
4693
4694	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
4695	nfsm_chain_null(&nmreq);
4696	nfsm_chain_null(&nmrep);
4697
4698	// PUTFH + GETATTR
4699	numops = 2;
4700	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
4701	nfsm_chain_add_compound_header(error, &nmreq, "quota", numops);
4702	numops--;
4703	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
4704	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
4705	numops--;
4706	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4707	NFS_CLEAR_ATTRIBUTES(bitmap);
4708	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_HARD);
4709	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_SOFT);
4710	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_USED);
4711	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
4712	nfsm_chain_build_done(error, &nmreq);
4713	nfsm_assert(error, (numops == 0), EPROTO);
4714	nfsmout_if(error);
4715	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);
4716	nfsm_chain_skip_tag(error, &nmrep);
4717	nfsm_chain_get_32(error, &nmrep, numops);
4718	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
4719	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4720	nfsm_assert(error, NFSTONMP(np), ENXIO);
4721	nfsmout_if(error);
4722	error = nfs4_parsefattr(&nmrep, NULL, NULL, NULL, dqb, NULL);
4723	nfsmout_if(error);
4724	nfsm_assert(error, NFSTONMP(np), ENXIO);
4725nfsmout:
4726	nfsm_chain_cleanup(&nmreq);
4727	nfsm_chain_cleanup(&nmrep);
4728	vnode_put(NFSTOV(np));
4729	kauth_cred_unref(&cred);
4730	return (error);
4731}
4732
4733int
4734nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t ctx)
4735{
4736	struct nfsmount *nmp;
4737	int cmd, type, error, nfsvers;
4738	uid_t euid = kauth_cred_getuid(vfs_context_ucred(ctx));
4739	struct dqblk *dqb = (struct dqblk*)datap;
4740
4741	nmp = VFSTONFS(mp);
4742	if (nfs_mount_gone(nmp))
4743		return (ENXIO);
4744	nfsvers = nmp->nm_vers;
4745
4746	if (uid == ~0U)
4747		uid = euid;
4748
4749	/* we can only support Q_GETQUOTA */
4750	cmd = cmds >> SUBCMDSHIFT;
4751	switch (cmd) {
4752	case Q_GETQUOTA:
4753		break;
4754	case Q_QUOTAON:
4755	case Q_QUOTAOFF:
4756	case Q_SETQUOTA:
4757	case Q_SETUSE:
4758	case Q_SYNC:
4759	case Q_QUOTASTAT:
4760		return (ENOTSUP);
4761	default:
4762		return (EINVAL);
4763	}
4764
4765	type = cmds & SUBCMDMASK;
4766	if ((u_int)type >= MAXQUOTAS)
4767		return (EINVAL);
4768	if ((uid != euid) && ((error = vfs_context_suser(ctx))))
4769		return (error);
4770
4771	if (vfs_busy(mp, LK_NOWAIT))
4772		return (0);
4773	bzero(dqb, sizeof(*dqb));
4774	error = nmp->nm_funcs->nf_getquota(nmp, ctx, uid, type, dqb);
4775	vfs_unbusy(mp);
4776	return (error);
4777}
4778#endif
4779
4780/*
4781 * Flush out the buffer cache
4782 */
4783int nfs_sync_callout(vnode_t, void *);
4784
4785struct nfs_sync_cargs {
4786	vfs_context_t	ctx;
4787	int		waitfor;
4788	int		error;
4789};
4790
4791int
4792nfs_sync_callout(vnode_t vp, void *arg)
4793{
4794	struct nfs_sync_cargs *cargs = (struct nfs_sync_cargs*)arg;
4795	nfsnode_t np = VTONFS(vp);
4796	int error;
4797
4798	if (np->n_flag & NREVOKE) {
4799		vn_revoke(vp, REVOKEALL, cargs->ctx);
4800		return (VNODE_RETURNED);
4801	}
4802
4803	if (LIST_EMPTY(&np->n_dirtyblkhd))
4804		return (VNODE_RETURNED);
4805	if (np->n_wrbusy > 0)
4806		return (VNODE_RETURNED);
4807	if (np->n_bflag & (NBFLUSHINPROG|NBINVALINPROG))
4808		return (VNODE_RETURNED);
4809
4810	error = nfs_flush(np, cargs->waitfor, vfs_context_thread(cargs->ctx), 0);
4811	if (error)
4812		cargs->error = error;
4813
4814	return (VNODE_RETURNED);
4815}
4816
4817int
4818nfs_vfs_sync(mount_t mp, int waitfor, vfs_context_t ctx)
4819{
4820	struct nfs_sync_cargs cargs;
4821
4822	cargs.waitfor = waitfor;
4823	cargs.ctx = ctx;
4824	cargs.error = 0;
4825
4826	vnode_iterate(mp, 0, nfs_sync_callout, &cargs);
4827
4828	return (cargs.error);
4829}
4830
4831/*
4832 * NFS flat namespace lookup.
4833 * Currently unsupported.
4834 */
4835/*ARGSUSED*/
4836int
4837nfs_vfs_vget(
4838	__unused mount_t mp,
4839	__unused ino64_t ino,
4840	__unused vnode_t *vpp,
4841	__unused vfs_context_t ctx)
4842{
4843
4844	return (ENOTSUP);
4845}
4846
4847/*
4848 * At this point, this should never happen
4849 */
4850/*ARGSUSED*/
4851int
4852nfs_vfs_fhtovp(
4853	__unused mount_t mp,
4854	__unused int fhlen,
4855	__unused unsigned char *fhp,
4856	__unused vnode_t *vpp,
4857	__unused vfs_context_t ctx)
4858{
4859
4860	return (ENOTSUP);
4861}
4862
4863/*
4864 * Vnode pointer to File handle, should never happen either
4865 */
4866/*ARGSUSED*/
4867int
4868nfs_vfs_vptofh(
4869	__unused vnode_t vp,
4870	__unused int *fhlenp,
4871	__unused unsigned char *fhp,
4872	__unused vfs_context_t ctx)
4873{
4874
4875	return (ENOTSUP);
4876}
4877
4878/*
4879 * Vfs start routine, a no-op.
4880 */
4881/*ARGSUSED*/
4882int
4883nfs_vfs_start(
4884	__unused mount_t mp,
4885	__unused int flags,
4886	__unused vfs_context_t ctx)
4887{
4888
4889	return (0);
4890}
4891
4892/*
4893 * Build the mount info buffer for NFS_MOUNTINFO.
4894 */
4895int
4896nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb)
4897{
4898	struct xdrbuf xbinfo, xborig;
4899	char sotype[6];
4900	uint32_t origargsvers, origargslength;
4901	uint32_t infolength_offset, curargsopaquelength_offset, curargslength_offset, attrslength_offset, curargs_end_offset, end_offset;
4902	uint32_t miattrs[NFS_MIATTR_BITMAP_LEN];
4903	uint32_t miflags_mask[NFS_MIFLAG_BITMAP_LEN];
4904	uint32_t miflags[NFS_MIFLAG_BITMAP_LEN];
4905	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
4906	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
4907	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
4908	uint32_t loc, serv, addr, comp;
4909	int i, timeo, error = 0;
4910
4911	/* set up mount info attr and flag bitmaps */
4912	NFS_BITMAP_ZERO(miattrs, NFS_MIATTR_BITMAP_LEN);
4913	NFS_BITMAP_SET(miattrs, NFS_MIATTR_FLAGS);
4914	NFS_BITMAP_SET(miattrs, NFS_MIATTR_ORIG_ARGS);
4915	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_ARGS);
4916	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_LOC_INDEX);
4917	NFS_BITMAP_ZERO(miflags_mask, NFS_MIFLAG_BITMAP_LEN);
4918	NFS_BITMAP_ZERO(miflags, NFS_MIFLAG_BITMAP_LEN);
4919	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_DEAD);
4920	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_NOTRESP);
4921	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_RECOVERY);
4922	if (nmp->nm_state & NFSSTA_DEAD)
4923		NFS_BITMAP_SET(miflags, NFS_MIFLAG_DEAD);
4924	if ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO)) ||
4925	    ((nmp->nm_state & NFSSTA_LOCKTIMEO) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED)))
4926		NFS_BITMAP_SET(miflags, NFS_MIFLAG_NOTRESP);
4927	if (nmp->nm_state & NFSSTA_RECOVER)
4928		NFS_BITMAP_SET(miflags, NFS_MIFLAG_RECOVERY);
4929
4930	/* get original mount args length */
4931	xb_init_buffer(&xborig, nmp->nm_args, 2*XDRWORD);
4932	xb_get_32(error, &xborig, origargsvers); /* version */
4933	xb_get_32(error, &xborig, origargslength); /* args length */
4934	nfsmerr_if(error);
4935
4936	/* set up current mount attributes bitmap */
4937	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
4938	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
4939	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
4940	if (nmp->nm_vers >= NFS_VER4)
4941		NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_MINOR_VERSION);
4942	NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
4943	NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
4944	NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
4945	NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
4946	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
4947	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
4948	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
4949	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
4950	NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
4951	NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
4952	NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
4953	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
4954	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
4955	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
4956		NFS_BITMAP_SET(mattrs, NFS_MATTR_MOUNT_PORT);
4957	NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
4958	if (NMFLAG(nmp, SOFT))
4959		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
4960	if (nmp->nm_deadtimeout)
4961		NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
4962	if (nmp->nm_fh)
4963		NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
4964	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
4965	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
4966	if (origargsvers < NFS_ARGSVERSION_XDR)
4967		NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
4968	if (nmp->nm_realm)
4969		NFS_BITMAP_SET(mattrs, NFS_MATTR_REALM);
4970	if (nmp->nm_principal)
4971		NFS_BITMAP_SET(mattrs, NFS_MATTR_PRINCIPAL);
4972	if (nmp->nm_sprinc)
4973		NFS_BITMAP_SET(mattrs, NFS_MATTR_SVCPRINCIPAL);
4974
4975	/* set up current mount flags bitmap */
4976	/* first set the flags that we will be setting - either on OR off */
4977	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
4978	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
4979	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
4980	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
4981	if (nmp->nm_sotype == SOCK_DGRAM)
4982		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
4983	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
4984	if (nmp->nm_vers < NFS_VER4)
4985		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
4986	if (nmp->nm_vers >= NFS_VER3)
4987		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
4988	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
4989	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
4990	if (nmp->nm_vers >= NFS_VER4) {
4991		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_EPHEMERAL);
4992		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCALLBACK);
4993		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONAMEDATTR);
4994		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOACL);
4995		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_ACLONLY);
4996	}
4997	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NFC);
4998	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
4999	if (nmp->nm_vers < NFS_VER4)
5000		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTUDP);
5001	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTQUICK);
5002	/* now set the flags that should be set */
5003	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
5004	if (NMFLAG(nmp, SOFT))
5005		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
5006	if (NMFLAG(nmp, INTR))
5007		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
5008	if (NMFLAG(nmp, RESVPORT))
5009		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
5010	if ((nmp->nm_sotype == SOCK_DGRAM) && NMFLAG(nmp, NOCONNECT))
5011		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
5012	if (NMFLAG(nmp, DUMBTIMER))
5013		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
5014	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, CALLUMNT))
5015		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
5016	if ((nmp->nm_vers >= NFS_VER3) && NMFLAG(nmp, RDIRPLUS))
5017		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
5018	if (NMFLAG(nmp, NONEGNAMECACHE))
5019		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
5020	if (NMFLAG(nmp, MUTEJUKEBOX))
5021		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
5022	if (nmp->nm_vers >= NFS_VER4) {
5023		if (NMFLAG(nmp, EPHEMERAL))
5024			NFS_BITMAP_SET(mflags, NFS_MFLAG_EPHEMERAL);
5025		if (NMFLAG(nmp, NOCALLBACK))
5026			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCALLBACK);
5027		if (NMFLAG(nmp, NONAMEDATTR))
5028			NFS_BITMAP_SET(mflags, NFS_MFLAG_NONAMEDATTR);
5029		if (NMFLAG(nmp, NOACL))
5030			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOACL);
5031		if (NMFLAG(nmp, ACLONLY))
5032			NFS_BITMAP_SET(mflags, NFS_MFLAG_ACLONLY);
5033	}
5034	if (NMFLAG(nmp, NFC))
5035		NFS_BITMAP_SET(mflags, NFS_MFLAG_NFC);
5036	if (NMFLAG(nmp, NOQUOTA) || ((nmp->nm_vers >= NFS_VER4) &&
5037	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
5038	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
5039	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED)))
5040		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
5041	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, MNTUDP))
5042		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTUDP);
5043	if (NMFLAG(nmp, MNTQUICK))
5044		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTQUICK);
5045
5046	/* assemble info buffer: */
5047	xb_init_buffer(&xbinfo, NULL, 0);
5048	xb_add_32(error, &xbinfo, NFS_MOUNT_INFO_VERSION);
5049	infolength_offset = xb_offset(&xbinfo);
5050	xb_add_32(error, &xbinfo, 0);
5051	xb_add_bitmap(error, &xbinfo, miattrs, NFS_MIATTR_BITMAP_LEN);
5052	xb_add_bitmap(error, &xbinfo, miflags, NFS_MIFLAG_BITMAP_LEN);
5053	xb_add_32(error, &xbinfo, origargslength);
5054	if (!error)
5055		error = xb_add_bytes(&xbinfo, nmp->nm_args, origargslength, 0);
5056
5057	/* the opaque byte count for the current mount args values: */
5058	curargsopaquelength_offset = xb_offset(&xbinfo);
5059	xb_add_32(error, &xbinfo, 0);
5060
5061	/* Encode current mount args values */
5062	xb_add_32(error, &xbinfo, NFS_ARGSVERSION_XDR);
5063	curargslength_offset = xb_offset(&xbinfo);
5064	xb_add_32(error, &xbinfo, 0);
5065	xb_add_32(error, &xbinfo, NFS_XDRARGS_VERSION_0);
5066	xb_add_bitmap(error, &xbinfo, mattrs, NFS_MATTR_BITMAP_LEN);
5067	attrslength_offset = xb_offset(&xbinfo);
5068	xb_add_32(error, &xbinfo, 0);
5069	xb_add_bitmap(error, &xbinfo, mflags_mask, NFS_MFLAG_BITMAP_LEN);
5070	xb_add_bitmap(error, &xbinfo, mflags, NFS_MFLAG_BITMAP_LEN);
5071	xb_add_32(error, &xbinfo, nmp->nm_vers);		/* NFS_VERSION */
5072	if (nmp->nm_vers >= NFS_VER4)
5073		xb_add_32(error, &xbinfo, 0);			/* NFS_MINOR_VERSION */
5074	xb_add_32(error, &xbinfo, nmp->nm_rsize);		/* READ_SIZE */
5075	xb_add_32(error, &xbinfo, nmp->nm_wsize);		/* WRITE_SIZE */
5076	xb_add_32(error, &xbinfo, nmp->nm_readdirsize);		/* READDIR_SIZE */
5077	xb_add_32(error, &xbinfo, nmp->nm_readahead);		/* READAHEAD */
5078	xb_add_32(error, &xbinfo, nmp->nm_acregmin);		/* ATTRCACHE_REG_MIN */
5079	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MIN */
5080	xb_add_32(error, &xbinfo, nmp->nm_acregmax);		/* ATTRCACHE_REG_MAX */
5081	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MAX */
5082	xb_add_32(error, &xbinfo, nmp->nm_acdirmin);		/* ATTRCACHE_DIR_MIN */
5083	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MIN */
5084	xb_add_32(error, &xbinfo, nmp->nm_acdirmax);		/* ATTRCACHE_DIR_MAX */
5085	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MAX */
5086	xb_add_32(error, &xbinfo, nmp->nm_lockmode);		/* LOCK_MODE */
5087	if (nmp->nm_sec.count) {
5088		xb_add_32(error, &xbinfo, nmp->nm_sec.count);		/* SECURITY */
5089		nfsmerr_if(error);
5090		for (i=0; i < nmp->nm_sec.count; i++)
5091			xb_add_32(error, &xbinfo, nmp->nm_sec.flavors[i]);
5092	} else if (nmp->nm_servsec.count) {
5093		xb_add_32(error, &xbinfo, nmp->nm_servsec.count);	/* SECURITY */
5094		nfsmerr_if(error);
5095		for (i=0; i < nmp->nm_servsec.count; i++)
5096			xb_add_32(error, &xbinfo, nmp->nm_servsec.flavors[i]);
5097	} else {
5098		xb_add_32(error, &xbinfo, 1);				/* SECURITY */
5099		xb_add_32(error, &xbinfo, nmp->nm_auth);
5100	}
5101	xb_add_32(error, &xbinfo, nmp->nm_numgrps);		/* MAX_GROUP_LIST */
5102	nfsmerr_if(error);
5103	snprintf(sotype, sizeof(sotype), "%s%s", (nmp->nm_sotype == SOCK_DGRAM) ? "udp" : "tcp",
5104		nmp->nm_sofamily ? (nmp->nm_sofamily == AF_INET) ? "4" : "6" : "");
5105	xb_add_string(error, &xbinfo, sotype, strlen(sotype));	/* SOCKET_TYPE */
5106	xb_add_32(error, &xbinfo, ntohs(((struct sockaddr_in*)nmp->nm_saddr)->sin_port)); /* NFS_PORT */
5107	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
5108		xb_add_32(error, &xbinfo, nmp->nm_mountport);	/* MOUNT_PORT */
5109	timeo = (nmp->nm_timeo * 10) / NFS_HZ;
5110	xb_add_32(error, &xbinfo, timeo/10);			/* REQUEST_TIMEOUT */
5111	xb_add_32(error, &xbinfo, (timeo%10)*100000000);	/* REQUEST_TIMEOUT */
5112	if (NMFLAG(nmp, SOFT))
5113		xb_add_32(error, &xbinfo, nmp->nm_retry);	/* SOFT_RETRY_COUNT */
5114	if (nmp->nm_deadtimeout) {
5115		xb_add_32(error, &xbinfo, nmp->nm_deadtimeout);	/* DEAD_TIMEOUT */
5116		xb_add_32(error, &xbinfo, 0);			/* DEAD_TIMEOUT */
5117	}
5118	if (nmp->nm_fh)
5119		xb_add_fh(error, &xbinfo, &nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len); /* FH */
5120	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_numlocs);			/* FS_LOCATIONS */
5121	for (loc = 0; !error && (loc < nmp->nm_locations.nl_numlocs); loc++) {
5122		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servcount);
5123		for (serv = 0; !error && (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount); serv++) {
5124			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name,
5125				strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
5126			xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
5127			for (addr = 0; !error && (addr < nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
5128				xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
5129					strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
5130			xb_add_32(error, &xbinfo, 0); /* empty server info */
5131		}
5132		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount);
5133		for (comp = 0; !error && (comp < nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount); comp++)
5134			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp],
5135				strlen(nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp]));
5136		xb_add_32(error, &xbinfo, 0); /* empty fs location info */
5137	}
5138	xb_add_32(error, &xbinfo, vfs_flags(nmp->nm_mountp));		/* MNTFLAGS */
5139	if (origargsvers < NFS_ARGSVERSION_XDR)
5140		xb_add_string(error, &xbinfo, vfs_statfs(nmp->nm_mountp)->f_mntfromname,
5141			strlen(vfs_statfs(nmp->nm_mountp)->f_mntfromname));	/* MNTFROM */
5142	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REALM))
5143		xb_add_string(error, &xbinfo, nmp->nm_realm, strlen(nmp->nm_realm));
5144	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_PRINCIPAL))
5145		xb_add_string(error, &xbinfo, nmp->nm_principal, strlen(nmp->nm_principal));
5146	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SVCPRINCIPAL))
5147		xb_add_string(error, &xbinfo, nmp->nm_sprinc, strlen(nmp->nm_sprinc));
5148
5149	curargs_end_offset = xb_offset(&xbinfo);
5150
5151	/* NFS_MIATTR_CUR_LOC_INDEX */
5152	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_flags);
5153	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_loc);
5154	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_serv);
5155	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_addr);
5156
5157	xb_build_done(error, &xbinfo);
5158
5159	/* update opaque counts */
5160	end_offset = xb_offset(&xbinfo);
5161	if (!error) {
5162		error = xb_seek(&xbinfo, attrslength_offset);
5163		xb_add_32(error, &xbinfo, curargs_end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
5164	}
5165	if (!error) {
5166		error = xb_seek(&xbinfo, curargslength_offset);
5167		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
5168	}
5169	if (!error) {
5170		error = xb_seek(&xbinfo, curargsopaquelength_offset);
5171		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
5172	}
5173	if (!error) {
5174		error = xb_seek(&xbinfo, infolength_offset);
5175		xb_add_32(error, &xbinfo, end_offset - infolength_offset + XDRWORD/*version*/);
5176	}
5177	nfsmerr_if(error);
5178
5179	/* copy result xdrbuf to caller */
5180	*xb = xbinfo;
5181
5182	/* and mark the local copy as not needing cleanup */
5183	xbinfo.xb_flags &= ~XB_CLEANUP;
5184nfsmerr:
5185	xb_cleanup(&xbinfo);
5186	return (error);
5187}
5188
5189/*
5190 * Do that sysctl thang...
5191 */
5192int
5193nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
5194           user_addr_t newp, size_t newlen, vfs_context_t ctx)
5195{
5196	int error = 0, val;
5197	int softnobrowse;
5198	struct sysctl_req *req = NULL;
5199	union union_vfsidctl vc;
5200	mount_t mp;
5201	struct nfsmount *nmp = NULL;
5202	struct vfsquery vq;
5203	struct nfsreq *rq;
5204	boolean_t is_64_bit;
5205	fsid_t fsid;
5206	struct xdrbuf xb;
5207	struct netfs_status *nsp = NULL;
5208	int timeoutmask;
5209	uint pos, totlen, count, numThreads;
5210#if NFSSERVER
5211	struct nfs_exportfs *nxfs;
5212	struct nfs_export *nx;
5213	struct nfs_active_user_list *ulist;
5214	struct nfs_export_stat_desc stat_desc;
5215	struct nfs_export_stat_rec statrec;
5216	struct nfs_user_stat_node *unode, *unode_next;
5217	struct nfs_user_stat_desc ustat_desc;
5218	struct nfs_user_stat_user_rec ustat_rec;
5219	struct nfs_user_stat_path_rec upath_rec;
5220	uint bytes_avail, bytes_total, recs_copied;
5221	uint numExports, numRecs;
5222#endif /* NFSSERVER */
5223
5224	/*
5225	 * All names at this level are terminal.
5226	 */
5227	if (namelen > 1)
5228		return (ENOTDIR);	/* overloaded */
5229
5230	is_64_bit = vfs_context_is64bit(ctx);
5231
5232	/* common code for "new style" VFS_CTL sysctl, get the mount. */
5233	switch (name[0]) {
5234	case VFS_CTL_TIMEO:
5235	case VFS_CTL_NOLOCKS:
5236	case VFS_CTL_NSTATUS:
5237	case VFS_CTL_QUERY:
5238		req = CAST_DOWN(struct sysctl_req *, oldp);
5239		if (req == NULL) {
5240			return EFAULT;
5241		}
5242		error = SYSCTL_IN(req, &vc, is_64_bit? sizeof(vc.vc64):sizeof(vc.vc32));
5243		if (error)
5244			return (error);
5245		mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
5246		if (mp == NULL)
5247			return (ENOENT);
5248		nmp = VFSTONFS(mp);
5249		if (!nmp)
5250			return (ENOENT);
5251		bzero(&vq, sizeof(vq));
5252		req->newidx = 0;
5253		if (is_64_bit) {
5254			req->newptr = vc.vc64.vc_ptr;
5255			req->newlen = (size_t)vc.vc64.vc_len;
5256		} else {
5257			req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr);
5258			req->newlen = vc.vc32.vc_len;
5259		}
5260		break;
5261	}
5262
5263	switch(name[0]) {
5264	case NFS_NFSSTATS:
5265		if (!oldp) {
5266			*oldlenp = sizeof nfsstats;
5267			return (0);
5268		}
5269
5270		if (*oldlenp < sizeof nfsstats) {
5271			*oldlenp = sizeof nfsstats;
5272			return (ENOMEM);
5273		}
5274
5275		error = copyout(&nfsstats, oldp, sizeof nfsstats);
5276		if (error)
5277			return (error);
5278
5279		if (newp && newlen != sizeof nfsstats)
5280			return (EINVAL);
5281
5282		if (newp)
5283			return copyin(newp, &nfsstats, sizeof nfsstats);
5284		return (0);
5285	case NFS_MOUNTINFO:
5286		/* read in the fsid */
5287		if (*oldlenp < sizeof(fsid))
5288			return (EINVAL);
5289		if ((error = copyin(oldp, &fsid, sizeof(fsid))))
5290			return (error);
5291		/* swizzle it back to host order */
5292		fsid.val[0] = ntohl(fsid.val[0]);
5293		fsid.val[1] = ntohl(fsid.val[1]);
5294		/* find mount and make sure it's NFS */
5295		if (((mp = vfs_getvfs(&fsid))) == NULL)
5296			return (ENOENT);
5297		if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
5298			return (EINVAL);
5299		if (((nmp = VFSTONFS(mp))) == NULL)
5300			return (ENOENT);
5301		xb_init(&xb, 0);
5302		if ((error = nfs_mountinfo_assemble(nmp, &xb)))
5303			return (error);
5304		if (*oldlenp < xb.xb_u.xb_buffer.xbb_len)
5305			error = ENOMEM;
5306		else
5307			error = copyout(xb_buffer_base(&xb), oldp, xb.xb_u.xb_buffer.xbb_len);
5308		*oldlenp = xb.xb_u.xb_buffer.xbb_len;
5309		xb_cleanup(&xb);
5310		break;
5311#if NFSSERVER
5312	case NFS_EXPORTSTATS:
5313		/* setup export stat descriptor */
5314		stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
5315
5316		if (!nfsrv_is_initialized()) {
5317			stat_desc.rec_count = 0;
5318			if (oldp && (*oldlenp >= sizeof(struct nfs_export_stat_desc)))
5319				error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5320			*oldlenp = sizeof(struct nfs_export_stat_desc);
5321			return (error);
5322		}
5323
5324		/* Count the number of exported directories */
5325		lck_rw_lock_shared(&nfsrv_export_rwlock);
5326		numExports = 0;
5327		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
5328			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
5329					numExports += 1;
5330
5331		/* update stat descriptor's export record count */
5332		stat_desc.rec_count = numExports;
5333
5334		/* calculate total size of required buffer */
5335		totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
5336
5337		/* Check caller's buffer */
5338		if (oldp == 0) {
5339			lck_rw_done(&nfsrv_export_rwlock);
5340			/* indicate required buffer len */
5341			*oldlenp = totlen;
5342			return (0);
5343		}
5344
5345		/* We require the caller's buffer to be at least large enough to hold the descriptor */
5346		if (*oldlenp < sizeof(struct nfs_export_stat_desc)) {
5347			lck_rw_done(&nfsrv_export_rwlock);
5348			/* indicate required buffer len */
5349			*oldlenp = totlen;
5350			return (ENOMEM);
5351		}
5352
5353		/* indicate required buffer len */
5354		*oldlenp = totlen;
5355
5356		/* check if export table is empty */
5357		if (!numExports) {
5358			lck_rw_done(&nfsrv_export_rwlock);
5359			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5360			return (error);
5361		}
5362
5363		/* calculate how many actual export stat records fit into caller's buffer */
5364		numRecs = (*oldlenp - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
5365
5366		if (!numRecs) {
5367			/* caller's buffer can only accommodate descriptor */
5368			lck_rw_done(&nfsrv_export_rwlock);
5369			stat_desc.rec_count = 0;
5370			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5371			return (error);
5372		}
5373
5374		/* adjust to actual number of records to copyout to caller's buffer */
5375		if (numRecs > numExports)
5376			numRecs = numExports;
5377
5378		/* set actual number of records we are returning */
5379		stat_desc.rec_count = numRecs;
5380
5381		/* first copy out the stat descriptor */
5382		pos = 0;
5383		error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
5384		if (error) {
5385			lck_rw_done(&nfsrv_export_rwlock);
5386			return (error);
5387		}
5388		pos += sizeof(struct nfs_export_stat_desc);
5389
5390		/* Loop through exported directories */
5391		count = 0;
5392		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
5393			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
5394
5395				if (count >= numRecs)
5396					break;
5397
5398				/* build exported filesystem path */
5399				snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
5400					nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
5401					nx->nx_path);
5402
5403				/* build the 64-bit export stat counters */
5404				statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
5405						nx->nx_stats.ops.lo;
5406				statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
5407						nx->nx_stats.bytes_read.lo;
5408				statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
5409						nx->nx_stats.bytes_written.lo;
5410				error = copyout(&statrec, oldp + pos, sizeof(statrec));
5411				if (error) {
5412					lck_rw_done(&nfsrv_export_rwlock);
5413					return (error);
5414				}
5415				/* advance buffer position */
5416				pos += sizeof(statrec);
5417			}
5418		}
5419		lck_rw_done(&nfsrv_export_rwlock);
5420		break;
5421	case NFS_USERSTATS:
5422		/* init structures used for copying out of kernel */
5423		ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
5424		ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
5425		upath_rec.rec_type = NFS_USER_STAT_PATH_REC;
5426
5427		/* initialize counters */
5428		bytes_total = sizeof(struct nfs_user_stat_desc);
5429		bytes_avail  = *oldlenp;
5430		recs_copied = 0;
5431
5432		if (!nfsrv_is_initialized()) /* NFS server not initialized, so no stats */
5433			goto ustat_skip;
5434
5435		/* reclaim old expired user nodes */
5436		nfsrv_active_user_list_reclaim();
5437
5438		/* reserve space for the buffer descriptor */
5439		if (bytes_avail >= sizeof(struct nfs_user_stat_desc))
5440			bytes_avail -= sizeof(struct nfs_user_stat_desc);
5441		else
5442			bytes_avail = 0;
5443
5444		/* put buffer position past the buffer descriptor */
5445		pos = sizeof(struct nfs_user_stat_desc);
5446
5447		/* Loop through exported directories */
5448		lck_rw_lock_shared(&nfsrv_export_rwlock);
5449		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
5450			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
5451				/* copy out path */
5452				if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
5453					snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
5454					    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
5455					    nx->nx_path);
5456
5457					error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
5458					if (error) {
5459						/* punt */
5460						goto ustat_done;
5461					}
5462
5463					pos += sizeof(struct nfs_user_stat_path_rec);
5464					bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
5465					recs_copied++;
5466				}
5467				else {
5468					/* Caller's buffer is exhausted */
5469					bytes_avail = 0;
5470				}
5471
5472				bytes_total += sizeof(struct nfs_user_stat_path_rec);
5473
5474				/* Scan through all user nodes of this export */
5475				ulist = &nx->nx_user_list;
5476				lck_mtx_lock(&ulist->user_mutex);
5477				for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
5478					unode_next = TAILQ_NEXT(unode, lru_link);
5479
5480					/* copy out node if there is space */
5481					if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
5482						/* prepare a user stat rec for copying out */
5483						ustat_rec.uid = unode->uid;
5484						bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
5485						ustat_rec.ops = unode->ops;
5486						ustat_rec.bytes_read = unode->bytes_read;
5487						ustat_rec.bytes_written = unode->bytes_written;
5488						ustat_rec.tm_start = unode->tm_start;
5489						ustat_rec.tm_last = unode->tm_last;
5490
5491						error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));
5492
5493						if (error) {
5494							/* punt */
5495							lck_mtx_unlock(&ulist->user_mutex);
5496							goto ustat_done;
5497						}
5498
5499						pos += sizeof(struct nfs_user_stat_user_rec);
5500						bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
5501						recs_copied++;
5502					}
5503					else {
5504						/* Caller's buffer is exhausted */
5505						bytes_avail = 0;
5506					}
5507					bytes_total += sizeof(struct nfs_user_stat_user_rec);
5508				}
5509				/* can unlock this export's list now */
5510				lck_mtx_unlock(&ulist->user_mutex);
5511			}
5512		}
5513
5514ustat_done:
5515		/* unlock the export table */
5516		lck_rw_done(&nfsrv_export_rwlock);
5517
5518ustat_skip:
5519		/* indicate number of actual records copied */
5520		ustat_desc.rec_count = recs_copied;
5521
5522		if (!error) {
5523			/* check if there was enough room for the buffer descriptor */
5524			if (*oldlenp >= sizeof(struct nfs_user_stat_desc))
5525				error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
5526			else
5527				error = ENOMEM;
5528
5529			/* always indicate required buffer size */
5530			*oldlenp = bytes_total;
5531		}
5532		break;
5533	case NFS_USERCOUNT:
5534		if (!oldp) {
5535			*oldlenp = sizeof(nfsrv_user_stat_node_count);
5536			return (0);
5537		}
5538
5539		if (*oldlenp < sizeof(nfsrv_user_stat_node_count)) {
5540			*oldlenp = sizeof(nfsrv_user_stat_node_count);
5541			return (ENOMEM);
5542		}
5543
5544		if (nfsrv_is_initialized()) {
5545			/* reclaim old expired user nodes */
5546			nfsrv_active_user_list_reclaim();
5547		}
5548
5549		error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
5550		break;
5551#endif /* NFSSERVER */
5552	case VFS_CTL_NOLOCKS:
5553 		if (req->oldptr != USER_ADDR_NULL) {
5554			lck_mtx_lock(&nmp->nm_lock);
5555			val = (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) ? 1 : 0;
5556			lck_mtx_unlock(&nmp->nm_lock);
5557 			error = SYSCTL_OUT(req, &val, sizeof(val));
5558 			if (error)
5559 				return (error);
5560 		}
5561 		if (req->newptr != USER_ADDR_NULL) {
5562 			error = SYSCTL_IN(req, &val, sizeof(val));
5563 			if (error)
5564 				return (error);
5565			lck_mtx_lock(&nmp->nm_lock);
5566			if (nmp->nm_lockmode == NFS_LOCK_MODE_LOCAL) {
5567				/* can't toggle locks when using local locks */
5568				error = EINVAL;
5569			} else if ((nmp->nm_vers >= NFS_VER4) && val) {
5570				/* can't disable locks for NFSv4 */
5571				error = EINVAL;
5572			} else if (val) {
5573				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
5574					nfs_lockd_mount_unregister(nmp);
5575				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
5576				nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
5577			} else {
5578				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED))
5579					nfs_lockd_mount_register(nmp);
5580				nmp->nm_lockmode = NFS_LOCK_MODE_ENABLED;
5581			}
5582			lck_mtx_unlock(&nmp->nm_lock);
5583 		}
5584		break;
5585	case VFS_CTL_QUERY:
5586		lck_mtx_lock(&nmp->nm_lock);
5587		/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5588		softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5589		if (!softnobrowse && (nmp->nm_state & NFSSTA_TIMEO))
5590			vq.vq_flags |= VQ_NOTRESP;
5591		if (!softnobrowse && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO) && !NMFLAG(nmp, MUTEJUKEBOX))
5592			vq.vq_flags |= VQ_NOTRESP;
5593		if (!softnobrowse && (nmp->nm_state & NFSSTA_LOCKTIMEO) &&
5594		    (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
5595			vq.vq_flags |= VQ_NOTRESP;
5596		if (nmp->nm_state & NFSSTA_DEAD)
5597			vq.vq_flags |= VQ_DEAD;
5598		lck_mtx_unlock(&nmp->nm_lock);
5599		error = SYSCTL_OUT(req, &vq, sizeof(vq));
5600		break;
5601 	case VFS_CTL_TIMEO:
5602 		if (req->oldptr != USER_ADDR_NULL) {
5603			lck_mtx_lock(&nmp->nm_lock);
5604			val = nmp->nm_tprintf_initial_delay;
5605			lck_mtx_unlock(&nmp->nm_lock);
5606 			error = SYSCTL_OUT(req, &val, sizeof(val));
5607 			if (error)
5608 				return (error);
5609 		}
5610 		if (req->newptr != USER_ADDR_NULL) {
5611 			error = SYSCTL_IN(req, &val, sizeof(val));
5612 			if (error)
5613 				return (error);
5614			lck_mtx_lock(&nmp->nm_lock);
5615 			if (val < 0)
5616 				nmp->nm_tprintf_initial_delay = 0;
5617			else
5618				nmp->nm_tprintf_initial_delay = val;
5619			lck_mtx_unlock(&nmp->nm_lock);
5620 		}
5621		break;
5622	case VFS_CTL_NSTATUS:
5623		/*
5624		 * Return the status of this mount.  This is much more
5625		 * information than VFS_CTL_QUERY.  In addition to the
5626		 * vq_flags return the significant mount options along
5627		 * with the list of threads blocked on the mount and
5628		 * how long the threads have been waiting.
5629		 */
5630
5631		lck_mtx_lock(nfs_request_mutex);
5632		lck_mtx_lock(&nmp->nm_lock);
5633
5634		/*
5635		 * Count the number of requests waiting for a reply.
5636		 * Note: there could be multiple requests from the same thread.
5637		 */
5638		numThreads = 0;
5639		TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
5640			if (rq->r_nmp == nmp)
5641				numThreads++;
5642		}
5643
5644		/* Calculate total size of result buffer */
5645		totlen = sizeof(struct netfs_status) + (numThreads * sizeof(uint64_t));
5646
5647		if (req->oldptr == USER_ADDR_NULL) {		// Caller is querying buffer size
5648			lck_mtx_unlock(&nmp->nm_lock);
5649			lck_mtx_unlock(nfs_request_mutex);
5650			return SYSCTL_OUT(req, NULL, totlen);
5651		}
5652		if (req->oldlen < totlen) {	// Check if caller's buffer is big enough
5653			lck_mtx_unlock(&nmp->nm_lock);
5654			lck_mtx_unlock(nfs_request_mutex);
5655			return (ERANGE);
5656		}
5657
5658		MALLOC(nsp, struct netfs_status *, totlen, M_TEMP, M_WAITOK|M_ZERO);
5659		if (nsp == NULL) {
5660			lck_mtx_unlock(&nmp->nm_lock);
5661			lck_mtx_unlock(nfs_request_mutex);
5662			return (ENOMEM);
5663		}
5664		timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
5665		if (nmp->nm_state & timeoutmask)
5666			nsp->ns_status |= VQ_NOTRESP;
5667		if (nmp->nm_state & NFSSTA_DEAD)
5668			nsp->ns_status |= VQ_DEAD;
5669
5670		(void) nfs_mountopts(nmp, nsp->ns_mountopts, sizeof(nsp->ns_mountopts));
5671		nsp->ns_threadcount = numThreads;
5672
5673		/*
5674		 * Get the thread ids of threads waiting for a reply
5675		 * and find the longest wait time.
5676		 */
5677		if (numThreads > 0) {
5678			struct timeval now;
5679			time_t sendtime;
5680
5681			microuptime(&now);
5682			count = 0;
5683			sendtime = now.tv_sec;
5684			TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
5685				if (rq->r_nmp == nmp) {
5686					if (rq->r_start < sendtime)
5687						sendtime = rq->r_start;
5688		 			// A thread_id of zero is used to represent an async I/O request.
5689					nsp->ns_threadids[count] =
5690						rq->r_thread ? thread_tid(rq->r_thread) : 0;
5691					if (++count >= numThreads)
5692						break;
5693				}
5694			}
5695			nsp->ns_waittime = now.tv_sec - sendtime;
5696		}
5697
5698		lck_mtx_unlock(&nmp->nm_lock);
5699		lck_mtx_unlock(nfs_request_mutex);
5700
5701 		error = SYSCTL_OUT(req, nsp, totlen);
5702		FREE(nsp, M_TEMP);
5703		break;
5704	default:
5705		return (ENOTSUP);
5706	}
5707	return (error);
5708}
5709