1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993, 1995
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgement:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)nfs_vfsops.c	8.12 (Berkeley) 5/20/95
65 * FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections.  This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/conf.h>
77#include <sys/ioctl.h>
78#include <sys/signal.h>
79#include <sys/proc_internal.h> /* for fs rooting to update rootdir in fdp */
80#include <sys/kauth.h>
81#include <sys/vnode_internal.h>
82#include <sys/malloc.h>
83#include <sys/kernel.h>
84#include <sys/sysctl.h>
85#include <sys/mount_internal.h>
86#include <sys/kpi_mbuf.h>
87#include <sys/socket.h>
88#include <sys/socketvar.h>
89#include <sys/fcntl.h>
90#include <sys/quota.h>
91#include <sys/priv.h>
92#include <libkern/OSAtomic.h>
93
94#include <sys/vm.h>
95#include <sys/vmparam.h>
96
97#if !defined(NO_MOUNT_PRIVATE)
98#include <sys/filedesc.h>
99#endif /* NO_MOUNT_PRIVATE */
100
101#include <net/if.h>
102#include <net/route.h>
103#include <netinet/in.h>
104
105#include <nfs/rpcv2.h>
106#include <nfs/krpc.h>
107#include <nfs/nfsproto.h>
108#include <nfs/nfs.h>
109#include <nfs/nfsnode.h>
110#include <nfs/nfs_gss.h>
111#include <nfs/nfsmount.h>
112#include <nfs/xdr_subs.h>
113#include <nfs/nfsm_subs.h>
114#include <nfs/nfsdiskless.h>
115#include <nfs/nfs_lock.h>
116#if CONFIG_MACF
117#include <security/mac_framework.h>
118#endif
119
120#include <pexpert/pexpert.h>
121
/*
 * NFS client globals
 *
 * Most of the locks, lists and timer calls declared here are set up
 * once by nfs_vfs_init().
 */

/* tick count derived from hz and NFS_TICKINTVL in nfs_vfs_init(); never less than 1 */
int nfs_ticks;
/* lock groups allocated in nfs_vfs_init() ("nfs_global" and "nfs_mount") */
static lck_grp_t *nfs_global_grp, *nfs_mount_grp;
/* mutex allocated from nfs_global_grp in nfs_vfs_init() */
lck_mtx_t *nfs_global_mutex;
/*
 * NFSv4 attribute bitmaps, filled in by nfs_vfs_init().
 * nfs_getattr_bitmap is additionally masked with nfs_object_attr_bitmap there.
 */
uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN];
uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN];
uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
/* list head, initialized (empty) in nfs_vfs_init() */
struct nfsclientidlist nfsclientids;

/* NFS requests */
struct nfs_reqqhead nfs_reqq;		/* request list, initialized in nfs_vfs_init() */
lck_grp_t *nfs_request_grp;
lck_mtx_t *nfs_request_mutex;		/* "request list mutex" (see nfs_vfs_init()) */
thread_call_t nfs_request_timer_call;	/* thread call for nfs_request_timer() */
int nfs_request_timer_on;
u_int32_t nfs_xid = 0;
u_int32_t nfs_xidwrap = 0;		/* to build a (non-wrapping) 64 bit xid */

/* thread call for nfs_buf_timer(), allocated in nfs_vfs_init() */
thread_call_t nfs_buf_timer_call;

/* NFSv4 */
lck_grp_t *nfs_open_grp;
uint32_t nfs_open_owner_seqnum = 0;
uint32_t nfs_lock_owner_seqnum = 0;
thread_call_t nfs4_callback_timer_call;	/* thread call for nfs4_callback_timer() */
int nfs4_callback_timer_on = 0;

/* nfsiod */
lck_grp_t *nfsiod_lck_grp;
lck_mtx_t *nfsiod_mutex;
struct nfsiodlist nfsiodfree, nfsiodwork;
struct nfsiodmountlist nfsiodmounts;
int nfsiod_thread_count = 0;
int nfsiod_thread_max = NFS_DEFASYNCTHREAD;
int nfs_max_async_writes = NFS_DEFMAXASYNCWRITES;

/* reported as f_iosize by nfs_vfs_getattr() */
int nfs_iosize = NFS_IOSIZE;
int nfs_access_cache_timeout = NFS_MAXATTRTIMO;
int nfs_access_delete = 1; /* too many servers get this wrong - workaround on by default */
int nfs_access_dotzfs = 1;
int nfs_access_for_getattr = 0;
int nfs_allow_async = 0;
/*
 * Consulted by nfs_vfs_getattr() to rate-limit statfs RPCs;
 * values outside 1..999999 disable the limit.
 */
int nfs_statfs_rate_limit = NFS_DEFSTATFSRATELIMIT;
int nfs_lockd_mounts = 0;
int nfs_lockd_request_sent = 0;
int nfs_idmap_ctrl = NFS_IDMAP_CTRL_USE_IDMAP_SERVICE;
int nfs_callback_port = 0;

int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
176
/*
 * Prototypes for the mount helper routines.  None of them is defined in
 * the portion of the file preceding these declarations; the two diskless
 * mount helpers are file-local (static), and the "private" variant only
 * exists when NO_MOUNT_PRIVATE is not defined.
 */
int		mountnfs(char *, mount_t, vfs_context_t, vnode_t *);
static int	nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
#if !defined(NO_MOUNT_PRIVATE)
static int	nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
#endif /* NO_MOUNT_PRIVATE */
int		nfs_mount_connect(struct nfsmount *);
void		nfs_mount_cleanup(struct nfsmount *);
int		nfs_mountinfo_assemble(struct nfsmount *, struct xdrbuf *);
int		nfs4_mount_update_path_with_symlink(struct nfsmount *, struct nfs_fs_path *, uint32_t, fhandle_t *, int *, fhandle_t *, vfs_context_t);
186
/*
 * NFS VFS operations.
 *
 * Prototypes for the entry points that are collected in the nfs_vfsops
 * table below.
 */
int	nfs_vfs_mount(mount_t, vnode_t, user_addr_t, vfs_context_t);
int	nfs_vfs_start(mount_t, int, vfs_context_t);
int	nfs_vfs_unmount(mount_t, int, vfs_context_t);
int	nfs_vfs_root(mount_t, vnode_t *, vfs_context_t);
int	nfs_vfs_quotactl(mount_t, int, uid_t, caddr_t, vfs_context_t);
int	nfs_vfs_getattr(mount_t, struct vfs_attr *, vfs_context_t);
int	nfs_vfs_sync(mount_t, int, vfs_context_t);
int	nfs_vfs_vget(mount_t, ino64_t, vnode_t *, vfs_context_t);
int	nfs_vfs_vptofh(vnode_t, int *, unsigned char *, vfs_context_t);
int	nfs_vfs_fhtovp(mount_t, int, unsigned char *, vnode_t *, vfs_context_t);
int	nfs_vfs_init(struct vfsconf *);
int	nfs_vfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t);
202
/*
 * VFS operations vector for NFS.
 *
 * This is a positional initializer, so the entries must stay in the
 * member order of struct vfsops (declared outside this file).  Note that
 * fhtovp is listed before vptofh here, which differs from the order of
 * the prototypes above.
 */
struct vfsops nfs_vfsops = {
	nfs_vfs_mount,
	nfs_vfs_start,
	nfs_vfs_unmount,
	nfs_vfs_root,
	nfs_vfs_quotactl,
	nfs_vfs_getattr,
	nfs_vfs_sync,
	nfs_vfs_vget,
	nfs_vfs_fhtovp,
	nfs_vfs_vptofh,
	nfs_vfs_init,
	nfs_vfs_sysctl,
	NULL,		/* setattr */
	{ NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL,		/* reserved */
	  NULL }	/* reserved */
};
225
226
/*
 * version-specific NFS functions
 *
 * When the kernel is built without QUOTA the getquota routines do not
 * exist; the names are defined to NULL instead so that the nfs_funcs
 * tables below get a NULL entry in the getquota slot.
 */
int nfs3_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
int nfs4_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
int nfs3_fsinfo(struct nfsmount *, nfsnode_t, vfs_context_t);
int nfs3_update_statfs(struct nfsmount *, vfs_context_t);
int nfs4_update_statfs(struct nfsmount *, vfs_context_t);
#if !QUOTA
#define nfs3_getquota	NULL
#define nfs4_getquota	NULL
#else
int nfs3_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
int nfs4_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
#endif
242
/*
 * Version-specific operations table built from the nfs3_* routines.
 *
 * Positional initializer: the entries must stay in the member order of
 * struct nfs_funcs (declared outside this file) and parallel to
 * nfs4_funcs.  The second entry is the one nfs_vfs_getattr() invokes as
 * nf_update_statfs; the third is NULL when QUOTA is not configured.
 */
struct nfs_funcs nfs3_funcs = {
	nfs3_mount,
	nfs3_update_statfs,
	nfs3_getquota,
	nfs3_access_rpc,
	nfs3_getattr_rpc,
	nfs3_setattr_rpc,
	nfs3_read_rpc_async,
	nfs3_read_rpc_async_finish,
	nfs3_readlink_rpc,
	nfs3_write_rpc_async,
	nfs3_write_rpc_async_finish,
	nfs3_commit_rpc,
	nfs3_lookup_rpc_async,
	nfs3_lookup_rpc_async_finish,
	nfs3_remove_rpc,
	nfs3_rename_rpc,
	nfs3_setlock_rpc,
	nfs3_unlock_rpc,
	nfs3_getlock_rpc
	};
/*
 * Version-specific operations table built from the nfs4_* routines.
 *
 * Positional initializer: the entries must stay in the member order of
 * struct nfs_funcs (declared outside this file) and parallel to
 * nfs3_funcs.  The third entry is NULL when QUOTA is not configured.
 */
struct nfs_funcs nfs4_funcs = {
	nfs4_mount,
	nfs4_update_statfs,
	nfs4_getquota,
	nfs4_access_rpc,
	nfs4_getattr_rpc,
	nfs4_setattr_rpc,
	nfs4_read_rpc_async,
	nfs4_read_rpc_async_finish,
	nfs4_readlink_rpc,
	nfs4_write_rpc_async,
	nfs4_write_rpc_async_finish,
	nfs4_commit_rpc,
	nfs4_lookup_rpc_async,
	nfs4_lookup_rpc_async_finish,
	nfs4_remove_rpc,
	nfs4_rename_rpc,
	nfs4_setlock_rpc,
	nfs4_unlock_rpc,
	nfs4_getlock_rpc
	};
285
286/*
287 * Called once to initialize data structures...
288 */
289int
290nfs_vfs_init(__unused struct vfsconf *vfsp)
291{
292	int i;
293
294	/*
295	 * Check to see if major data structures haven't bloated.
296	 */
297	if (sizeof (struct nfsnode) > NFS_NODEALLOC) {
298		printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC);
299		printf("Try reducing NFS_SMALLFH\n");
300	}
301	if (sizeof (struct nfsmount) > NFS_MNTALLOC)
302		printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC);
303
304	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
305	if (nfs_ticks < 1)
306		nfs_ticks = 1;
307
308	/* init async I/O thread pool state */
309	TAILQ_INIT(&nfsiodfree);
310	TAILQ_INIT(&nfsiodwork);
311	TAILQ_INIT(&nfsiodmounts);
312	nfsiod_lck_grp = lck_grp_alloc_init("nfsiod", LCK_GRP_ATTR_NULL);
313	nfsiod_mutex = lck_mtx_alloc_init(nfsiod_lck_grp, LCK_ATTR_NULL);
314
315	/* init lock groups, etc. */
316	nfs_mount_grp = lck_grp_alloc_init("nfs_mount", LCK_GRP_ATTR_NULL);
317	nfs_open_grp = lck_grp_alloc_init("nfs_open", LCK_GRP_ATTR_NULL);
318	nfs_global_grp = lck_grp_alloc_init("nfs_global", LCK_GRP_ATTR_NULL);
319
320	nfs_global_mutex = lck_mtx_alloc_init(nfs_global_grp, LCK_ATTR_NULL);
321
322	/* init request list mutex */
323	nfs_request_grp = lck_grp_alloc_init("nfs_request", LCK_GRP_ATTR_NULL);
324	nfs_request_mutex = lck_mtx_alloc_init(nfs_request_grp, LCK_ATTR_NULL);
325
326	/* initialize NFS request list */
327	TAILQ_INIT(&nfs_reqq);
328
329	nfs_nbinit();			/* Init the nfsbuf table */
330	nfs_nhinit();			/* Init the nfsnode table */
331	nfs_lockinit();			/* Init the nfs lock state */
332	nfs_gss_init();			/* Init RPCSEC_GSS security */
333
334	/* NFSv4 stuff */
335	NFS4_PER_FS_ATTRIBUTES(nfs_fs_attr_bitmap);
336	NFS4_PER_OBJECT_ATTRIBUTES(nfs_object_attr_bitmap);
337	NFS4_DEFAULT_ATTRIBUTES(nfs_getattr_bitmap);
338	for (i=0; i < NFS_ATTR_BITMAP_LEN; i++)
339		nfs_getattr_bitmap[i] &= nfs_object_attr_bitmap[i];
340	TAILQ_INIT(&nfsclientids);
341
342	/* initialize NFS timer callouts */
343	nfs_request_timer_call = thread_call_allocate(nfs_request_timer, NULL);
344	nfs_buf_timer_call = thread_call_allocate(nfs_buf_timer, NULL);
345	nfs4_callback_timer_call = thread_call_allocate(nfs4_callback_timer, NULL);
346
347	return (0);
348}
349
350/*
351 * nfs statfs call
352 */
353int
354nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
355{
356	nfsnode_t np;
357	int error = 0, lockerror, status, nfsvers;
358	u_int64_t xid;
359	struct nfsm_chain nmreq, nmrep;
360	uint32_t val = 0;
361
362	nfsvers = nmp->nm_vers;
363	np = nmp->nm_dnp;
364	if (!np)
365		return (ENXIO);
366	if ((error = vnode_get(NFSTOV(np))))
367		return (error);
368
369	nfsm_chain_null(&nmreq);
370	nfsm_chain_null(&nmrep);
371
372	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
373	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
374	nfsm_chain_build_done(error, &nmreq);
375	nfsmout_if(error);
376	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSSTAT, ctx, NULL, &nmrep, &xid, &status);
377	if ((lockerror = nfs_node_lock(np)))
378		error = lockerror;
379	if (nfsvers == NFS_VER3)
380		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
381	if (!lockerror)
382		nfs_node_unlock(np);
383	if (!error)
384		error = status;
385	nfsm_assert(error, NFSTONMP(np), ENXIO);
386	nfsmout_if(error);
387	lck_mtx_lock(&nmp->nm_lock);
388	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL);
389	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE);
390	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_AVAIL);
391	if (nfsvers == NFS_VER3) {
392		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_AVAIL);
393		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_TOTAL);
394		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_FREE);
395		nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
396		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_total);
397		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_free);
398		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_space_avail);
399		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_total);
400		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_free);
401		nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_files_avail);
402		// skip invarsec
403	} else {
404		nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip tsize?
405		nfsm_chain_get_32(error, &nmrep, nmp->nm_fsattr.nfsa_bsize);
406		nfsm_chain_get_32(error, &nmrep, val);
407		nfsmout_if(error);
408		if (nmp->nm_fsattr.nfsa_bsize <= 0)
409			nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
410		nmp->nm_fsattr.nfsa_space_total = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
411		nfsm_chain_get_32(error, &nmrep, val);
412		nfsmout_if(error);
413		nmp->nm_fsattr.nfsa_space_free = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
414		nfsm_chain_get_32(error, &nmrep, val);
415		nfsmout_if(error);
416		nmp->nm_fsattr.nfsa_space_avail = (uint64_t)val * nmp->nm_fsattr.nfsa_bsize;
417	}
418	lck_mtx_unlock(&nmp->nm_lock);
419nfsmout:
420	nfsm_chain_cleanup(&nmreq);
421	nfsm_chain_cleanup(&nmrep);
422	vnode_put(NFSTOV(np));
423	return (error);
424}
425
426int
427nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
428{
429	nfsnode_t np;
430	int error = 0, lockerror, status, nfsvers, numops;
431	u_int64_t xid;
432	struct nfsm_chain nmreq, nmrep;
433	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
434	struct nfs_vattr nvattr;
435	struct nfsreq_secinfo_args si;
436
437	nfsvers = nmp->nm_vers;
438	np = nmp->nm_dnp;
439	if (!np)
440		return (ENXIO);
441	if ((error = vnode_get(NFSTOV(np))))
442		return (error);
443
444	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
445	NVATTR_INIT(&nvattr);
446	nfsm_chain_null(&nmreq);
447	nfsm_chain_null(&nmrep);
448
449	// PUTFH + GETATTR
450	numops = 2;
451	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
452	nfsm_chain_add_compound_header(error, &nmreq, "statfs", numops);
453	numops--;
454	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
455	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
456	numops--;
457	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
458	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
459	NFS4_STATFS_ATTRIBUTES(bitmap);
460	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
461	nfsm_chain_build_done(error, &nmreq);
462	nfsm_assert(error, (numops == 0), EPROTO);
463	nfsmout_if(error);
464	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
465	nfsm_chain_skip_tag(error, &nmrep);
466	nfsm_chain_get_32(error, &nmrep, numops);
467	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
468	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
469	nfsm_assert(error, NFSTONMP(np), ENXIO);
470	nfsmout_if(error);
471	lck_mtx_lock(&nmp->nm_lock);
472	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
473	lck_mtx_unlock(&nmp->nm_lock);
474	nfsmout_if(error);
475	if ((lockerror = nfs_node_lock(np)))
476		error = lockerror;
477	if (!error)
478		nfs_loadattrcache(np, &nvattr, &xid, 0);
479	if (!lockerror)
480		nfs_node_unlock(np);
481	nfsm_assert(error, NFSTONMP(np), ENXIO);
482	nfsmout_if(error);
483	nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
484nfsmout:
485	NVATTR_CLEANUP(&nvattr);
486	nfsm_chain_cleanup(&nmreq);
487	nfsm_chain_cleanup(&nmrep);
488	vnode_put(NFSTOV(np));
489	return (error);
490}
491
492
493/*
494 * The NFS VFS_GETATTR function: "statfs"-type information is retrieved
495 * using the nf_update_statfs() function, and other attributes are cobbled
496 * together from whatever sources we can (getattr, fsinfo, pathconf).
497 */
498int
499nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
500{
501	struct nfsmount *nmp;
502	uint32_t bsize;
503	int error = 0, nfsvers;
504
505	if (!(nmp = VFSTONFS(mp)))
506		return (ENXIO);
507	nfsvers = nmp->nm_vers;
508
509	if (VFSATTR_IS_ACTIVE(fsap, f_bsize)  ||
510	    VFSATTR_IS_ACTIVE(fsap, f_iosize) ||
511	    VFSATTR_IS_ACTIVE(fsap, f_blocks) ||
512	    VFSATTR_IS_ACTIVE(fsap, f_bfree)  ||
513	    VFSATTR_IS_ACTIVE(fsap, f_bavail) ||
514	    VFSATTR_IS_ACTIVE(fsap, f_bused)  ||
515	    VFSATTR_IS_ACTIVE(fsap, f_files)  ||
516	    VFSATTR_IS_ACTIVE(fsap, f_ffree)) {
517		int statfsrate = nfs_statfs_rate_limit;
518		int refresh = 1;
519
520		/*
521		 * Are we rate-limiting statfs RPCs?
522		 * (Treat values less than 1 or greater than 1,000,000 as no limit.)
523		 */
524		if ((statfsrate > 0) && (statfsrate < 1000000)) {
525			struct timeval now;
526			uint32_t stamp;
527
528			microuptime(&now);
529			lck_mtx_lock(&nmp->nm_lock);
530			stamp = (now.tv_sec * statfsrate) + (now.tv_usec / (1000000/statfsrate));
531			if (stamp != nmp->nm_fsattrstamp) {
532				refresh = 1;
533				nmp->nm_fsattrstamp = stamp;
534			} else {
535				refresh = 0;
536			}
537			lck_mtx_unlock(&nmp->nm_lock);
538		}
539
540		if (refresh)
541			error = nmp->nm_funcs->nf_update_statfs(nmp, ctx);
542		if ((error == ESTALE) || (error == ETIMEDOUT))
543			error = 0;
544		if (error)
545			return (error);
546
547		lck_mtx_lock(&nmp->nm_lock);
548		VFSATTR_RETURN(fsap, f_iosize, nfs_iosize);
549		VFSATTR_RETURN(fsap, f_bsize, nmp->nm_fsattr.nfsa_bsize);
550		bsize = nmp->nm_fsattr.nfsa_bsize;
551		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL))
552			VFSATTR_RETURN(fsap, f_blocks, nmp->nm_fsattr.nfsa_space_total / bsize);
553		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE))
554			VFSATTR_RETURN(fsap, f_bfree, nmp->nm_fsattr.nfsa_space_free / bsize);
555		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_AVAIL))
556			VFSATTR_RETURN(fsap, f_bavail, nmp->nm_fsattr.nfsa_space_avail / bsize);
557		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_TOTAL) &&
558		    NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SPACE_FREE))
559			VFSATTR_RETURN(fsap, f_bused,
560				(nmp->nm_fsattr.nfsa_space_total / bsize) -
561				(nmp->nm_fsattr.nfsa_space_free / bsize));
562		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_TOTAL))
563			VFSATTR_RETURN(fsap, f_files, nmp->nm_fsattr.nfsa_files_total);
564		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_FILES_FREE))
565			VFSATTR_RETURN(fsap, f_ffree, nmp->nm_fsattr.nfsa_files_free);
566		lck_mtx_unlock(&nmp->nm_lock);
567	}
568
569	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
570		u_int32_t caps, valid;
571		nfsnode_t np = nmp->nm_dnp;
572
573		nfsm_assert(error, VFSTONFS(mp) && np, ENXIO);
574		if (error)
575			return (error);
576		lck_mtx_lock(&nmp->nm_lock);
577
578		/*
579		 * The capabilities[] array defines what this volume supports.
580		 *
581		 * The valid[] array defines which bits this code understands
582		 * the meaning of (whether the volume has that capability or not).
583		 * Any zero bits here means "I don't know what you're asking about"
584		 * and the caller cannot tell whether that capability is
585		 * present or not.
586		 */
587		caps = valid = 0;
588		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SYMLINK_SUPPORT)) {
589			valid |= VOL_CAP_FMT_SYMBOLICLINKS;
590			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_SYMLINK)
591				caps |= VOL_CAP_FMT_SYMBOLICLINKS;
592		}
593		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_LINK_SUPPORT)) {
594			valid |= VOL_CAP_FMT_HARDLINKS;
595			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_LINK)
596				caps |= VOL_CAP_FMT_HARDLINKS;
597		}
598		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) {
599			valid |= VOL_CAP_FMT_CASE_SENSITIVE;
600			if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE))
601				caps |= VOL_CAP_FMT_CASE_SENSITIVE;
602		}
603		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING)) {
604			valid |= VOL_CAP_FMT_CASE_PRESERVING;
605			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING)
606				caps |= VOL_CAP_FMT_CASE_PRESERVING;
607		}
608		/* Note: VOL_CAP_FMT_2TB_FILESIZE is actually used to test for "large file support" */
609		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
610			/* Is server's max file size at least 4GB? */
611			if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x100000000ULL)
612				caps |= VOL_CAP_FMT_2TB_FILESIZE;
613		} else if (nfsvers >= NFS_VER3) {
614			/*
615			 * NFSv3 and up supports 64 bits of file size.
616			 * So, we'll just assume maxfilesize >= 4GB
617			 */
618			caps |= VOL_CAP_FMT_2TB_FILESIZE;
619		}
620		if (nfsvers >= NFS_VER4) {
621			caps |= VOL_CAP_FMT_HIDDEN_FILES;
622			valid |= VOL_CAP_FMT_HIDDEN_FILES;
623			// VOL_CAP_FMT_OPENDENYMODES
624//			caps |= VOL_CAP_FMT_OPENDENYMODES;
625//			valid |= VOL_CAP_FMT_OPENDENYMODES;
626		}
627		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
628			// VOL_CAP_FMT_PERSISTENTOBJECTIDS |
629			// VOL_CAP_FMT_SYMBOLICLINKS |
630			// VOL_CAP_FMT_HARDLINKS |
631			// VOL_CAP_FMT_JOURNAL |
632			// VOL_CAP_FMT_JOURNAL_ACTIVE |
633			// VOL_CAP_FMT_NO_ROOT_TIMES |
634			// VOL_CAP_FMT_SPARSE_FILES |
635			// VOL_CAP_FMT_ZERO_RUNS |
636			// VOL_CAP_FMT_CASE_SENSITIVE |
637			// VOL_CAP_FMT_CASE_PRESERVING |
638			// VOL_CAP_FMT_FAST_STATFS |
639			// VOL_CAP_FMT_2TB_FILESIZE |
640			// VOL_CAP_FMT_OPENDENYMODES |
641			// VOL_CAP_FMT_HIDDEN_FILES |
642			caps;
643		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
644			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
645			// VOL_CAP_FMT_SYMBOLICLINKS |
646			// VOL_CAP_FMT_HARDLINKS |
647			// VOL_CAP_FMT_JOURNAL |
648			// VOL_CAP_FMT_JOURNAL_ACTIVE |
649			// VOL_CAP_FMT_NO_ROOT_TIMES |
650			// VOL_CAP_FMT_SPARSE_FILES |
651			// VOL_CAP_FMT_ZERO_RUNS |
652			// VOL_CAP_FMT_CASE_SENSITIVE |
653			// VOL_CAP_FMT_CASE_PRESERVING |
654			VOL_CAP_FMT_FAST_STATFS |
655			VOL_CAP_FMT_2TB_FILESIZE |
656			// VOL_CAP_FMT_OPENDENYMODES |
657			// VOL_CAP_FMT_HIDDEN_FILES |
658			valid;
659
660		/*
661		 * We don't support most of the interfaces.
662		 *
663		 * We MAY support locking, but we don't have any easy way of probing.
664		 * We can tell if there's no lockd running or if locks have been
665		 * disabled for a mount, so we can definitely answer NO in that case.
666		 * Any attempt to send a request to lockd to test for locking support
667		 * may cause the lazily-launched locking daemons to be started
668		 * unnecessarily.  So we avoid that.  However, we do record if we ever
669		 * successfully perform a lock operation on a mount point, so if it
670		 * looks like lock ops have worked, we do report that we support them.
671		 */
672		caps = valid = 0;
673		if (nfsvers >= NFS_VER4) {
674			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
675			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
676			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
677				caps |= VOL_CAP_INT_EXTENDED_SECURITY;
678			valid |= VOL_CAP_INT_EXTENDED_SECURITY;
679			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
680				caps |= VOL_CAP_INT_EXTENDED_ATTR;
681			valid |= VOL_CAP_INT_EXTENDED_ATTR;
682#if NAMEDSTREAMS
683			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
684				caps |= VOL_CAP_INT_NAMEDSTREAMS;
685			valid |= VOL_CAP_INT_NAMEDSTREAMS;
686#endif
687		} else if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
688			/* locks disabled on this mount, so they definitely won't work */
689			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
690		} else if (nmp->nm_state & NFSSTA_LOCKSWORK) {
691			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
692			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
693		}
694		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] =
695			// VOL_CAP_INT_SEARCHFS |
696			// VOL_CAP_INT_ATTRLIST |
697			// VOL_CAP_INT_NFSEXPORT |
698			// VOL_CAP_INT_READDIRATTR |
699			// VOL_CAP_INT_EXCHANGEDATA |
700			// VOL_CAP_INT_COPYFILE |
701			// VOL_CAP_INT_ALLOCATE |
702			// VOL_CAP_INT_VOL_RENAME |
703			// VOL_CAP_INT_ADVLOCK |
704			// VOL_CAP_INT_FLOCK |
705			// VOL_CAP_INT_EXTENDED_SECURITY |
706			// VOL_CAP_INT_USERACCESS |
707			// VOL_CAP_INT_MANLOCK |
708			// VOL_CAP_INT_NAMEDSTREAMS |
709			// VOL_CAP_INT_EXTENDED_ATTR |
710			VOL_CAP_INT_REMOTE_EVENT |
711			caps;
712		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
713			VOL_CAP_INT_SEARCHFS |
714			VOL_CAP_INT_ATTRLIST |
715			VOL_CAP_INT_NFSEXPORT |
716			VOL_CAP_INT_READDIRATTR |
717			VOL_CAP_INT_EXCHANGEDATA |
718			VOL_CAP_INT_COPYFILE |
719			VOL_CAP_INT_ALLOCATE |
720			VOL_CAP_INT_VOL_RENAME |
721			// VOL_CAP_INT_ADVLOCK |
722			// VOL_CAP_INT_FLOCK |
723			// VOL_CAP_INT_EXTENDED_SECURITY |
724			// VOL_CAP_INT_USERACCESS |
725			// VOL_CAP_INT_MANLOCK |
726			// VOL_CAP_INT_NAMEDSTREAMS |
727			// VOL_CAP_INT_EXTENDED_ATTR |
728			VOL_CAP_INT_REMOTE_EVENT |
729			valid;
730
731		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
732		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
733
734		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
735		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;
736
737		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
738		lck_mtx_unlock(&nmp->nm_lock);
739	}
740
741	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
742		fsap->f_attributes.validattr.commonattr = 0;
743		fsap->f_attributes.validattr.volattr =
744			ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
745		fsap->f_attributes.validattr.dirattr = 0;
746		fsap->f_attributes.validattr.fileattr = 0;
747		fsap->f_attributes.validattr.forkattr = 0;
748
749		fsap->f_attributes.nativeattr.commonattr = 0;
750		fsap->f_attributes.nativeattr.volattr =
751			ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
752		fsap->f_attributes.nativeattr.dirattr = 0;
753		fsap->f_attributes.nativeattr.fileattr = 0;
754		fsap->f_attributes.nativeattr.forkattr = 0;
755
756		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
757	}
758
759	return (error);
760}
761
762/*
763 * nfs version 3 fsinfo rpc call
764 */
765int
766nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
767{
768	int error = 0, lockerror, status, nmlocked = 0;
769	u_int64_t xid;
770	uint32_t val, prefsize, maxsize;
771	struct nfsm_chain nmreq, nmrep;
772
773	nfsm_chain_null(&nmreq);
774	nfsm_chain_null(&nmrep);
775
776	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nmp->nm_vers));
777	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
778	nfsm_chain_build_done(error, &nmreq);
779	nfsmout_if(error);
780	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSINFO, ctx, NULL, &nmrep, &xid, &status);
781	if ((lockerror = nfs_node_lock(np)))
782		error = lockerror;
783	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
784	if (!lockerror)
785		nfs_node_unlock(np);
786	if (!error)
787		error = status;
788	nfsmout_if(error);
789
790	lck_mtx_lock(&nmp->nm_lock);
791	nmlocked = 1;
792
793	nfsm_chain_get_32(error, &nmrep, maxsize);
794	nfsm_chain_get_32(error, &nmrep, prefsize);
795	nfsmout_if(error);
796	nmp->nm_fsattr.nfsa_maxread = maxsize;
797	if (prefsize < nmp->nm_rsize)
798		nmp->nm_rsize = (prefsize + NFS_FABLKSIZE - 1) &
799			~(NFS_FABLKSIZE - 1);
800	if ((maxsize > 0) && (maxsize < nmp->nm_rsize)) {
801		nmp->nm_rsize = maxsize & ~(NFS_FABLKSIZE - 1);
802		if (nmp->nm_rsize == 0)
803			nmp->nm_rsize = maxsize;
804	}
805	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip rtmult
806
807	nfsm_chain_get_32(error, &nmrep, maxsize);
808	nfsm_chain_get_32(error, &nmrep, prefsize);
809	nfsmout_if(error);
810	nmp->nm_fsattr.nfsa_maxwrite = maxsize;
811	if (prefsize < nmp->nm_wsize)
812		nmp->nm_wsize = (prefsize + NFS_FABLKSIZE - 1) &
813			~(NFS_FABLKSIZE - 1);
814	if ((maxsize > 0) && (maxsize < nmp->nm_wsize)) {
815		nmp->nm_wsize = maxsize & ~(NFS_FABLKSIZE - 1);
816		if (nmp->nm_wsize == 0)
817			nmp->nm_wsize = maxsize;
818	}
819	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); // skip wtmult
820
821	nfsm_chain_get_32(error, &nmrep, prefsize);
822	nfsmout_if(error);
823	if ((prefsize > 0) && (prefsize < nmp->nm_readdirsize))
824		nmp->nm_readdirsize = prefsize;
825	if ((nmp->nm_fsattr.nfsa_maxread > 0) &&
826	    (nmp->nm_fsattr.nfsa_maxread < nmp->nm_readdirsize))
827		nmp->nm_readdirsize = nmp->nm_fsattr.nfsa_maxread;
828
829	nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_maxfilesize);
830
831	nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); // skip time_delta
832
833	/* convert FS properties to our own flags */
834	nfsm_chain_get_32(error, &nmrep, val);
835	nfsmout_if(error);
836	if (val & NFSV3FSINFO_LINK)
837		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_LINK;
838	if (val & NFSV3FSINFO_SYMLINK)
839		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_SYMLINK;
840	if (val & NFSV3FSINFO_HOMOGENEOUS)
841		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_HOMOGENEOUS;
842	if (val & NFSV3FSINFO_CANSETTIME)
843		nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_SET_TIME;
844	nmp->nm_state |= NFSSTA_GOTFSINFO;
845	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD);
846	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE);
847	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE);
848	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_LINK_SUPPORT);
849	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_SYMLINK_SUPPORT);
850	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_HOMOGENEOUS);
851	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CANSETTIME);
852nfsmout:
853	if (nmlocked)
854		lck_mtx_unlock(&nmp->nm_lock);
855	nfsm_chain_cleanup(&nmreq);
856	nfsm_chain_cleanup(&nmrep);
857	return (error);
858}
859
860/*
 * Mount a remote root fs via NFS.  This depends on the info in the
862 * nfs_diskless structure that has been filled in properly by some primary
863 * bootstrap.
864 * It goes something like this:
865 * - do enough of "ifconfig" by calling ifioctl() so that the system
866 *   can talk to the server
867 * - If nfs_diskless.mygateway is filled in, use that address as
868 *   a default gateway.
869 * - hand craft the swap nfs vnode hanging off a fake mount point
870 *	if swdevt[0].sw_dev == NODEV
871 * - build the rootfs mount point and call mountnfs() to do the rest.
872 */
873int
874nfs_mountroot(void)
875{
876	struct nfs_diskless nd;
877	mount_t mp = NULL;
878	vnode_t vp = NULL;
879	vfs_context_t ctx;
880	int error;
881#if !defined(NO_MOUNT_PRIVATE)
882	mount_t mppriv = NULL;
883	vnode_t vppriv = NULL;
884#endif /* NO_MOUNT_PRIVATE */
885	int v3, sotype;
886
887	/*
888	 * Call nfs_boot_init() to fill in the nfs_diskless struct.
889	 * Note: networking must already have been configured before
890	 * we're called.
891	 */
892	bzero((caddr_t) &nd, sizeof(nd));
893	error = nfs_boot_init(&nd);
894	if (error)
895		panic("nfs_boot_init: unable to initialize NFS root system information, "
896		      "error %d, check configuration: %s\n", error, PE_boot_args());
897
898	/*
899	 * Try NFSv3 first, then fallback to NFSv2.
900	 * Likewise, try TCP first, then fall back to UDP.
901	 */
902	v3 = 1;
903	sotype = SOCK_STREAM;
904
905tryagain:
906	error = nfs_boot_getfh(&nd, v3, sotype);
907	if (error) {
908		if (error == EHOSTDOWN || error == EHOSTUNREACH) {
909			if (nd.nd_root.ndm_mntfrom)
910				FREE_ZONE(nd.nd_root.ndm_mntfrom,
911					  MAXPATHLEN, M_NAMEI);
912			if (nd.nd_root.ndm_path)
913				FREE_ZONE(nd.nd_root.ndm_path,
914					  MAXPATHLEN, M_NAMEI);
915			if (nd.nd_private.ndm_mntfrom)
916				FREE_ZONE(nd.nd_private.ndm_mntfrom,
917					  MAXPATHLEN, M_NAMEI);
918			if (nd.nd_private.ndm_path)
919				FREE_ZONE(nd.nd_private.ndm_path,
920					  MAXPATHLEN, M_NAMEI);
921			return (error);
922		}
923		if (v3) {
924			if (sotype == SOCK_STREAM) {
925				printf("NFS mount (v3,TCP) failed with error %d, trying UDP...\n", error);
926				sotype = SOCK_DGRAM;
927				goto tryagain;
928			}
929			printf("NFS mount (v3,UDP) failed with error %d, trying v2...\n", error);
930			v3 = 0;
931			sotype = SOCK_STREAM;
932			goto tryagain;
933		} else if (sotype == SOCK_STREAM) {
934			printf("NFS mount (v2,TCP) failed with error %d, trying UDP...\n", error);
935			sotype = SOCK_DGRAM;
936			goto tryagain;
937		} else {
938			printf("NFS mount (v2,UDP) failed with error %d, giving up...\n", error);
939		}
940		switch(error) {
941		case EPROGUNAVAIL:
942			panic("NFS mount failed: NFS server mountd not responding, check server configuration: %s", PE_boot_args());
943		case EACCES:
944		case EPERM:
945			panic("NFS mount failed: NFS server refused mount, check server configuration: %s", PE_boot_args());
946		default:
947			panic("NFS mount failed with error %d, check configuration: %s", error, PE_boot_args());
948		}
949	}
950
951	ctx = vfs_context_kernel();
952
953	/*
954	 * Create the root mount point.
955	 */
956#if !defined(NO_MOUNT_PRIVATE)
957	{
958		//PWC hack until we have a real "mount" tool to remount root rw
959		int rw_root=0;
960		int flags = MNT_ROOTFS|MNT_RDONLY;
961		PE_parse_boot_argn("-rwroot_hack", &rw_root, sizeof (rw_root));
962		if(rw_root)
963		{
964			flags = MNT_ROOTFS;
965			kprintf("-rwroot_hack in effect: mounting root fs read/write\n");
966		}
967
968	if ((error = nfs_mount_diskless(&nd.nd_root, "/", flags, &vp, &mp, ctx)))
969#else
970	if ((error = nfs_mount_diskless(&nd.nd_root, "/", MNT_ROOTFS, &vp, &mp, ctx)))
971#endif /* NO_MOUNT_PRIVATE */
972	{
973		if (v3) {
974			if (sotype == SOCK_STREAM) {
975				printf("NFS root mount (v3,TCP) failed with %d, trying UDP...\n", error);
976				sotype = SOCK_DGRAM;
977				goto tryagain;
978			}
979			printf("NFS root mount (v3,UDP) failed with %d, trying v2...\n", error);
980			v3 = 0;
981			sotype = SOCK_STREAM;
982			goto tryagain;
983		} else if (sotype == SOCK_STREAM) {
984			printf("NFS root mount (v2,TCP) failed with %d, trying UDP...\n", error);
985			sotype = SOCK_DGRAM;
986			goto tryagain;
987		} else {
988			printf("NFS root mount (v2,UDP) failed with error %d, giving up...\n", error);
989		}
990		panic("NFS root mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
991	}
992	}
993	printf("root on %s\n", nd.nd_root.ndm_mntfrom);
994
995	vfs_unbusy(mp);
996	mount_list_add(mp);
997	rootvp = vp;
998
999#if !defined(NO_MOUNT_PRIVATE)
1000	if (nd.nd_private.ndm_saddr.sin_addr.s_addr) {
1001	    error = nfs_mount_diskless_private(&nd.nd_private, "/private",
1002					       0, &vppriv, &mppriv, ctx);
1003	    if (error)
1004		panic("NFS /private mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
1005	    printf("private on %s\n", nd.nd_private.ndm_mntfrom);
1006
1007	    vfs_unbusy(mppriv);
1008	    mount_list_add(mppriv);
1009	}
1010
1011#endif /* NO_MOUNT_PRIVATE */
1012
1013	if (nd.nd_root.ndm_mntfrom)
1014		FREE_ZONE(nd.nd_root.ndm_mntfrom, MAXPATHLEN, M_NAMEI);
1015	if (nd.nd_root.ndm_path)
1016		FREE_ZONE(nd.nd_root.ndm_path, MAXPATHLEN, M_NAMEI);
1017	if (nd.nd_private.ndm_mntfrom)
1018		FREE_ZONE(nd.nd_private.ndm_mntfrom, MAXPATHLEN, M_NAMEI);
1019	if (nd.nd_private.ndm_path)
1020		FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI);
1021
1022	/* Get root attributes (for the time). */
1023	error = nfs_getattr(VTONFS(vp), NULL, ctx, NGA_UNCACHED);
1024	if (error)
1025		panic("NFS mount: failed to get attributes for root directory, error %d, check server", error);
1026	return (0);
1027}
1028
1029/*
1030 * Internal version of mount system call for diskless setup.
1031 */
1032static int
1033nfs_mount_diskless(
1034	struct nfs_dlmount *ndmntp,
1035	const char *mntname,
1036	int mntflag,
1037	vnode_t *vpp,
1038	mount_t *mpp,
1039	vfs_context_t ctx)
1040{
1041	mount_t mp;
1042	int error, numcomps;
1043	char *xdrbuf, *p, *cp, *frompath, *endserverp;
1044	char uaddr[MAX_IPv4_STR_LEN];
1045	struct xdrbuf xb;
1046	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1047	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
1048	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
1049	uint32_t argslength_offset, attrslength_offset, end_offset;
1050
1051	if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_mntfrom, &mp))) {
1052		printf("nfs_mount_diskless: NFS not configured\n");
1053		return (error);
1054	}
1055
1056	mp->mnt_flag |= mntflag;
1057	if (!(mntflag & MNT_RDONLY))
1058		mp->mnt_flag &= ~MNT_RDONLY;
1059
1060	/* find the server-side path being mounted */
1061	frompath = ndmntp->ndm_mntfrom;
1062	if (*frompath == '[') {  /* skip IPv6 literal address */
1063		while (*frompath && (*frompath != ']'))
1064			frompath++;
1065		if (*frompath == ']')
1066			frompath++;
1067	}
1068	while (*frompath && (*frompath != ':'))
1069		frompath++;
1070	endserverp = frompath;
1071	while (*frompath && (*frompath == ':'))
1072		frompath++;
1073	/* count fs location path components */
1074	p = frompath;
1075	while (*p && (*p == '/'))
1076		p++;
1077	numcomps = 0;
1078	while (*p) {
1079		numcomps++;
1080		while (*p && (*p != '/'))
1081			p++;
1082		while (*p && (*p == '/'))
1083			p++;
1084	}
1085
1086	/* convert address to universal address string */
1087	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
1088		printf("nfs_mount_diskless: bad address\n");
1089		return (EINVAL);
1090	}
1091
1092	/* prepare mount attributes */
1093	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1094	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1095	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1096	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1097	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1098	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1099	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1100
1101	/* prepare mount flags */
1102	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1103	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1104	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1105	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1106
1107	/* build xdr buffer */
1108	xb_init_buffer(&xb, NULL, 0);
1109	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
1110	argslength_offset = xb_offset(&xb);
1111	xb_add_32(error, &xb, 0); // args length
1112	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1113	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1114	attrslength_offset = xb_offset(&xb);
1115	xb_add_32(error, &xb, 0); // attrs length
1116	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
1117	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1118	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
1119	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
1120	/* fs location */
1121	xb_add_32(error, &xb, 1); /* fs location count */
1122	xb_add_32(error, &xb, 1); /* server count */
1123	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
1124	xb_add_32(error, &xb, 1); /* address count */
1125	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1126	xb_add_32(error, &xb, 0); /* empty server info */
1127	xb_add_32(error, &xb, numcomps); /* pathname component count */
1128	p = frompath;
1129	while (*p && (*p == '/'))
1130		p++;
1131	while (*p) {
1132		cp = p;
1133		while (*p && (*p != '/'))
1134			p++;
1135		xb_add_string(error, &xb, cp, (p - cp)); /* component */
1136		if (error)
1137			break;
1138		while (*p && (*p == '/'))
1139			p++;
1140	}
1141	xb_add_32(error, &xb, 0); /* empty fsl info */
1142	xb_add_32(error, &xb, mntflag); /* MNT flags */
1143	xb_build_done(error, &xb);
1144
1145	/* update opaque counts */
1146	end_offset = xb_offset(&xb);
1147	if (!error) {
1148		error = xb_seek(&xb, argslength_offset);
1149		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1150	}
1151	if (!error) {
1152		error = xb_seek(&xb, attrslength_offset);
1153		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1154	}
1155	if (error) {
1156		printf("nfs_mount_diskless: error %d assembling mount args\n", error);
1157		xb_cleanup(&xb);
1158		return (error);
1159	}
1160	/* grab the assembled buffer */
1161	xdrbuf = xb_buffer_base(&xb);
1162	xb.xb_flags &= ~XB_CLEANUP;
1163
1164	/* do the mount */
1165	if ((error = mountnfs(xdrbuf, mp, ctx, vpp))) {
1166		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
1167		// XXX vfs_rootmountfailed(mp);
1168		mount_list_lock();
1169		mp->mnt_vtable->vfc_refcount--;
1170		mount_list_unlock();
1171		vfs_unbusy(mp);
1172		mount_lock_destroy(mp);
1173#if CONFIG_MACF
1174		mac_mount_label_destroy(mp);
1175#endif
1176		FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1177	} else {
1178		*mpp = mp;
1179	}
1180	xb_cleanup(&xb);
1181	return (error);
1182}
1183
1184#if !defined(NO_MOUNT_PRIVATE)
1185/*
1186 * Internal version of mount system call to mount "/private"
1187 * separately in diskless setup
1188 */
1189static int
1190nfs_mount_diskless_private(
1191	struct nfs_dlmount *ndmntp,
1192	const char *mntname,
1193	int mntflag,
1194	vnode_t *vpp,
1195	mount_t *mpp,
1196	vfs_context_t ctx)
1197{
1198	mount_t mp;
1199	int error, numcomps;
1200	proc_t procp;
1201	struct vfstable *vfsp;
1202	struct nameidata nd;
1203	vnode_t vp;
1204	char *xdrbuf = NULL, *p, *cp, *frompath, *endserverp;
1205	char uaddr[MAX_IPv4_STR_LEN];
1206	struct xdrbuf xb;
1207	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1208	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
1209	uint32_t argslength_offset, attrslength_offset, end_offset;
1210
1211	procp = current_proc(); /* XXX */
1212	xb_init(&xb, 0);
1213
1214	{
1215	/*
1216	 * mimic main()!. Temporarily set up rootvnode and other stuff so
1217	 * that namei works. Need to undo this because main() does it, too
1218	 */
1219		struct filedesc *fdp;	/* pointer to file descriptor state */
1220		fdp = procp->p_fd;
1221		mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
1222
1223		/* Get the vnode for '/'. Set fdp->fd_cdir to reference it. */
1224		if (VFS_ROOT(mountlist.tqh_first, &rootvnode, NULL))
1225			panic("cannot find root vnode");
1226		error = vnode_ref(rootvnode);
1227		if (error) {
1228			printf("nfs_mountroot: vnode_ref() failed on root vnode!\n");
1229			goto out;
1230		}
1231		fdp->fd_cdir = rootvnode;
1232		fdp->fd_rdir = NULL;
1233	}
1234
1235	/*
1236	 * Get vnode to be covered
1237	 */
1238	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1239	    CAST_USER_ADDR_T(mntname), ctx);
1240	if ((error = namei(&nd))) {
1241		printf("nfs_mountroot: private namei failed!\n");
1242		goto out;
1243	}
1244	{
1245		/* undo vnode_ref() in mimic main()! */
1246		vnode_rele(rootvnode);
1247	}
1248	nameidone(&nd);
1249	vp = nd.ni_vp;
1250
1251	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) ||
1252	    (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
1253		vnode_put(vp);
1254		goto out;
1255	}
1256	if (vnode_vtype(vp) != VDIR) {
1257		vnode_put(vp);
1258		error = ENOTDIR;
1259		goto out;
1260	}
1261	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1262		if (!strncmp(vfsp->vfc_name, "nfs", sizeof(vfsp->vfc_name)))
1263			break;
1264	if (vfsp == NULL) {
1265		printf("nfs_mountroot: private NFS not configured\n");
1266		vnode_put(vp);
1267		error = ENODEV;
1268		goto out;
1269	}
1270	if (vnode_mountedhere(vp) != NULL) {
1271		vnode_put(vp);
1272		error = EBUSY;
1273		goto out;
1274	}
1275
1276	/*
1277	 * Allocate and initialize the filesystem.
1278	 */
1279	mp = _MALLOC_ZONE((u_int32_t)sizeof(struct mount), M_MOUNT, M_WAITOK);
1280	if (!mp) {
1281		printf("nfs_mountroot: unable to allocate mount structure\n");
1282		vnode_put(vp);
1283		error = ENOMEM;
1284		goto out;
1285	}
1286	bzero((char *)mp, sizeof(struct mount));
1287
1288	/* Initialize the default IO constraints */
1289	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
1290	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
1291	mp->mnt_ioflags = 0;
1292	mp->mnt_realrootvp = NULLVP;
1293	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
1294
1295	mount_lock_init(mp);
1296	TAILQ_INIT(&mp->mnt_vnodelist);
1297	TAILQ_INIT(&mp->mnt_workerqueue);
1298	TAILQ_INIT(&mp->mnt_newvnodes);
1299	(void)vfs_busy(mp, LK_NOWAIT);
1300	TAILQ_INIT(&mp->mnt_vnodelist);
1301	mount_list_lock();
1302	vfsp->vfc_refcount++;
1303	mount_list_unlock();
1304	mp->mnt_vtable = vfsp;
1305	mp->mnt_op = vfsp->vfc_vfsops;
1306	// mp->mnt_stat.f_type = vfsp->vfc_typenum;
1307	mp->mnt_flag = mntflag;
1308	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1309	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSNAMELEN-1);
1310	vp->v_mountedhere = mp;
1311	mp->mnt_vnodecovered = vp;
1312	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
1313	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
1314	(void) copystr(ndmntp->ndm_mntfrom, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0);
1315#if CONFIG_MACF
1316	mac_mount_label_init(mp);
1317	mac_mount_label_associate(ctx, mp);
1318#endif
1319
1320	/* find the server-side path being mounted */
1321	frompath = ndmntp->ndm_mntfrom;
1322	if (*frompath == '[') {  /* skip IPv6 literal address */
1323		while (*frompath && (*frompath != ']'))
1324			frompath++;
1325		if (*frompath == ']')
1326			frompath++;
1327	}
1328	while (*frompath && (*frompath != ':'))
1329		frompath++;
1330	endserverp = frompath;
1331	while (*frompath && (*frompath == ':'))
1332		frompath++;
1333	/* count fs location path components */
1334	p = frompath;
1335	while (*p && (*p == '/'))
1336		p++;
1337	numcomps = 0;
1338	while (*p) {
1339		numcomps++;
1340		while (*p && (*p != '/'))
1341			p++;
1342		while (*p && (*p == '/'))
1343			p++;
1344	}
1345
1346	/* convert address to universal address string */
1347	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
1348		printf("nfs_mountroot: bad address\n");
1349		error = EINVAL;
1350		goto out;
1351	}
1352
1353	/* prepare mount attributes */
1354	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1355	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1356	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1357	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1358	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1359	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1360	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1361
1362	/* prepare mount flags */
1363	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1364	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1365	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1366	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1367
1368	/* build xdr buffer */
1369	xb_init_buffer(&xb, NULL, 0);
1370	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
1371	argslength_offset = xb_offset(&xb);
1372	xb_add_32(error, &xb, 0); // args length
1373	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1374	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1375	attrslength_offset = xb_offset(&xb);
1376	xb_add_32(error, &xb, 0); // attrs length
1377	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
1378	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1379	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
1380	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
1381	/* fs location */
1382	xb_add_32(error, &xb, 1); /* fs location count */
1383	xb_add_32(error, &xb, 1); /* server count */
1384	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
1385	xb_add_32(error, &xb, 1); /* address count */
1386	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1387	xb_add_32(error, &xb, 0); /* empty server info */
1388	xb_add_32(error, &xb, numcomps); /* pathname component count */
1389	p = frompath;
1390	while (*p && (*p == '/'))
1391		p++;
1392	while (*p) {
1393		cp = p;
1394		while (*p && (*p != '/'))
1395			p++;
1396		xb_add_string(error, &xb, cp, (p - cp)); /* component */
1397		if (error)
1398			break;
1399		while (*p && (*p == '/'))
1400			p++;
1401	}
1402	xb_add_32(error, &xb, 0); /* empty fsl info */
1403	xb_add_32(error, &xb, mntflag); /* MNT flags */
1404	xb_build_done(error, &xb);
1405
1406	/* update opaque counts */
1407	end_offset = xb_offset(&xb);
1408	if (!error) {
1409		error = xb_seek(&xb, argslength_offset);
1410		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1411	}
1412	if (!error) {
1413		error = xb_seek(&xb, attrslength_offset);
1414		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1415	}
1416	if (error) {
1417		printf("nfs_mountroot: error %d assembling mount args\n", error);
1418		goto out;
1419	}
1420	/* grab the assembled buffer */
1421	xdrbuf = xb_buffer_base(&xb);
1422	xb.xb_flags &= ~XB_CLEANUP;
1423
1424	/* do the mount */
1425	if ((error = mountnfs(xdrbuf, mp, ctx, &vp))) {
1426		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
1427		mount_list_lock();
1428		vfsp->vfc_refcount--;
1429		mount_list_unlock();
1430		vfs_unbusy(mp);
1431		mount_lock_destroy(mp);
1432#if CONFIG_MACF
1433		mac_mount_label_destroy(mp);
1434#endif
1435		FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
1436		goto out;
1437	}
1438
1439	*mpp = mp;
1440	*vpp = vp;
1441out:
1442	xb_cleanup(&xb);
1443	return (error);
1444}
1445#endif /* NO_MOUNT_PRIVATE */
1446
1447/*
1448 * Convert old style NFS mount args to XDR.
1449 */
1450static int
1451nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int argsversion, int inkernel, char **xdrbufp)
1452{
1453	int error = 0, args64bit, argsize, numcomps;
1454	struct user_nfs_args args;
1455	struct nfs_args tempargs;
1456	caddr_t argsp;
1457	size_t len;
1458	u_char nfh[NFS4_FHSIZE];
1459	char *mntfrom, *endserverp, *frompath, *p, *cp;
1460	struct sockaddr_storage ss;
1461	void *sinaddr;
1462	char uaddr[MAX_IPv6_STR_LEN];
1463	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
1464	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
1465	uint32_t nfsvers, nfslockmode = 0, argslength_offset, attrslength_offset, end_offset;
1466	struct xdrbuf xb;
1467
1468	*xdrbufp = NULL;
1469
1470	/* allocate a temporary buffer for mntfrom */
1471	MALLOC_ZONE(mntfrom, char*, MAXPATHLEN, M_NAMEI, M_WAITOK);
1472	if (!mntfrom)
1473		return (ENOMEM);
1474
1475	args64bit = (inkernel || vfs_context_is64bit(ctx));
1476	argsp = args64bit ? (void*)&args : (void*)&tempargs;
1477
1478	argsize = args64bit ? sizeof(args) : sizeof(tempargs);
1479	switch (argsversion) {
1480	case 3:
1481		argsize -= NFS_ARGSVERSION4_INCSIZE;
1482	case 4:
1483		argsize -= NFS_ARGSVERSION5_INCSIZE;
1484	case 5:
1485		argsize -= NFS_ARGSVERSION6_INCSIZE;
1486	case 6:
1487		break;
1488	default:
1489		error = EPROGMISMATCH;
1490		goto nfsmout;
1491	}
1492
1493	/* read in the structure */
1494	if (inkernel)
1495		bcopy(CAST_DOWN(void *, data), argsp, argsize);
1496	else
1497		error = copyin(data, argsp, argsize);
1498	nfsmout_if(error);
1499
1500	if (!args64bit) {
1501		args.addrlen = tempargs.addrlen;
1502		args.sotype = tempargs.sotype;
1503		args.proto = tempargs.proto;
1504		args.fhsize = tempargs.fhsize;
1505		args.flags = tempargs.flags;
1506		args.wsize = tempargs.wsize;
1507		args.rsize = tempargs.rsize;
1508		args.readdirsize = tempargs.readdirsize;
1509		args.timeo = tempargs.timeo;
1510		args.retrans = tempargs.retrans;
1511		args.maxgrouplist = tempargs.maxgrouplist;
1512		args.readahead = tempargs.readahead;
1513		args.leaseterm = tempargs.leaseterm;
1514		args.deadthresh = tempargs.deadthresh;
1515		args.addr = CAST_USER_ADDR_T(tempargs.addr);
1516		args.fh = CAST_USER_ADDR_T(tempargs.fh);
1517		args.hostname = CAST_USER_ADDR_T(tempargs.hostname);
1518		if (args.version >= 4) {
1519			args.acregmin = tempargs.acregmin;
1520			args.acregmax = tempargs.acregmax;
1521			args.acdirmin = tempargs.acdirmin;
1522			args.acdirmax = tempargs.acdirmax;
1523		}
1524		if (args.version >= 5)
1525			args.auth = tempargs.auth;
1526		if (args.version >= 6)
1527			args.deadtimeout = tempargs.deadtimeout;
1528	}
1529
1530	if ((args.fhsize < 0) || (args.fhsize > NFS4_FHSIZE)) {
1531		error = EINVAL;
1532		goto nfsmout;
1533	}
1534	if (args.fhsize > 0) {
1535		if (inkernel)
1536			bcopy(CAST_DOWN(void *, args.fh), (caddr_t)nfh, args.fhsize);
1537		else
1538			error = copyin(args.fh, (caddr_t)nfh, args.fhsize);
1539		nfsmout_if(error);
1540	}
1541
1542	if (inkernel)
1543		error = copystr(CAST_DOWN(void *, args.hostname), mntfrom, MAXPATHLEN-1, &len);
1544	else
1545		error = copyinstr(args.hostname, mntfrom, MAXPATHLEN-1, &len);
1546	nfsmout_if(error);
1547	bzero(&mntfrom[len], MAXPATHLEN - len);
1548
1549	/* find the server-side path being mounted */
1550	frompath = mntfrom;
1551	if (*frompath == '[') {  /* skip IPv6 literal address */
1552		while (*frompath && (*frompath != ']'))
1553			frompath++;
1554		if (*frompath == ']')
1555			frompath++;
1556	}
1557	while (*frompath && (*frompath != ':'))
1558		frompath++;
1559	endserverp = frompath;
1560	while (*frompath && (*frompath == ':'))
1561		frompath++;
1562	/* count fs location path components */
1563	p = frompath;
1564	while (*p && (*p == '/'))
1565		p++;
1566	numcomps = 0;
1567	while (*p) {
1568		numcomps++;
1569		while (*p && (*p != '/'))
1570			p++;
1571		while (*p && (*p == '/'))
1572			p++;
1573	}
1574
1575	/* copy socket address */
1576	if (inkernel)
1577		bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen);
1578	else {
1579		if ((size_t)args.addrlen > sizeof (struct sockaddr_storage))
1580			error = EINVAL;
1581		else
1582			error = copyin(args.addr, &ss, args.addrlen);
1583	}
1584	nfsmout_if(error);
1585	ss.ss_len = args.addrlen;
1586
1587	/* convert address to universal address string */
1588	if (ss.ss_family == AF_INET)
1589		sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
1590	else if (ss.ss_family == AF_INET6)
1591		sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
1592	else
1593		sinaddr = NULL;
1594	if (!sinaddr || (inet_ntop(ss.ss_family, sinaddr, uaddr, sizeof(uaddr)) != uaddr)) {
1595		error = EINVAL;
1596		goto nfsmout;
1597	}
1598
1599	/* prepare mount flags */
1600	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
1601	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
1602	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
1603	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
1604	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
1605	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
1606	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
1607	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
1608	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
1609	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
1610	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
1611	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
1612	if (args.flags & NFSMNT_SOFT)
1613		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
1614	if (args.flags & NFSMNT_INT)
1615		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
1616	if (args.flags & NFSMNT_RESVPORT)
1617		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
1618	if (args.flags & NFSMNT_NOCONN)
1619		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
1620	if (args.flags & NFSMNT_DUMBTIMR)
1621		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
1622	if (args.flags & NFSMNT_CALLUMNT)
1623		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
1624	if (args.flags & NFSMNT_RDIRPLUS)
1625		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
1626	if (args.flags & NFSMNT_NONEGNAMECACHE)
1627		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
1628	if (args.flags & NFSMNT_MUTEJUKEBOX)
1629		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
1630	if (args.flags & NFSMNT_NOQUOTA)
1631		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
1632
1633	/* prepare mount attributes */
1634	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
1635	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
1636	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
1637	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
1638	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
1639	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
1640	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
1641	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
1642	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
1643	if (args.flags & NFSMNT_NFSV4)
1644		nfsvers = 4;
1645	else if (args.flags & NFSMNT_NFSV3)
1646		nfsvers = 3;
1647	else
1648		nfsvers = 2;
1649	if ((args.flags & NFSMNT_RSIZE) && (args.rsize > 0))
1650		NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
1651	if ((args.flags & NFSMNT_WSIZE) && (args.wsize > 0))
1652		NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
1653	if ((args.flags & NFSMNT_TIMEO) && (args.timeo > 0))
1654		NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
1655	if ((args.flags & NFSMNT_RETRANS) && (args.retrans > 0))
1656		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
1657	if ((args.flags & NFSMNT_MAXGRPS) && (args.maxgrouplist > 0))
1658		NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
1659	if ((args.flags & NFSMNT_READAHEAD) && (args.readahead > 0))
1660		NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
1661	if ((args.flags & NFSMNT_READDIRSIZE) && (args.readdirsize > 0))
1662		NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
1663	if ((args.flags & NFSMNT_NOLOCKS) ||
1664	    (args.flags & NFSMNT_LOCALLOCKS)) {
1665		NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
1666		if (args.flags & NFSMNT_NOLOCKS)
1667			nfslockmode = NFS_LOCK_MODE_DISABLED;
1668		else if (args.flags & NFSMNT_LOCALLOCKS)
1669			nfslockmode = NFS_LOCK_MODE_LOCAL;
1670		else
1671			nfslockmode = NFS_LOCK_MODE_ENABLED;
1672	}
1673	if (args.version >= 4) {
1674		if ((args.flags & NFSMNT_ACREGMIN) && (args.acregmin > 0))
1675			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
1676		if ((args.flags & NFSMNT_ACREGMAX) && (args.acregmax > 0))
1677			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
1678		if ((args.flags & NFSMNT_ACDIRMIN) && (args.acdirmin > 0))
1679			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
1680		if ((args.flags & NFSMNT_ACDIRMAX) && (args.acdirmax > 0))
1681			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
1682	}
1683	if (args.version >= 5) {
1684		if ((args.flags & NFSMNT_SECFLAVOR) || (args.flags & NFSMNT_SECSYSOK))
1685			NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
1686	}
1687	if (args.version >= 6) {
1688		if ((args.flags & NFSMNT_DEADTIMEOUT) && (args.deadtimeout > 0))
1689			NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
1690	}
1691
1692	/* build xdr buffer */
1693	xb_init_buffer(&xb, NULL, 0);
1694	xb_add_32(error, &xb, args.version);
1695	argslength_offset = xb_offset(&xb);
1696	xb_add_32(error, &xb, 0); // args length
1697	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
1698	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
1699	attrslength_offset = xb_offset(&xb);
1700	xb_add_32(error, &xb, 0); // attrs length
1701	xb_add_bitmap(error, &xb, mflags_mask, NFS_MFLAG_BITMAP_LEN); /* mask */
1702	xb_add_bitmap(error, &xb, mflags, NFS_MFLAG_BITMAP_LEN); /* value */
1703	xb_add_32(error, &xb, nfsvers);
1704	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
1705		xb_add_32(error, &xb, args.rsize);
1706	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
1707		xb_add_32(error, &xb, args.wsize);
1708	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
1709		xb_add_32(error, &xb, args.readdirsize);
1710	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
1711		xb_add_32(error, &xb, args.readahead);
1712	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
1713		xb_add_32(error, &xb, args.acregmin);
1714		xb_add_32(error, &xb, 0);
1715	}
1716	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
1717		xb_add_32(error, &xb, args.acregmax);
1718		xb_add_32(error, &xb, 0);
1719	}
1720	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
1721		xb_add_32(error, &xb, args.acdirmin);
1722		xb_add_32(error, &xb, 0);
1723	}
1724	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
1725		xb_add_32(error, &xb, args.acdirmax);
1726		xb_add_32(error, &xb, 0);
1727	}
1728	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
1729		xb_add_32(error, &xb, nfslockmode);
1730	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
1731		uint32_t flavors[2], i=0;
1732		if (args.flags & NFSMNT_SECFLAVOR)
1733			flavors[i++] = args.auth;
1734		if ((args.flags & NFSMNT_SECSYSOK) && ((i == 0) || (flavors[0] != RPCAUTH_SYS)))
1735			flavors[i++] = RPCAUTH_SYS;
1736		xb_add_word_array(error, &xb, flavors, i);
1737	}
1738	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
1739		xb_add_32(error, &xb, args.maxgrouplist);
1740	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
1741		xb_add_string(error, &xb, ((args.sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
1742	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
1743		xb_add_32(error, &xb, ((ss.ss_family == AF_INET) ?
1744			ntohs(((struct sockaddr_in*)&ss)->sin_port) :
1745			ntohs(((struct sockaddr_in6*)&ss)->sin6_port)));
1746	/* NFS_MATTR_MOUNT_PORT (not available in old args) */
1747	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
1748		/* convert from .1s increments to time */
1749		xb_add_32(error, &xb, args.timeo/10);
1750		xb_add_32(error, &xb, (args.timeo%10)*100000000);
1751	}
1752	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
1753		xb_add_32(error, &xb, args.retrans);
1754	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
1755		xb_add_32(error, &xb, args.deadtimeout);
1756		xb_add_32(error, &xb, 0);
1757	}
1758	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH))
1759		xb_add_fh(error, &xb, &nfh[0], args.fhsize);
1760	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
1761		xb_add_32(error, &xb, 1); /* fs location count */
1762		xb_add_32(error, &xb, 1); /* server count */
1763		xb_add_string(error, &xb, mntfrom, (endserverp - mntfrom)); /* server name */
1764		xb_add_32(error, &xb, 1); /* address count */
1765		xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
1766		xb_add_32(error, &xb, 0); /* empty server info */
1767		xb_add_32(error, &xb, numcomps); /* pathname component count */
1768		nfsmout_if(error);
1769		p = frompath;
1770		while (*p && (*p == '/'))
1771			p++;
1772		while (*p) {
1773			cp = p;
1774			while (*p && (*p != '/'))
1775				p++;
1776			xb_add_string(error, &xb, cp, (p - cp)); /* component */
1777			nfsmout_if(error);
1778			while (*p && (*p == '/'))
1779				p++;
1780		}
1781		xb_add_32(error, &xb, 0); /* empty fsl info */
1782	}
1783	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
1784		xb_add_32(error, &xb, (vfs_flags(mp) & MNT_VISFLAGMASK)); /* VFS MNT_* flags */
1785	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM))
1786		xb_add_string(error, &xb, mntfrom, strlen(mntfrom)); /* fixed f_mntfromname */
1787	xb_build_done(error, &xb);
1788
1789	/* update opaque counts */
1790	end_offset = xb_offset(&xb);
1791	error = xb_seek(&xb, argslength_offset);
1792	xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
1793	nfsmout_if(error);
1794	error = xb_seek(&xb, attrslength_offset);
1795	xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
1796
1797	if (!error) {
1798		/* grab the assembled buffer */
1799		*xdrbufp = xb_buffer_base(&xb);
1800		xb.xb_flags &= ~XB_CLEANUP;
1801	}
1802nfsmout:
1803	xb_cleanup(&xb);
1804	FREE_ZONE(mntfrom, MAXPATHLEN, M_NAMEI);
1805	return (error);
1806}
1807
1808/*
1809 * VFS Operations.
1810 *
1811 * mount system call
1812 */
1813int
1814nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
1815{
1816	int error = 0, inkernel = vfs_iskernelmount(mp);
1817	uint32_t argsversion, argslength;
1818	char *xdrbuf = NULL;
1819
1820	/* read in version */
1821	if (inkernel)
1822		bcopy(CAST_DOWN(void *, data), &argsversion, sizeof(argsversion));
1823	else if ((error = copyin(data, &argsversion, sizeof(argsversion))))
1824		return (error);
1825
1826	/* If we have XDR args, then all values in the buffer are in network order */
1827	if (argsversion == htonl(NFS_ARGSVERSION_XDR))
1828		argsversion = NFS_ARGSVERSION_XDR;
1829
1830	switch (argsversion) {
1831	case 3:
1832	case 4:
1833	case 5:
1834	case 6:
1835		/* convert old-style args to xdr */
1836		error = nfs_convert_old_nfs_args(mp, data, ctx, argsversion, inkernel, &xdrbuf);
1837		break;
1838	case NFS_ARGSVERSION_XDR:
1839		/* copy in xdr buffer */
1840		if (inkernel)
1841			bcopy(CAST_DOWN(void *, (data + XDRWORD)), &argslength, XDRWORD);
1842		else
1843			error = copyin((data + XDRWORD), &argslength, XDRWORD);
1844		if (error)
1845			break;
1846		argslength = ntohl(argslength);
1847		/* put a reasonable limit on the size of the XDR args */
1848		if (argslength > 16*1024) {
1849			error = E2BIG;
1850			break;
1851		}
1852		/* allocate xdr buffer */
1853		xdrbuf = xb_malloc(xdr_rndup(argslength));
1854		if (!xdrbuf) {
1855			error = ENOMEM;
1856			break;
1857		}
1858		if (inkernel)
1859			bcopy(CAST_DOWN(void *, data), xdrbuf, argslength);
1860		else
1861			error = copyin(data, xdrbuf, argslength);
1862		break;
1863	default:
1864		error = EPROGMISMATCH;
1865	}
1866
1867	if (error) {
1868		if (xdrbuf)
1869			xb_free(xdrbuf);
1870		return (error);
1871	}
1872	error = mountnfs(xdrbuf, mp, ctx, &vp);
1873	return (error);
1874}
1875
1876/*
1877 * Common code for mount and mountroot
1878 */
1879
1880/* Set up an NFSv2/v3 mount */
1881int
1882nfs3_mount(
1883	struct nfsmount *nmp,
1884	vfs_context_t ctx,
1885	nfsnode_t *npp)
1886{
1887	int error = 0;
1888	struct nfs_vattr nvattr;
1889	u_int64_t xid;
1890
1891	*npp = NULL;
1892
1893	if (!nmp->nm_fh)
1894		return (EINVAL);
1895
1896	/*
1897	 * Get file attributes for the mountpoint.  These are needed
1898	 * in order to properly create the root vnode.
1899	 */
1900	error = nfs3_getattr_rpc(NULL, nmp->nm_mountp, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len, 0,
1901			ctx, &nvattr, &xid);
1902	if (error)
1903		goto out;
1904
1905	error = nfs_nget(nmp->nm_mountp, NULL, NULL, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len,
1906			&nvattr, &xid, RPCAUTH_UNKNOWN, NG_MARKROOT, npp);
1907	if (*npp)
1908		nfs_node_unlock(*npp);
1909	if (error)
1910		goto out;
1911
1912	/*
1913	 * Try to make sure we have all the general info from the server.
1914	 */
1915	if (nmp->nm_vers == NFS_VER2) {
1916		NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
1917		nmp->nm_fsattr.nfsa_maxname = NFS_MAXNAMLEN;
1918	} else if (nmp->nm_vers == NFS_VER3) {
1919		/* get the NFSv3 FSINFO */
1920		error = nfs3_fsinfo(nmp, *npp, ctx);
1921		if (error)
1922			goto out;
1923		/* If the server indicates all pathconf info is */
1924		/* the same, grab a copy of that info now */
1925		if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_HOMOGENEOUS) &&
1926		    (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
1927			struct nfs_fsattr nfsa;
1928			if (!nfs3_pathconf_rpc(*npp, &nfsa, ctx)) {
1929				/* cache a copy of the results */
1930				lck_mtx_lock(&nmp->nm_lock);
1931				nfs3_pathconf_cache(nmp, &nfsa);
1932				lck_mtx_unlock(&nmp->nm_lock);
1933			}
1934		}
1935	}
1936out:
1937	if (*npp && error) {
1938		vnode_put(NFSTOV(*npp));
1939		vnode_recycle(NFSTOV(*npp));
1940		*npp = NULL;
1941	}
1942	return (error);
1943}
1944
1945/*
1946 * Update an NFSv4 mount path with the contents of the symlink.
1947 *
1948 * Read the link for the given file handle.
1949 * Insert the link's components into the path.
1950 */
1951int
1952nfs4_mount_update_path_with_symlink(struct nfsmount *nmp, struct nfs_fs_path *nfsp, uint32_t curcomp, fhandle_t *dirfhp, int *depthp, fhandle_t *fhp, vfs_context_t ctx)
1953{
1954	int error = 0, status, numops;
1955	uint32_t len = 0, comp, newcomp, linkcompcount;
1956	u_int64_t xid;
1957	struct nfsm_chain nmreq, nmrep;
1958	struct nfsreq rq, *req = &rq;
1959	struct nfsreq_secinfo_args si;
1960	char *link = NULL, *p, *q, ch;
1961	struct nfs_fs_path nfsp2;
1962
1963	bzero(&nfsp2, sizeof(nfsp2));
1964	if (dirfhp->fh_len)
1965		NFSREQ_SECINFO_SET(&si, NULL, dirfhp->fh_data, dirfhp->fh_len, nfsp->np_components[curcomp], 0);
1966	else
1967		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, nfsp->np_components[curcomp], 0);
1968	nfsm_chain_null(&nmreq);
1969	nfsm_chain_null(&nmrep);
1970
1971	MALLOC_ZONE(link, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1972	if (!link)
1973		error = ENOMEM;
1974
1975	// PUTFH, READLINK
1976	numops = 2;
1977	nfsm_chain_build_alloc_init(error, &nmreq, 12 * NFSX_UNSIGNED);
1978	nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
1979	numops--;
1980	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
1981	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp->fh_data, fhp->fh_len);
1982	numops--;
1983	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
1984	nfsm_chain_build_done(error, &nmreq);
1985	nfsm_assert(error, (numops == 0), EPROTO);
1986	nfsmout_if(error);
1987
1988	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
1989			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
1990	if (!error)
1991		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
1992
1993	nfsm_chain_skip_tag(error, &nmrep);
1994	nfsm_chain_get_32(error, &nmrep, numops);
1995	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
1996	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
1997	nfsm_chain_get_32(error, &nmrep, len);
1998	nfsmout_if(error);
1999	if (len == 0)
2000		error = ENOENT;
2001	else if (len >= MAXPATHLEN)
2002		len = MAXPATHLEN - 1;
2003	nfsm_chain_get_opaque(error, &nmrep, len, link);
2004	nfsmout_if(error);
2005	/* make sure link string is terminated properly */
2006	link[len] = '\0';
2007
2008	/* count the number of components in link */
2009	p = link;
2010	while (*p && (*p == '/'))
2011		p++;
2012	linkcompcount = 0;
2013	while (*p) {
2014		linkcompcount++;
2015		while (*p && (*p != '/'))
2016			p++;
2017		while (*p && (*p == '/'))
2018			p++;
2019	}
2020
2021	/* free up used components */
2022	for (comp=0; comp <= curcomp; comp++) {
2023		if (nfsp->np_components[comp]) {
2024			FREE(nfsp->np_components[comp], M_TEMP);
2025			nfsp->np_components[comp] = NULL;
2026		}
2027	}
2028
2029	/* set up new path */
2030	nfsp2.np_compcount = nfsp->np_compcount - curcomp - 1 + linkcompcount;
2031	MALLOC(nfsp2.np_components, char **, nfsp2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2032	if (!nfsp2.np_components) {
2033		error = ENOMEM;
2034		goto nfsmout;
2035	}
2036
2037	/* add link components */
2038	p = link;
2039	while (*p && (*p == '/'))
2040		p++;
2041	for (newcomp=0; newcomp < linkcompcount; newcomp++) {
2042		/* find end of component */
2043		q = p;
2044		while (*q && (*q != '/'))
2045			q++;
2046		MALLOC(nfsp2.np_components[newcomp], char *, q-p+1, M_TEMP, M_WAITOK|M_ZERO);
2047		if (!nfsp2.np_components[newcomp]) {
2048			error = ENOMEM;
2049			break;
2050		}
2051		ch = *q;
2052		*q = '\0';
2053		strlcpy(nfsp2.np_components[newcomp], p, q-p+1);
2054		*q = ch;
2055		p = q;
2056		while (*p && (*p == '/'))
2057			p++;
2058	}
2059	nfsmout_if(error);
2060
2061	/* add remaining components */
2062	for(comp = curcomp + 1; comp < nfsp->np_compcount; comp++,newcomp++) {
2063		nfsp2.np_components[newcomp] = nfsp->np_components[comp];
2064		nfsp->np_components[comp] = NULL;
2065	}
2066
2067	/* move new path into place */
2068	FREE(nfsp->np_components, M_TEMP);
2069	nfsp->np_components = nfsp2.np_components;
2070	nfsp->np_compcount = nfsp2.np_compcount;
2071	nfsp2.np_components = NULL;
2072
2073	/* for absolute link, let the caller now that the next dirfh is root */
2074	if (link[0] == '/') {
2075		dirfhp->fh_len = 0;
2076		*depthp = 0;
2077	}
2078nfsmout:
2079	if (link)
2080		FREE_ZONE(link, MAXPATHLEN, M_NAMEI);
2081	if (nfsp2.np_components) {
2082		for (comp=0; comp < nfsp2.np_compcount; comp++)
2083			if (nfsp2.np_components[comp])
2084				FREE(nfsp2.np_components[comp], M_TEMP);
2085		FREE(nfsp2.np_components, M_TEMP);
2086	}
2087	nfsm_chain_cleanup(&nmreq);
2088	nfsm_chain_cleanup(&nmrep);
2089	return (error);
2090}
2091
2092/* Set up an NFSv4 mount */
2093int
2094nfs4_mount(
2095	struct nfsmount *nmp,
2096	vfs_context_t ctx,
2097	nfsnode_t *npp)
2098{
2099	struct nfsm_chain nmreq, nmrep;
2100	int error = 0, numops, status, interval, isdotdot, loopcnt = 0, depth = 0;
2101	struct nfs_fs_path fspath, *nfsp, fspath2;
2102	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], comp, comp2;
2103	fhandle_t fh, dirfh;
2104	struct nfs_vattr nvattr;
2105	u_int64_t xid;
2106	struct nfsreq rq, *req = &rq;
2107	struct nfsreq_secinfo_args si;
2108	struct nfs_sec sec;
2109	struct nfs_fs_locations nfsls;
2110
2111	*npp = NULL;
2112	fh.fh_len = dirfh.fh_len = 0;
2113	TAILQ_INIT(&nmp->nm_open_owners);
2114	TAILQ_INIT(&nmp->nm_delegations);
2115	TAILQ_INIT(&nmp->nm_dreturnq);
2116	nmp->nm_stategenid = 1;
2117	NVATTR_INIT(&nvattr);
2118	bzero(&nfsls, sizeof(nfsls));
2119	nfsm_chain_null(&nmreq);
2120	nfsm_chain_null(&nmrep);
2121
2122	/*
2123	 * If no security flavors were specified we'll want to default to the server's
2124	 * preferred flavor.  For NFSv4.0 we need a file handle and name to get that via
2125	 * SECINFO, so we'll do that on the last component of the server path we are
2126	 * mounting.  If we are mounting the server's root, we'll need to defer the
2127	 * SECINFO call to the first successful LOOKUP request.
2128	 */
2129	if (!nmp->nm_sec.count)
2130		nmp->nm_state |= NFSSTA_NEEDSECINFO;
2131
2132	/* make a copy of the current location's path */
2133	nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
2134	bzero(&fspath, sizeof(fspath));
2135	fspath.np_compcount = nfsp->np_compcount;
2136	if (fspath.np_compcount > 0) {
2137		MALLOC(fspath.np_components, char **, fspath.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2138		if (!fspath.np_components) {
2139			error = ENOMEM;
2140			goto nfsmout;
2141		}
2142		for (comp=0; comp < nfsp->np_compcount; comp++) {
2143			int slen = strlen(nfsp->np_components[comp]);
2144			MALLOC(fspath.np_components[comp], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
2145			if (!fspath.np_components[comp]) {
2146				error = ENOMEM;
2147				break;
2148			}
2149			strlcpy(fspath.np_components[comp], nfsp->np_components[comp], slen+1);
2150		}
2151		if (error)
2152			goto nfsmout;
2153	}
2154
2155	/* for mirror mounts, we can just use the file handle passed in */
2156	if (nmp->nm_fh) {
2157		dirfh.fh_len = nmp->nm_fh->fh_len;
2158		bcopy(nmp->nm_fh->fh_data, dirfh.fh_data, dirfh.fh_len);
2159		NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, NULL, 0);
2160		goto gotfh;
2161	}
2162
2163	/* otherwise, we need to get the fh for the directory we are mounting */
2164
2165	/* if no components, just get root */
2166	if (fspath.np_compcount == 0) {
2167nocomponents:
2168		// PUTROOTFH + GETATTR(FH)
2169		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, NULL, 0);
2170		numops = 2;
2171		nfsm_chain_build_alloc_init(error, &nmreq, 9 * NFSX_UNSIGNED);
2172		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2173		numops--;
2174		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
2175		numops--;
2176		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2177		NFS_CLEAR_ATTRIBUTES(bitmap);
2178		NFS4_DEFAULT_ATTRIBUTES(bitmap);
2179		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
2180		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2181		nfsm_chain_build_done(error, &nmreq);
2182		nfsm_assert(error, (numops == 0), EPROTO);
2183		nfsmout_if(error);
2184		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2185				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2186		if (!error)
2187			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2188		nfsm_chain_skip_tag(error, &nmrep);
2189		nfsm_chain_get_32(error, &nmrep, numops);
2190		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTROOTFH);
2191		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2192		nfsmout_if(error);
2193		NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2194		error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, &dirfh, NULL, NULL);
2195		if (!error && !NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
2196			printf("nfs: mount didn't return filehandle?\n");
2197			error = EBADRPC;
2198		}
2199		nfsmout_if(error);
2200		nfsm_chain_cleanup(&nmrep);
2201		nfsm_chain_null(&nmreq);
2202		NVATTR_CLEANUP(&nvattr);
2203		goto gotfh;
2204	}
2205
2206	/* look up each path component */
2207	for (comp=0; comp < fspath.np_compcount; ) {
2208		isdotdot = 0;
2209		if (fspath.np_components[comp][0] == '.') {
2210			if (fspath.np_components[comp][1] == '\0') {
2211				/* skip "." */
2212				comp++;
2213				continue;
2214			}
2215			/* treat ".." specially */
2216			if ((fspath.np_components[comp][1] == '.') &&
2217			    (fspath.np_components[comp][2] == '\0'))
2218			    	isdotdot = 1;
2219			if (isdotdot && (dirfh.fh_len == 0)) {
2220				/* ".." in root directory is same as "." */
2221				comp++;
2222				continue;
2223			}
2224		}
2225		// PUT(ROOT)FH + LOOKUP(P) + GETFH + GETATTR
2226		if (dirfh.fh_len == 0)
2227			NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
2228		else
2229			NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
2230		numops = 4;
2231		nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
2232		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2233		numops--;
2234		if (dirfh.fh_len) {
2235			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
2236			nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
2237		} else {
2238			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
2239		}
2240		numops--;
2241		if (isdotdot) {
2242			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
2243		} else {
2244			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
2245			nfsm_chain_add_name(error, &nmreq,
2246				fspath.np_components[comp], strlen(fspath.np_components[comp]), nmp);
2247		}
2248		numops--;
2249		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
2250		numops--;
2251		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2252		NFS_CLEAR_ATTRIBUTES(bitmap);
2253		NFS4_DEFAULT_ATTRIBUTES(bitmap);
2254		/* if no namedattr support or component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
2255		if (NMFLAG(nmp, NONAMEDATTR) || !strcmp(fspath.np_components[comp], ".zfs"))
2256			NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
2257		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2258		nfsm_chain_build_done(error, &nmreq);
2259		nfsm_assert(error, (numops == 0), EPROTO);
2260		nfsmout_if(error);
2261		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2262				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2263		if (!error)
2264			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2265		nfsm_chain_skip_tag(error, &nmrep);
2266		nfsm_chain_get_32(error, &nmrep, numops);
2267		nfsm_chain_op_check(error, &nmrep, dirfh.fh_len ? NFS_OP_PUTFH : NFS_OP_PUTROOTFH);
2268		nfsm_chain_op_check(error, &nmrep, isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP);
2269		nfsmout_if(error);
2270		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
2271		nfsm_chain_get_32(error, &nmrep, fh.fh_len);
2272		nfsm_chain_get_opaque(error, &nmrep, fh.fh_len, fh.fh_data);
2273		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2274		if (!error) {
2275			NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2276			error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, &nfsls);
2277		}
2278		nfsm_chain_cleanup(&nmrep);
2279		nfsm_chain_null(&nmreq);
2280		if (error) {
2281			/* LOOKUP succeeded but GETATTR failed?  This could be a referral. */
2282			/* Try the lookup again with a getattr for fs_locations. */
2283			nfs_fs_locations_cleanup(&nfsls);
2284			error = nfs4_get_fs_locations(nmp, NULL, dirfh.fh_data, dirfh.fh_len, fspath.np_components[comp], ctx, &nfsls);
2285			if (!error && (nfsls.nl_numlocs < 1))
2286				error = ENOENT;
2287			nfsmout_if(error);
2288			if (++loopcnt > MAXSYMLINKS) {
2289				/* too many symlink/referral redirections */
2290				error = ELOOP;
2291				goto nfsmout;
2292			}
2293			/* tear down the current connection */
2294			nfs_disconnect(nmp);
2295			/* replace fs locations */
2296			nfs_fs_locations_cleanup(&nmp->nm_locations);
2297			nmp->nm_locations = nfsls;
2298			bzero(&nfsls, sizeof(nfsls));
2299			/* initiate a connection using the new fs locations */
2300			error = nfs_mount_connect(nmp);
2301			if (!error && !(nmp->nm_locations.nl_current.nli_flags & NLI_VALID))
2302				error = EIO;
2303			nfsmout_if(error);
2304			/* add new server's remote path to beginning of our path and continue */
2305			nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
2306			bzero(&fspath2, sizeof(fspath2));
2307			fspath2.np_compcount = (fspath.np_compcount - comp - 1) + nfsp->np_compcount;
2308			if (fspath2.np_compcount > 0) {
2309				MALLOC(fspath2.np_components, char **, fspath2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2310				if (!fspath2.np_components) {
2311					error = ENOMEM;
2312					goto nfsmout;
2313				}
2314				for (comp2=0; comp2 < nfsp->np_compcount; comp2++) {
2315					int slen = strlen(nfsp->np_components[comp2]);
2316					MALLOC(fspath2.np_components[comp2], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
2317					if (!fspath2.np_components[comp2]) {
2318						/* clean up fspath2, then error out */
2319						while (comp2 > 0) {
2320							comp2--;
2321							FREE(fspath2.np_components[comp2], M_TEMP);
2322						}
2323						FREE(fspath2.np_components, M_TEMP);
2324						error = ENOMEM;
2325						goto nfsmout;
2326					}
2327					strlcpy(fspath2.np_components[comp2], nfsp->np_components[comp2], slen+1);
2328				}
2329				if ((fspath.np_compcount - comp - 1) > 0)
2330					bcopy(&fspath.np_components[comp+1], &fspath2.np_components[nfsp->np_compcount], (fspath.np_compcount - comp - 1)*sizeof(char*));
2331				/* free up unused parts of old path (prior components and component array) */
2332				do {
2333					FREE(fspath.np_components[comp], M_TEMP);
2334				} while (comp-- > 0);
2335				FREE(fspath.np_components, M_TEMP);
2336				/* put new path in place */
2337				fspath = fspath2;
2338			}
2339			/* reset dirfh and component index */
2340			dirfh.fh_len = 0;
2341			comp = 0;
2342			NVATTR_CLEANUP(&nvattr);
2343			if (fspath.np_compcount == 0)
2344				goto nocomponents;
2345			continue;
2346		}
2347		nfsmout_if(error);
2348		/* if file handle is for a symlink, then update the path with the symlink contents */
2349		if (NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) && (nvattr.nva_type == VLNK)) {
2350			if (++loopcnt > MAXSYMLINKS)
2351				error = ELOOP;
2352			else
2353				error = nfs4_mount_update_path_with_symlink(nmp, &fspath, comp, &dirfh, &depth, &fh, ctx);
2354			nfsmout_if(error);
2355			/* directory file handle is either left the same or reset to root (if link was absolute) */
2356			/* path traversal starts at beginning of the path again */
2357			comp = 0;
2358			NVATTR_CLEANUP(&nvattr);
2359			nfs_fs_locations_cleanup(&nfsls);
2360			continue;
2361		}
2362		NVATTR_CLEANUP(&nvattr);
2363		nfs_fs_locations_cleanup(&nfsls);
2364		/* not a symlink... */
2365		if ((nmp->nm_state & NFSSTA_NEEDSECINFO) && (comp == (fspath.np_compcount-1)) && !isdotdot) {
2366			/* need to get SECINFO for the directory being mounted */
2367			if (dirfh.fh_len == 0)
2368				NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
2369			else
2370				NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
2371			sec.count = NX_MAX_SEC_FLAVORS;
2372			error = nfs4_secinfo_rpc(nmp, &si, vfs_context_ucred(ctx), sec.flavors, &sec.count);
2373			/* [sigh] some implementations return "illegal" error for unsupported ops */
2374			if (error == NFSERR_OP_ILLEGAL)
2375				error = 0;
2376			nfsmout_if(error);
2377			/* set our default security flavor to the first in the list */
2378			if (sec.count)
2379				nmp->nm_auth = sec.flavors[0];
2380			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
2381		}
2382		/* advance directory file handle, component index, & update depth */
2383		dirfh = fh;
2384		comp++;
2385		if (!isdotdot) /* going down the hierarchy */
2386			depth++;
2387		else if (--depth <= 0)  /* going up the hierarchy */
2388			dirfh.fh_len = 0; /* clear dirfh when we hit root */
2389	}
2390
2391gotfh:
2392	/* get attrs for mount point root */
2393	numops = NMFLAG(nmp, NONAMEDATTR) ? 2 : 3; // PUTFH + GETATTR + OPENATTR
2394	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED);
2395	nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
2396	numops--;
2397	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
2398	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
2399	numops--;
2400	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
2401	NFS_CLEAR_ATTRIBUTES(bitmap);
2402	NFS4_DEFAULT_ATTRIBUTES(bitmap);
2403	/* if no namedattr support or last component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
2404	if (NMFLAG(nmp, NONAMEDATTR) || ((fspath.np_compcount > 0) && !strcmp(fspath.np_components[fspath.np_compcount-1], ".zfs")))
2405		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
2406	nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
2407	if (!NMFLAG(nmp, NONAMEDATTR)) {
2408		numops--;
2409		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
2410		nfsm_chain_add_32(error, &nmreq, 0);
2411	}
2412	nfsm_chain_build_done(error, &nmreq);
2413	nfsm_assert(error, (numops == 0), EPROTO);
2414	nfsmout_if(error);
2415	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
2416			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
2417	if (!error)
2418		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2419	nfsm_chain_skip_tag(error, &nmrep);
2420	nfsm_chain_get_32(error, &nmrep, numops);
2421	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
2422	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
2423	nfsmout_if(error);
2424	NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
2425	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
2426	nfsmout_if(error);
2427	if (!NMFLAG(nmp, NONAMEDATTR)) {
2428		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
2429		if (error == ENOENT)
2430			error = 0;
2431		/* [sigh] some implementations return "illegal" error for unsupported ops */
2432		if (error || !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_NAMED_ATTR)) {
2433			nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
2434		} else {
2435			nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_NAMED_ATTR;
2436		}
2437	} else {
2438		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
2439	}
2440	if (NMFLAG(nmp, NOACL)) /* make sure ACL support is turned off */
2441		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_ACL;
2442	if (NMFLAG(nmp, ACLONLY) && !(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL))
2443		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
2444	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_FH_EXPIRE_TYPE)) {
2445		uint32_t fhtype = ((nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_FHTYPE_MASK) >> NFS_FSFLAG_FHTYPE_SHIFT);
2446		if (fhtype != NFS_FH_PERSISTENT)
2447			printf("nfs: warning: non-persistent file handles! for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
2448	}
2449
2450	/* make sure it's a directory */
2451	if (!NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) || (nvattr.nva_type != VDIR)) {
2452		error = ENOTDIR;
2453		goto nfsmout;
2454	}
2455
2456	/* save the NFS fsid */
2457	nmp->nm_fsid = nvattr.nva_fsid;
2458
2459	/* create the root node */
2460	error = nfs_nget(nmp->nm_mountp, NULL, NULL, dirfh.fh_data, dirfh.fh_len, &nvattr, &xid, rq.r_auth, NG_MARKROOT, npp);
2461	nfsmout_if(error);
2462
2463	if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
2464		vfs_setextendedsecurity(nmp->nm_mountp);
2465
2466	/* adjust I/O sizes to server limits */
2467	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD) && (nmp->nm_fsattr.nfsa_maxread > 0)) {
2468		if (nmp->nm_fsattr.nfsa_maxread < (uint64_t)nmp->nm_rsize) {
2469			nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread & ~(NFS_FABLKSIZE - 1);
2470			if (nmp->nm_rsize == 0)
2471				nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread;
2472		}
2473	}
2474	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE) && (nmp->nm_fsattr.nfsa_maxwrite > 0)) {
2475		if (nmp->nm_fsattr.nfsa_maxwrite < (uint64_t)nmp->nm_wsize) {
2476			nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite & ~(NFS_FABLKSIZE - 1);
2477			if (nmp->nm_wsize == 0)
2478				nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite;
2479		}
2480	}
2481
2482	/* set up lease renew timer */
2483	nmp->nm_renew_timer = thread_call_allocate(nfs4_renew_timer, nmp);
2484	interval = nmp->nm_fsattr.nfsa_lease / 2;
2485	if (interval < 1)
2486		interval = 1;
2487	nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
2488
2489nfsmout:
2490	if (fspath.np_components) {
2491		for (comp=0; comp < fspath.np_compcount; comp++)
2492			if (fspath.np_components[comp])
2493				FREE(fspath.np_components[comp], M_TEMP);
2494		FREE(fspath.np_components, M_TEMP);
2495	}
2496	NVATTR_CLEANUP(&nvattr);
2497	nfs_fs_locations_cleanup(&nfsls);
2498	if (*npp)
2499		nfs_node_unlock(*npp);
2500	nfsm_chain_cleanup(&nmreq);
2501	nfsm_chain_cleanup(&nmrep);
2502	return (error);
2503}
2504
2505/*
2506 * Thread to handle initial NFS mount connection.
2507 */
2508void
2509nfs_mount_connect_thread(void *arg, __unused wait_result_t wr)
2510{
2511	struct nfsmount *nmp = arg;
2512	int error = 0, savederror = 0, slpflag = (NMFLAG(nmp, INTR) ? PCATCH : 0);
2513	int done = 0, timeo, tries, maxtries;
2514
2515	if (NM_OMFLAG(nmp, MNTQUICK)) {
2516		timeo = 8;
2517		maxtries = 1;
2518	} else {
2519		timeo = 30;
2520		maxtries = 2;
2521	}
2522
2523	for (tries = 0; tries < maxtries; tries++) {
2524		error = nfs_connect(nmp, 1, timeo);
2525		switch (error) {
2526		case ETIMEDOUT:
2527		case EAGAIN:
2528		case EPIPE:
2529		case EADDRNOTAVAIL:
2530		case ENETDOWN:
2531		case ENETUNREACH:
2532		case ENETRESET:
2533		case ECONNABORTED:
2534		case ECONNRESET:
2535		case EISCONN:
2536		case ENOTCONN:
2537		case ESHUTDOWN:
2538		case ECONNREFUSED:
2539		case EHOSTDOWN:
2540		case EHOSTUNREACH:
2541			/* just keep retrying on any of these errors */
2542			break;
2543		case 0:
2544		default:
2545			/* looks like we got an answer... */
2546			done = 1;
2547			break;
2548		}
2549
2550		/* save the best error */
2551		if (nfs_connect_error_class(error) >= nfs_connect_error_class(savederror))
2552			savederror = error;
2553		if (done) {
2554			error = savederror;
2555			break;
2556		}
2557
2558		/* pause before next attempt */
2559		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
2560			break;
2561		error = tsleep(nmp, PSOCK|slpflag, "nfs_mount_connect_retry", 2*hz);
2562		if (error && (error != EWOULDBLOCK))
2563			break;
2564		error = savederror;
2565	}
2566
2567	/* update status of mount connect */
2568	lck_mtx_lock(&nmp->nm_lock);
2569	if (!nmp->nm_mounterror)
2570		nmp->nm_mounterror = error;
2571	nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
2572	lck_mtx_unlock(&nmp->nm_lock);
2573	wakeup(&nmp->nm_nss);
2574}
2575
2576int
2577nfs_mount_connect(struct nfsmount *nmp)
2578{
2579	int error = 0, slpflag;
2580	thread_t thd;
2581	struct timespec ts = { 2, 0 };
2582
2583	/*
2584	 * Set up the socket.  Perform initial search for a location/server/address to
2585	 * connect to and negotiate any unspecified mount parameters.  This work is
2586	 * done on a kernel thread to satisfy reserved port usage needs.
2587	 */
2588	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
2589	lck_mtx_lock(&nmp->nm_lock);
2590	/* set flag that the thread is running */
2591	nmp->nm_state |= NFSSTA_MOUNT_THREAD;
2592	if (kernel_thread_start(nfs_mount_connect_thread, nmp, &thd) != KERN_SUCCESS) {
2593		nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
2594		nmp->nm_mounterror = EIO;
2595		printf("nfs mount %s start socket connect thread failed\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
2596	} else {
2597		thread_deallocate(thd);
2598	}
2599
2600	/* wait until mount connect thread is finished/gone */
2601	while (nmp->nm_state & NFSSTA_MOUNT_THREAD) {
2602		error = msleep(&nmp->nm_nss, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectthread", &ts);
2603		if ((error && (error != EWOULDBLOCK)) || ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))) {
2604			/* record error */
2605			if (!nmp->nm_mounterror)
2606				nmp->nm_mounterror = error;
2607			/* signal the thread that we are aborting */
2608			nmp->nm_sockflags |= NMSOCK_UNMOUNT;
2609			if (nmp->nm_nss)
2610				wakeup(nmp->nm_nss);
2611			/* and continue waiting on it to finish */
2612			slpflag = 0;
2613		}
2614	}
2615	lck_mtx_unlock(&nmp->nm_lock);
2616
2617	/* grab mount connect status */
2618	error = nmp->nm_mounterror;
2619
2620	return (error);
2621}
2622
2623/*
2624 * Common code to mount an NFS file system.
2625 */
2626int
2627mountnfs(
2628	char *xdrbuf,
2629	mount_t mp,
2630	vfs_context_t ctx,
2631	vnode_t *vpp)
2632{
2633	struct nfsmount *nmp;
2634	nfsnode_t np;
2635	int error = 0;
2636	struct vfsstatfs *sbp;
2637	struct xdrbuf xb;
2638	uint32_t i, val, vers = 0, minorvers, maxio, iosize, len;
2639	uint32_t *mattrs;
2640	uint32_t *mflags_mask;
2641	uint32_t *mflags;
2642	uint32_t argslength, attrslength;
2643	struct nfs_location_index firstloc = { NLI_VALID, 0, 0, 0 };
2644
2645	/* make sure mbuf constants are set up */
2646	if (!nfs_mbuf_mhlen)
2647		nfs_mbuf_init();
2648
2649	if (vfs_flags(mp) & MNT_UPDATE) {
2650		nmp = VFSTONFS(mp);
2651		/* update paths, file handles, etc, here	XXX */
2652		xb_free(xdrbuf);
2653		return (0);
2654	} else {
2655		/* allocate an NFS mount structure for this mount */
2656		MALLOC_ZONE(nmp, struct nfsmount *,
2657				sizeof (struct nfsmount), M_NFSMNT, M_WAITOK);
2658		if (!nmp) {
2659			xb_free(xdrbuf);
2660			return (ENOMEM);
2661		}
2662		bzero((caddr_t)nmp, sizeof (struct nfsmount));
2663		lck_mtx_init(&nmp->nm_lock, nfs_mount_grp, LCK_ATTR_NULL);
2664		TAILQ_INIT(&nmp->nm_resendq);
2665		TAILQ_INIT(&nmp->nm_iodq);
2666		TAILQ_INIT(&nmp->nm_gsscl);
2667		LIST_INIT(&nmp->nm_monlist);
2668		vfs_setfsprivate(mp, nmp);
2669		vfs_getnewfsid(mp);
2670		nmp->nm_mountp = mp;
2671		vfs_setauthopaque(mp);
2672
2673		nfs_nhinit_finish();
2674
2675		nmp->nm_args = xdrbuf;
2676
2677		/* set up defaults */
2678		nmp->nm_vers = 0;
2679		nmp->nm_timeo = NFS_TIMEO;
2680		nmp->nm_retry = NFS_RETRANS;
2681		nmp->nm_sotype = 0;
2682		nmp->nm_sofamily = 0;
2683		nmp->nm_nfsport = 0;
2684		nmp->nm_wsize = NFS_WSIZE;
2685		nmp->nm_rsize = NFS_RSIZE;
2686		nmp->nm_readdirsize = NFS_READDIRSIZE;
2687		nmp->nm_numgrps = NFS_MAXGRPS;
2688		nmp->nm_readahead = NFS_DEFRAHEAD;
2689		nmp->nm_tprintf_delay = nfs_tprintf_delay;
2690		if (nmp->nm_tprintf_delay < 0)
2691			nmp->nm_tprintf_delay = 0;
2692		nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
2693		if (nmp->nm_tprintf_initial_delay < 0)
2694			nmp->nm_tprintf_initial_delay = 0;
2695		nmp->nm_acregmin = NFS_MINATTRTIMO;
2696		nmp->nm_acregmax = NFS_MAXATTRTIMO;
2697		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
2698		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
2699		nmp->nm_auth = RPCAUTH_SYS;
2700		nmp->nm_deadtimeout = 0;
2701		nmp->nm_curdeadtimeout = 0;
2702		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
2703		nmp->nm_realm = NULL;
2704		nmp->nm_principal = NULL;
2705		nmp->nm_sprinc = NULL;
2706	}
2707
2708	mattrs = nmp->nm_mattrs;
2709	mflags = nmp->nm_mflags;
2710	mflags_mask = nmp->nm_mflags_mask;
2711
2712	/* set up NFS mount with args */
2713	xb_init_buffer(&xb, xdrbuf, 2*XDRWORD);
2714	xb_get_32(error, &xb, val); /* version */
2715	xb_get_32(error, &xb, argslength); /* args length */
2716	nfsmerr_if(error);
2717	xb_init_buffer(&xb, xdrbuf, argslength);	/* restart parsing with actual buffer length */
2718	xb_get_32(error, &xb, val); /* version */
2719	xb_get_32(error, &xb, argslength); /* args length */
2720	xb_get_32(error, &xb, val); /* XDR args version */
2721	if (val != NFS_XDRARGS_VERSION_0)
2722		error = EINVAL;
2723	len = NFS_MATTR_BITMAP_LEN;
2724	xb_get_bitmap(error, &xb, mattrs, len); /* mount attribute bitmap */
2725	attrslength = 0;
2726	xb_get_32(error, &xb, attrslength); /* attrs length */
2727	if (!error && (attrslength > (argslength - ((4+NFS_MATTR_BITMAP_LEN+1)*XDRWORD))))
2728		error = EINVAL;
2729	nfsmerr_if(error);
2730	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
2731		len = NFS_MFLAG_BITMAP_LEN;
2732		xb_get_bitmap(error, &xb, mflags_mask, len); /* mount flag mask */
2733		len = NFS_MFLAG_BITMAP_LEN;
2734		xb_get_bitmap(error, &xb, mflags, len); /* mount flag values */
2735		if (!error) {
2736			/* clear all mask bits and OR in all the ones that are set */
2737			nmp->nm_flags[0] &= ~mflags_mask[0];
2738			nmp->nm_flags[0] |= (mflags_mask[0] & mflags[0]);
2739		}
2740	}
2741	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) {
2742		xb_get_32(error, &xb, vers);
2743		if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
2744			xb_get_32(error, &xb, minorvers);
2745		else
2746			minorvers = 0;
2747		nfsmerr_if(error);
2748		switch (vers) {
2749		case 2:
2750			nmp->nm_vers = NFS_VER2;
2751			break;
2752		case 3:
2753			nmp->nm_vers = NFS_VER3;
2754			break;
2755		case 4:
2756			switch (minorvers) {
2757			case 0:
2758				nmp->nm_vers = NFS_VER4;
2759				break;
2760			default:
2761				error = EINVAL;
2762			}
2763			break;
2764		default:
2765			error = EINVAL;
2766		}
2767	}
2768	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) {
2769		/* should have also gotten NFS version (and already gotten minorvers) */
2770		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
2771			error = EINVAL;
2772	}
2773	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
2774		xb_get_32(error, &xb, nmp->nm_rsize);
2775	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
2776		xb_get_32(error, &xb, nmp->nm_wsize);
2777	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
2778		xb_get_32(error, &xb, nmp->nm_readdirsize);
2779	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
2780		xb_get_32(error, &xb, nmp->nm_readahead);
2781	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
2782		xb_get_32(error, &xb, nmp->nm_acregmin);
2783		xb_skip(error, &xb, XDRWORD);
2784	}
2785	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
2786		xb_get_32(error, &xb, nmp->nm_acregmax);
2787		xb_skip(error, &xb, XDRWORD);
2788	}
2789	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
2790		xb_get_32(error, &xb, nmp->nm_acdirmin);
2791		xb_skip(error, &xb, XDRWORD);
2792	}
2793	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
2794		xb_get_32(error, &xb, nmp->nm_acdirmax);
2795		xb_skip(error, &xb, XDRWORD);
2796	}
2797	nfsmerr_if(error);
2798	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE)) {
2799		xb_get_32(error, &xb, val);
2800		switch (val) {
2801		case NFS_LOCK_MODE_DISABLED:
2802		case NFS_LOCK_MODE_LOCAL:
2803			if (nmp->nm_vers >= NFS_VER4) {
2804				/* disabled/local lock mode only allowed on v2/v3 */
2805				error = EINVAL;
2806				break;
2807			}
2808			/* FALLTHROUGH */
2809		case NFS_LOCK_MODE_ENABLED:
2810			nmp->nm_lockmode = val;
2811			break;
2812		default:
2813			error = EINVAL;
2814		}
2815	}
2816	nfsmerr_if(error);
2817	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
2818		uint32_t seccnt;
2819		xb_get_32(error, &xb, seccnt);
2820		if (!error && ((seccnt < 1) || (seccnt > NX_MAX_SEC_FLAVORS)))
2821			error = EINVAL;
2822		nfsmerr_if(error);
2823		nmp->nm_sec.count = seccnt;
2824		for (i=0; i < seccnt; i++) {
2825			xb_get_32(error, &xb, nmp->nm_sec.flavors[i]);
2826			/* Check for valid security flavor */
2827			switch (nmp->nm_sec.flavors[i]) {
2828			case RPCAUTH_NONE:
2829			case RPCAUTH_SYS:
2830			case RPCAUTH_KRB5:
2831			case RPCAUTH_KRB5I:
2832			case RPCAUTH_KRB5P:
2833				break;
2834			default:
2835				error = EINVAL;
2836			}
2837		}
2838		/* start with the first flavor */
2839		nmp->nm_auth = nmp->nm_sec.flavors[0];
2840	}
2841	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
2842		xb_get_32(error, &xb, nmp->nm_numgrps);
2843	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE)) {
2844		char sotype[6];
2845
2846		xb_get_32(error, &xb, val);
2847		if (!error && ((val < 3) || (val > 5)))
2848			error = EINVAL;
2849		nfsmerr_if(error);
2850		error = xb_get_bytes(&xb, sotype, val, 0);
2851		nfsmerr_if(error);
2852		sotype[val] = '\0';
2853		if (!strcmp(sotype, "tcp")) {
2854			nmp->nm_sotype = SOCK_STREAM;
2855		} else if (!strcmp(sotype, "udp")) {
2856			nmp->nm_sotype = SOCK_DGRAM;
2857		} else if (!strcmp(sotype, "tcp4")) {
2858			nmp->nm_sotype = SOCK_STREAM;
2859			nmp->nm_sofamily = AF_INET;
2860		} else if (!strcmp(sotype, "udp4")) {
2861			nmp->nm_sotype = SOCK_DGRAM;
2862			nmp->nm_sofamily = AF_INET;
2863		} else if (!strcmp(sotype, "tcp6")) {
2864			nmp->nm_sotype = SOCK_STREAM;
2865			nmp->nm_sofamily = AF_INET6;
2866		} else if (!strcmp(sotype, "udp6")) {
2867			nmp->nm_sotype = SOCK_DGRAM;
2868			nmp->nm_sofamily = AF_INET6;
2869		} else if (!strcmp(sotype, "inet4")) {
2870			nmp->nm_sofamily = AF_INET;
2871		} else if (!strcmp(sotype, "inet6")) {
2872			nmp->nm_sofamily = AF_INET6;
2873		} else if (!strcmp(sotype, "inet")) {
2874			nmp->nm_sofamily = 0; /* ok */
2875		} else {
2876			error = EINVAL;
2877		}
2878		if (!error && (nmp->nm_vers >= NFS_VER4) && nmp->nm_sotype &&
2879		    (nmp->nm_sotype != SOCK_STREAM))
2880			error = EINVAL;		/* NFSv4 is only allowed over TCP. */
2881		nfsmerr_if(error);
2882	}
2883	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
2884		xb_get_32(error, &xb, nmp->nm_nfsport);
2885	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
2886		xb_get_32(error, &xb, nmp->nm_mountport);
2887	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
2888		/* convert from time to 0.1s units */
2889		xb_get_32(error, &xb, nmp->nm_timeo);
2890		xb_get_32(error, &xb, val);
2891		nfsmerr_if(error);
2892		if (val >= 1000000000)
2893			error = EINVAL;
2894		nfsmerr_if(error);
2895		nmp->nm_timeo *= 10;
2896		nmp->nm_timeo += (val+100000000-1)/100000000;
2897		/* now convert to ticks */
2898		nmp->nm_timeo = (nmp->nm_timeo * NFS_HZ + 5) / 10;
2899	}
2900	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT)) {
2901		xb_get_32(error, &xb, val);
2902		if (!error && (val > 1))
2903			nmp->nm_retry = val;
2904	}
2905	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
2906		xb_get_32(error, &xb, nmp->nm_deadtimeout);
2907		xb_skip(error, &xb, XDRWORD);
2908	}
2909	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
2910		nfsmerr_if(error);
2911		MALLOC(nmp->nm_fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
2912		if (!nmp->nm_fh)
2913			error = ENOMEM;
2914		xb_get_32(error, &xb, nmp->nm_fh->fh_len);
2915		nfsmerr_if(error);
2916		error = xb_get_bytes(&xb, (char*)&nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len, 0);
2917	}
2918	nfsmerr_if(error);
2919	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
2920		uint32_t loc, serv, addr, comp;
2921		struct nfs_fs_location *fsl;
2922		struct nfs_fs_server *fss;
2923		struct nfs_fs_path *fsp;
2924
2925		xb_get_32(error, &xb, nmp->nm_locations.nl_numlocs); /* fs location count */
2926		/* sanity check location count */
2927		if (!error && ((nmp->nm_locations.nl_numlocs < 1) || (nmp->nm_locations.nl_numlocs > 256)))
2928			error = EINVAL;
2929		nfsmerr_if(error);
2930		MALLOC(nmp->nm_locations.nl_locations, struct nfs_fs_location **, nmp->nm_locations.nl_numlocs * sizeof(struct nfs_fs_location*), M_TEMP, M_WAITOK|M_ZERO);
2931		if (!nmp->nm_locations.nl_locations)
2932			error = ENOMEM;
2933		for (loc = 0; loc < nmp->nm_locations.nl_numlocs; loc++) {
2934			nfsmerr_if(error);
2935			MALLOC(fsl, struct nfs_fs_location *, sizeof(struct nfs_fs_location), M_TEMP, M_WAITOK|M_ZERO);
2936			if (!fsl)
2937				error = ENOMEM;
2938			nmp->nm_locations.nl_locations[loc] = fsl;
2939			xb_get_32(error, &xb, fsl->nl_servcount); /* server count */
2940			/* sanity check server count */
2941			if (!error && ((fsl->nl_servcount < 1) || (fsl->nl_servcount > 256)))
2942				error = EINVAL;
2943			nfsmerr_if(error);
2944			MALLOC(fsl->nl_servers, struct nfs_fs_server **, fsl->nl_servcount * sizeof(struct nfs_fs_server*), M_TEMP, M_WAITOK|M_ZERO);
2945			if (!fsl->nl_servers)
2946				error = ENOMEM;
2947			for (serv = 0; serv < fsl->nl_servcount; serv++) {
2948				nfsmerr_if(error);
2949				MALLOC(fss, struct nfs_fs_server *, sizeof(struct nfs_fs_server), M_TEMP, M_WAITOK|M_ZERO);
2950				if (!fss)
2951					error = ENOMEM;
2952				fsl->nl_servers[serv] = fss;
2953				xb_get_32(error, &xb, val); /* server name length */
2954				/* sanity check server name length */
2955				if (!error && ((val < 1) || (val > MAXPATHLEN)))
2956					error = EINVAL;
2957				nfsmerr_if(error);
2958				MALLOC(fss->ns_name, char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
2959				if (!fss->ns_name)
2960					error = ENOMEM;
2961				nfsmerr_if(error);
2962				error = xb_get_bytes(&xb, fss->ns_name, val, 0); /* server name */
2963				xb_get_32(error, &xb, fss->ns_addrcount); /* address count */
2964				/* sanity check address count (OK to be zero) */
2965				if (!error && (fss->ns_addrcount > 256))
2966					error = EINVAL;
2967				nfsmerr_if(error);
2968				if (fss->ns_addrcount > 0) {
2969					MALLOC(fss->ns_addresses, char **, fss->ns_addrcount * sizeof(char *), M_TEMP, M_WAITOK|M_ZERO);
2970					if (!fss->ns_addresses)
2971						error = ENOMEM;
2972					for (addr = 0; addr < fss->ns_addrcount; addr++) {
2973						xb_get_32(error, &xb, val); /* address length */
2974						/* sanity check address length */
2975						if (!error && ((val < 1) || (val > 128)))
2976							error = EINVAL;
2977						nfsmerr_if(error);
2978						MALLOC(fss->ns_addresses[addr], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
2979						if (!fss->ns_addresses[addr])
2980							error = ENOMEM;
2981						nfsmerr_if(error);
2982						error = xb_get_bytes(&xb, fss->ns_addresses[addr], val, 0); /* address */
2983					}
2984				}
2985				xb_get_32(error, &xb, val); /* server info length */
2986				xb_skip(error, &xb, val); /* skip server info */
2987			}
2988			/* get pathname */
2989			fsp = &fsl->nl_path;
2990			xb_get_32(error, &xb, fsp->np_compcount); /* component count */
2991			/* sanity check component count */
2992			if (!error && (fsp->np_compcount > MAXPATHLEN))
2993				error = EINVAL;
2994			nfsmerr_if(error);
2995			if (fsp->np_compcount) {
2996				MALLOC(fsp->np_components, char **, fsp->np_compcount * sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
2997				if (!fsp->np_components)
2998					error = ENOMEM;
2999			}
3000			for (comp = 0; comp < fsp->np_compcount; comp++) {
3001				xb_get_32(error, &xb, val); /* component length */
3002				/* sanity check component length */
3003				if (!error && (val == 0)) {
3004					/*
3005					 * Apparently some people think a path with zero components should
3006					 * be encoded with one zero-length component.  So, just ignore any
3007					 * zero length components.
3008					 */
3009					comp--;
3010					fsp->np_compcount--;
3011					if (fsp->np_compcount == 0) {
3012						FREE(fsp->np_components, M_TEMP);
3013						fsp->np_components = NULL;
3014					}
3015					continue;
3016				}
3017				if (!error && ((val < 1) || (val > MAXPATHLEN)))
3018					error = EINVAL;
3019				nfsmerr_if(error);
3020				MALLOC(fsp->np_components[comp], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
3021				if (!fsp->np_components[comp])
3022					error = ENOMEM;
3023				nfsmerr_if(error);
3024				error = xb_get_bytes(&xb, fsp->np_components[comp], val, 0); /* component */
3025			}
3026			xb_get_32(error, &xb, val); /* fs location info length */
3027			xb_skip(error, &xb, val); /* skip fs location info */
3028		}
3029	}
3030	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
3031		xb_skip(error, &xb, XDRWORD);
3032	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
3033		xb_get_32(error, &xb, len);
3034		nfsmerr_if(error);
3035		val = len;
3036		if (val >= sizeof(vfs_statfs(mp)->f_mntfromname))
3037			val = sizeof(vfs_statfs(mp)->f_mntfromname) - 1;
3038		error = xb_get_bytes(&xb, vfs_statfs(mp)->f_mntfromname, val, 0);
3039		if ((len - val) > 0)
3040			xb_skip(error, &xb, len - val);
3041		nfsmerr_if(error);
3042		vfs_statfs(mp)->f_mntfromname[val] = '\0';
3043	}
3044	nfsmerr_if(error);
3045
3046	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REALM)) {
3047		xb_get_32(error, &xb, len);
3048		if (!error && ((len < 1) || (len > MAXPATHLEN)))
3049			error=EINVAL;
3050		nfsmerr_if(error);
3051		/* allocate an extra byte for a leading '@' if its not already prepended to the realm */
3052		MALLOC(nmp->nm_realm, char *, len+2, M_TEMP, M_WAITOK|M_ZERO);
3053		if (!nmp->nm_realm)
3054			error = ENOMEM;
3055		nfsmerr_if(error);
3056		error = xb_get_bytes(&xb, nmp->nm_realm, len, 0);
3057		if (error == 0 && *nmp->nm_realm != '@') {
3058			bcopy(nmp->nm_realm, &nmp->nm_realm[1], len);
3059			nmp->nm_realm[0] = '@';
3060		}
3061	}
3062	nfsmerr_if(error);
3063
3064	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_PRINCIPAL)) {
3065		xb_get_32(error, &xb, len);
3066		if (!error && ((len < 1) || (len > MAXPATHLEN)))
3067			error=EINVAL;
3068		nfsmerr_if(error);
3069		MALLOC(nmp->nm_principal, char *, len+1, M_TEMP, M_WAITOK|M_ZERO);
3070		if (!nmp->nm_principal)
3071			error = ENOMEM;
3072		nfsmerr_if(error);
3073		error = xb_get_bytes(&xb, nmp->nm_principal, len, 0);
3074	}
3075	nfsmerr_if(error);
3076
3077	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SVCPRINCIPAL)) {
3078		xb_get_32(error, &xb, len);
3079		if (!error && ((len < 1) || (len > MAXPATHLEN)))
3080			error=EINVAL;
3081		nfsmerr_if(error);
3082		MALLOC(nmp->nm_sprinc, char *, len+1, M_TEMP, M_WAITOK|M_ZERO);
3083		if (!nmp->nm_sprinc)
3084			error = ENOMEM;
3085		nfsmerr_if(error);
3086		error = xb_get_bytes(&xb, nmp->nm_sprinc, len, 0);
3087	}
3088	nfsmerr_if(error);
3089
3090	/*
3091	 * Sanity check/finalize settings.
3092	 */
3093
3094	if (nmp->nm_timeo < NFS_MINTIMEO)
3095		nmp->nm_timeo = NFS_MINTIMEO;
3096	else if (nmp->nm_timeo > NFS_MAXTIMEO)
3097		nmp->nm_timeo = NFS_MAXTIMEO;
3098	if (nmp->nm_retry > NFS_MAXREXMIT)
3099		nmp->nm_retry = NFS_MAXREXMIT;
3100
3101	if (nmp->nm_numgrps > NFS_MAXGRPS)
3102		nmp->nm_numgrps = NFS_MAXGRPS;
3103	if (nmp->nm_readahead > NFS_MAXRAHEAD)
3104		nmp->nm_readahead = NFS_MAXRAHEAD;
3105	if (nmp->nm_acregmin > nmp->nm_acregmax)
3106		nmp->nm_acregmin = nmp->nm_acregmax;
3107	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
3108		nmp->nm_acdirmin = nmp->nm_acdirmax;
3109
3110	/* need at least one fs location */
3111	if (nmp->nm_locations.nl_numlocs < 1)
3112		error = EINVAL;
3113	nfsmerr_if(error);
3114
3115	/* init mount's mntfromname to first location */
3116	if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
3117		nfs_location_mntfromname(&nmp->nm_locations, firstloc,
3118			vfs_statfs(mp)->f_mntfromname, sizeof(vfs_statfs(mp)->f_mntfromname), 0);
3119
3120	/* Need to save the mounting credential for v4. */
3121	nmp->nm_mcred = vfs_context_ucred(ctx);
3122	if (IS_VALID_CRED(nmp->nm_mcred))
3123		kauth_cred_ref(nmp->nm_mcred);
3124
3125	/*
3126	 * If a reserved port is required, check for that privilege.
3127	 * (Note that mirror mounts are exempt because the privilege was
3128	 * already checked for the original mount.)
3129	 */
3130	if (NMFLAG(nmp, RESVPORT) && !vfs_iskernelmount(mp))
3131		error = priv_check_cred(nmp->nm_mcred, PRIV_NETINET_RESERVEDPORT, 0);
3132	nfsmerr_if(error);
3133
3134	/* do mount's initial socket connection */
3135	error = nfs_mount_connect(nmp);
3136	nfsmerr_if(error);
3137
3138	/* set up the version-specific function tables */
3139	if (nmp->nm_vers < NFS_VER4)
3140		nmp->nm_funcs = &nfs3_funcs;
3141	else
3142		nmp->nm_funcs = &nfs4_funcs;
3143
3144	/* sanity check settings now that version/connection is set */
3145	if (nmp->nm_vers == NFS_VER2)		/* ignore RDIRPLUS on NFSv2 */
3146		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
3147	if (nmp->nm_vers >= NFS_VER4) {
3148		if (NFS_BITMAP_ISSET(nmp->nm_flags, NFS_MFLAG_ACLONLY)) /* aclonly trumps noacl */
3149			NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
3150		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
3151		if (nmp->nm_lockmode != NFS_LOCK_MODE_ENABLED)
3152			error = EINVAL; /* disabled/local lock mode only allowed on v2/v3 */
3153	} else {
3154		/* ignore these if not v4 */
3155		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOCALLBACK);
3156		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NONAMEDATTR);
3157		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
3158		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
3159	}
3160	nfsmerr_if(error);
3161
3162	if (nmp->nm_sotype == SOCK_DGRAM) {
3163		/* I/O size defaults for UDP are different */
3164		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
3165			nmp->nm_rsize = NFS_DGRAM_RSIZE;
3166		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
3167			nmp->nm_wsize = NFS_DGRAM_WSIZE;
3168	}
3169
3170	/* round down I/O sizes to multiple of NFS_FABLKSIZE */
3171	nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
3172	if (nmp->nm_rsize <= 0)
3173		nmp->nm_rsize = NFS_FABLKSIZE;
3174	nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
3175	if (nmp->nm_wsize <= 0)
3176		nmp->nm_wsize = NFS_FABLKSIZE;
3177
3178	/* and limit I/O sizes to maximum allowed */
3179	maxio = (nmp->nm_vers == NFS_VER2) ? NFS_V2MAXDATA :
3180		(nmp->nm_sotype == SOCK_DGRAM) ? NFS_MAXDGRAMDATA : NFS_MAXDATA;
3181	if (maxio > NFS_MAXBSIZE)
3182		maxio = NFS_MAXBSIZE;
3183	if (nmp->nm_rsize > maxio)
3184		nmp->nm_rsize = maxio;
3185	if (nmp->nm_wsize > maxio)
3186		nmp->nm_wsize = maxio;
3187
3188	if (nmp->nm_readdirsize > maxio)
3189		nmp->nm_readdirsize = maxio;
3190	if (nmp->nm_readdirsize > nmp->nm_rsize)
3191		nmp->nm_readdirsize = nmp->nm_rsize;
3192
3193	/* Set up the sockets and related info */
3194	if (nmp->nm_sotype == SOCK_DGRAM)
3195		TAILQ_INIT(&nmp->nm_cwndq);
3196
3197	/*
3198	 * Get the root node/attributes from the NFS server and
3199	 * do any basic, version-specific setup.
3200	 */
3201	error = nmp->nm_funcs->nf_mount(nmp, ctx, &np);
3202	nfsmerr_if(error);
3203
3204	/*
3205	 * A reference count is needed on the node representing the
3206	 * remote root.  If this object is not persistent, then backward
3207	 * traversals of the mount point (i.e. "..") will not work if
3208	 * the node gets flushed out of the cache.
3209	 */
3210	nmp->nm_dnp = np;
3211	*vpp = NFSTOV(np);
3212	/* get usecount and drop iocount */
3213	error = vnode_ref(*vpp);
3214	vnode_put(*vpp);
3215	if (error) {
3216		vnode_recycle(*vpp);
3217		goto nfsmerr;
3218	}
3219
3220	/*
3221	 * Do statfs to ensure static info gets set to reasonable values.
3222	 */
3223	if ((error = nmp->nm_funcs->nf_update_statfs(nmp, ctx))) {
3224		int error2 = vnode_getwithref(*vpp);
3225		vnode_rele(*vpp);
3226		if (!error2)
3227			vnode_put(*vpp);
3228		vnode_recycle(*vpp);
3229		goto nfsmerr;
3230	}
3231	sbp = vfs_statfs(mp);
3232	sbp->f_bsize = nmp->nm_fsattr.nfsa_bsize;
3233	sbp->f_blocks = nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize;
3234	sbp->f_bfree = nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize;
3235	sbp->f_bavail = nmp->nm_fsattr.nfsa_space_avail / sbp->f_bsize;
3236	sbp->f_bused = (nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize) -
3237			(nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize);
3238	sbp->f_files = nmp->nm_fsattr.nfsa_files_total;
3239	sbp->f_ffree = nmp->nm_fsattr.nfsa_files_free;
3240	sbp->f_iosize = nfs_iosize;
3241
3242	/*
3243	 * Calculate the size used for I/O buffers.  Use the larger
3244	 * of the two sizes to minimise NFS requests but make sure
3245	 * that it is at least one VM page to avoid wasting buffer
3246	 * space and to allow easy mmapping of I/O buffers.
3247	 * The read/write RPC calls handle the splitting up of
3248	 * buffers into multiple requests if the buffer size is
3249	 * larger than the I/O size.
3250	 */
3251	iosize = max(nmp->nm_rsize, nmp->nm_wsize);
3252	if (iosize < PAGE_SIZE)
3253		iosize = PAGE_SIZE;
3254	nmp->nm_biosize = trunc_page_32(iosize);
3255
3256	/* For NFSv3 and greater, there is a (relatively) reliable ACCESS call. */
3257	if (nmp->nm_vers > NFS_VER2)
3258		vfs_setauthopaqueaccess(mp);
3259
3260	switch (nmp->nm_lockmode) {
3261	case NFS_LOCK_MODE_DISABLED:
3262		break;
3263	case NFS_LOCK_MODE_LOCAL:
3264		vfs_setlocklocal(nmp->nm_mountp);
3265		break;
3266	case NFS_LOCK_MODE_ENABLED:
3267	default:
3268		if (nmp->nm_vers <= NFS_VER3)
3269			nfs_lockd_mount_register(nmp);
3270		break;
3271	}
3272
3273	/* success! */
3274	lck_mtx_lock(&nmp->nm_lock);
3275	nmp->nm_state |= NFSSTA_MOUNTED;
3276	lck_mtx_unlock(&nmp->nm_lock);
3277	return (0);
3278nfsmerr:
3279	nfs_mount_cleanup(nmp);
3280	return (error);
3281}
3282
3283#if CONFIG_TRIGGERS
3284
3285/*
3286 * We've detected a file system boundary on the server and
3287 * need to mount a new file system so that our file systems
3288 * MIRROR the file systems on the server.
3289 *
3290 * Build the mount arguments for the new mount and call kernel_mount().
3291 */
3292int
3293nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx)
3294{
3295	nfsnode_t np = VTONFS(vp);
3296	nfsnode_t dnp = VTONFS(dvp);
3297	struct nfsmount *nmp = NFSTONMP(np);
3298	char fstype[MFSTYPENAMELEN], *mntfromname = NULL, *path = NULL, *relpath, *p, *cp;
3299	int error = 0, pathbuflen = MAXPATHLEN, i, mntflags = 0, referral, skipcopy = 0;
3300	size_t nlen;
3301	struct xdrbuf xb, xbnew;
3302	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
3303	uint32_t newmattrs[NFS_MATTR_BITMAP_LEN];
3304	uint32_t newmflags[NFS_MFLAG_BITMAP_LEN];
3305	uint32_t newmflags_mask[NFS_MFLAG_BITMAP_LEN];
3306	uint32_t argslength = 0, val, count, mlen, mlen2, rlen, relpathcomps;
3307	uint32_t argslength_offset, attrslength_offset, end_offset;
3308	uint32_t numlocs, loc, numserv, serv, numaddr, addr, numcomp, comp;
3309	char buf[XDRWORD];
3310	struct nfs_fs_locations nfsls;
3311
3312	referral = (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL);
3313	if (referral)
3314		bzero(&nfsls, sizeof(nfsls));
3315
3316	xb_init(&xbnew, 0);
3317
3318	if (!nmp || (nmp->nm_state & NFSSTA_FORCE))
3319		return (ENXIO);
3320
3321	/* allocate a couple path buffers we need */
3322	MALLOC_ZONE(mntfromname, char *, pathbuflen, M_NAMEI, M_WAITOK);
3323	if (!mntfromname) {
3324		error = ENOMEM;
3325		goto nfsmerr;
3326	}
3327	MALLOC_ZONE(path, char *, pathbuflen, M_NAMEI, M_WAITOK);
3328	if (!path) {
3329		error = ENOMEM;
3330		goto nfsmerr;
3331	}
3332
3333	/* get the path for the directory being mounted on */
3334	error = vn_getpath(vp, path, &pathbuflen);
3335	if (error) {
3336		error = ENOMEM;
3337		goto nfsmerr;
3338	}
3339
3340	/*
3341	 * Set up the mntfromname for the new mount based on the
3342	 * current mount's mntfromname and the directory's path
3343	 * relative to the current mount's mntonname.
3344	 * Set up relpath to point at the relative path on the current mount.
3345	 * Also, count the number of components in relpath.
3346	 * We'll be adding those to each fs location path in the new args.
3347	 */
3348	nlen = strlcpy(mntfromname, vfs_statfs(nmp->nm_mountp)->f_mntfromname, MAXPATHLEN);
3349	if ((nlen > 0) && (mntfromname[nlen-1] == '/')) { /* avoid double '/' in new name */
3350		mntfromname[nlen-1] = '\0';
3351		nlen--;
3352	}
3353	relpath = mntfromname + nlen;
3354	nlen = strlcat(mntfromname, path + strlen(vfs_statfs(nmp->nm_mountp)->f_mntonname), MAXPATHLEN);
3355	if (nlen >= MAXPATHLEN) {
3356		error = ENAMETOOLONG;
3357		goto nfsmerr;
3358	}
3359	/* count the number of components in relpath */
3360	p = relpath;
3361	while (*p && (*p == '/'))
3362		p++;
3363	relpathcomps = 0;
3364	while (*p) {
3365		relpathcomps++;
3366		while (*p && (*p != '/'))
3367			p++;
3368		while (*p && (*p == '/'))
3369			p++;
3370	}
3371
3372	/* grab a copy of the file system type */
3373	vfs_name(vnode_mount(vp), fstype);
3374
3375	/* for referrals, fetch the fs locations */
3376	if (referral) {
3377		const char *vname = vnode_getname(NFSTOV(np));
3378		if (!vname) {
3379			error = ENOENT;
3380		} else {
3381			error = nfs4_get_fs_locations(nmp, dnp, NULL, 0, vname, ctx, &nfsls);
3382			vnode_putname(vname);
3383			if (!error && (nfsls.nl_numlocs < 1))
3384				error = ENOENT;
3385		}
3386		nfsmerr_if(error);
3387	}
3388
3389	/* set up NFS mount args based on current mount args */
3390
3391#define xb_copy_32(E, XBSRC, XBDST, V) \
3392	do { \
3393		if (E) break; \
3394		xb_get_32((E), (XBSRC), (V)); \
3395		if (skipcopy) break; \
3396		xb_add_32((E), (XBDST), (V)); \
3397	} while (0)
3398#define xb_copy_opaque(E, XBSRC, XBDST) \
3399	do { \
3400		uint32_t __count, __val; \
3401		xb_copy_32((E), (XBSRC), (XBDST), __count); \
3402		if (E) break; \
3403		__count = nfsm_rndup(__count); \
3404		__count /= XDRWORD; \
3405		while (__count-- > 0) \
3406			xb_copy_32((E), (XBSRC), (XBDST), __val); \
3407	} while (0)
3408
3409	xb_init_buffer(&xb, nmp->nm_args, 2*XDRWORD);
3410	xb_get_32(error, &xb, val); /* version */
3411	xb_get_32(error, &xb, argslength); /* args length */
3412	xb_init_buffer(&xb, nmp->nm_args, argslength);
3413
3414	xb_init_buffer(&xbnew, NULL, 0);
3415	xb_copy_32(error, &xb, &xbnew, val); /* version */
3416	argslength_offset = xb_offset(&xbnew);
3417	xb_copy_32(error, &xb, &xbnew, val); /* args length */
3418	xb_copy_32(error, &xb, &xbnew, val); /* XDR args version */
3419	count = NFS_MATTR_BITMAP_LEN;
3420	xb_get_bitmap(error, &xb, mattrs, count); /* mount attribute bitmap */
3421	nfsmerr_if(error);
3422	for (i = 0; i < NFS_MATTR_BITMAP_LEN; i++)
3423		newmattrs[i] = mattrs[i];
3424	if (referral)
3425		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FS_LOCATIONS);
3426	else
3427		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FH);
3428	NFS_BITMAP_SET(newmattrs, NFS_MATTR_FLAGS);
3429	NFS_BITMAP_SET(newmattrs, NFS_MATTR_MNTFLAGS);
3430	NFS_BITMAP_CLR(newmattrs, NFS_MATTR_MNTFROM);
3431	xb_add_bitmap(error, &xbnew, newmattrs, NFS_MATTR_BITMAP_LEN);
3432	attrslength_offset = xb_offset(&xbnew);
3433	xb_copy_32(error, &xb, &xbnew, val); /* attrs length */
3434	NFS_BITMAP_ZERO(newmflags_mask, NFS_MFLAG_BITMAP_LEN);
3435	NFS_BITMAP_ZERO(newmflags, NFS_MFLAG_BITMAP_LEN);
3436	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
3437		count = NFS_MFLAG_BITMAP_LEN;
3438		xb_get_bitmap(error, &xb, newmflags_mask, count); /* mount flag mask bitmap */
3439		count = NFS_MFLAG_BITMAP_LEN;
3440		xb_get_bitmap(error, &xb, newmflags, count); /* mount flag bitmap */
3441	}
3442	NFS_BITMAP_SET(newmflags_mask, NFS_MFLAG_EPHEMERAL);
3443	NFS_BITMAP_SET(newmflags, NFS_MFLAG_EPHEMERAL);
3444	xb_add_bitmap(error, &xbnew, newmflags_mask, NFS_MFLAG_BITMAP_LEN);
3445	xb_add_bitmap(error, &xbnew, newmflags, NFS_MFLAG_BITMAP_LEN);
3446	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
3447		xb_copy_32(error, &xb, &xbnew, val);
3448	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
3449		xb_copy_32(error, &xb, &xbnew, val);
3450	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
3451		xb_copy_32(error, &xb, &xbnew, val);
3452	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
3453		xb_copy_32(error, &xb, &xbnew, val);
3454	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
3455		xb_copy_32(error, &xb, &xbnew, val);
3456	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
3457		xb_copy_32(error, &xb, &xbnew, val);
3458	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
3459		xb_copy_32(error, &xb, &xbnew, val);
3460		xb_copy_32(error, &xb, &xbnew, val);
3461	}
3462	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
3463		xb_copy_32(error, &xb, &xbnew, val);
3464		xb_copy_32(error, &xb, &xbnew, val);
3465	}
3466	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
3467		xb_copy_32(error, &xb, &xbnew, val);
3468		xb_copy_32(error, &xb, &xbnew, val);
3469	}
3470	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
3471		xb_copy_32(error, &xb, &xbnew, val);
3472		xb_copy_32(error, &xb, &xbnew, val);
3473	}
3474	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
3475		xb_copy_32(error, &xb, &xbnew, val);
3476	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
3477		xb_copy_32(error, &xb, &xbnew, count);
3478		while (!error && (count-- > 0))
3479			xb_copy_32(error, &xb, &xbnew, val);
3480	}
3481	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
3482		xb_copy_32(error, &xb, &xbnew, val);
3483	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
3484		xb_copy_opaque(error, &xb, &xbnew);
3485	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
3486		xb_copy_32(error, &xb, &xbnew, val);
3487	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
3488		xb_copy_32(error, &xb, &xbnew, val);
3489	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
3490		xb_copy_32(error, &xb, &xbnew, val);
3491		xb_copy_32(error, &xb, &xbnew, val);
3492	}
3493	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
3494		xb_copy_32(error, &xb, &xbnew, val);
3495	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
3496		xb_copy_32(error, &xb, &xbnew, val);
3497		xb_copy_32(error, &xb, &xbnew, val);
3498	}
3499	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
3500		xb_get_32(error, &xb, count);
3501		xb_skip(error, &xb, count);
3502	}
3503	if (!referral) {
3504		/* set the initial file handle to the directory's file handle */
3505		xb_add_fh(error, &xbnew, np->n_fhp, np->n_fhsize);
3506	}
3507	/* copy/extend/skip fs locations */
3508	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
3509		numlocs = numserv = numaddr = numcomp = 0;
3510		if (referral) /* don't copy the fs locations for a referral */
3511			skipcopy = 1;
3512		xb_copy_32(error, &xb, &xbnew, numlocs); /* location count */
3513		for (loc = 0; !error && (loc < numlocs); loc++) {
3514			xb_copy_32(error, &xb, &xbnew, numserv); /* server count */
3515			for (serv = 0; !error && (serv < numserv); serv++) {
3516				xb_copy_opaque(error, &xb, &xbnew); /* server name */
3517				xb_copy_32(error, &xb, &xbnew, numaddr); /* address count */
3518				for (addr = 0; !error && (addr < numaddr); addr++)
3519					xb_copy_opaque(error, &xb, &xbnew); /* address */
3520				xb_copy_opaque(error, &xb, &xbnew); /* server info */
3521			}
3522			/* pathname */
3523			xb_get_32(error, &xb, numcomp); /* component count */
3524			if (!skipcopy)
3525				xb_add_32(error, &xbnew, numcomp+relpathcomps); /* new component count */
3526			for (comp = 0; !error && (comp < numcomp); comp++)
3527				xb_copy_opaque(error, &xb, &xbnew); /* component */
3528			/* add additional components */
3529			for (comp = 0; !skipcopy && !error && (comp < relpathcomps); comp++) {
3530				p = relpath;
3531				while (*p && (*p == '/'))
3532					p++;
3533				while (*p && !error) {
3534					cp = p;
3535					while (*p && (*p != '/'))
3536						p++;
3537					xb_add_string(error, &xbnew, cp, (p - cp)); /* component */
3538					while (*p && (*p == '/'))
3539						p++;
3540				}
3541			}
3542			xb_copy_opaque(error, &xb, &xbnew); /* fs location info */
3543		}
3544		if (referral)
3545			skipcopy = 0;
3546	}
3547	if (referral) {
3548		/* add referral's fs locations */
3549		xb_add_32(error, &xbnew, nfsls.nl_numlocs);			/* FS_LOCATIONS */
3550		for (loc = 0; !error && (loc < nfsls.nl_numlocs); loc++) {
3551			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servcount);
3552			for (serv = 0; !error && (serv < nfsls.nl_locations[loc]->nl_servcount); serv++) {
3553				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_name,
3554					strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_name));
3555				xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
3556				for (addr = 0; !error && (addr < nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
3557					xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
3558						strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
3559				xb_add_32(error, &xbnew, 0); /* empty server info */
3560			}
3561			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_compcount);
3562			for (comp = 0; !error && (comp < nfsls.nl_locations[loc]->nl_path.np_compcount); comp++)
3563				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_components[comp],
3564					strlen(nfsls.nl_locations[loc]->nl_path.np_components[comp]));
3565			xb_add_32(error, &xbnew, 0); /* empty fs location info */
3566		}
3567	}
3568	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
3569		xb_get_32(error, &xb, mntflags);
3570	/*
3571	 * We add the following mount flags to the ones for the mounted-on mount:
3572	 * MNT_DONTBROWSE - to keep the mount from showing up as a separate volume
3573	 * MNT_AUTOMOUNTED - to keep DiskArb from retriggering the mount after
3574	 *                   an unmount (looking for /.autodiskmounted)
3575	 */
3576	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
3577	xb_add_32(error, &xbnew, mntflags);
3578	if (!referral && NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
3579		/* copy mntfrom string and add relpath */
3580		rlen = strlen(relpath);
3581		xb_get_32(error, &xb, mlen);
3582		nfsmerr_if(error);
3583		mlen2 = mlen + ((relpath[0] != '/') ? 1 : 0) + rlen;
3584		xb_add_32(error, &xbnew, mlen2);
3585		count = mlen/XDRWORD;
3586		/* copy the original string */
3587		while (count-- > 0)
3588			xb_copy_32(error, &xb, &xbnew, val);
3589		if (!error && (mlen % XDRWORD)) {
3590			error = xb_get_bytes(&xb, buf, mlen%XDRWORD, 0);
3591			if (!error)
3592				error = xb_add_bytes(&xbnew, buf, mlen%XDRWORD, 1);
3593		}
3594		/* insert a '/' if the relative path doesn't start with one */
3595		if (!error && (relpath[0] != '/')) {
3596			buf[0] = '/';
3597			error = xb_add_bytes(&xbnew, buf, 1, 1);
3598		}
3599		/* add the additional relative path */
3600		if (!error)
3601			error = xb_add_bytes(&xbnew, relpath, rlen, 1);
3602		/* make sure the resulting string has the right number of pad bytes */
3603		if (!error && (mlen2 != nfsm_rndup(mlen2))) {
3604			bzero(buf, sizeof(buf));
3605			count = nfsm_rndup(mlen2) - mlen2;
3606			error = xb_add_bytes(&xbnew, buf, count, 1);
3607		}
3608	}
3609	xb_build_done(error, &xbnew);
3610
3611	/* update opaque counts */
3612	end_offset = xb_offset(&xbnew);
3613	if (!error) {
3614		error = xb_seek(&xbnew, argslength_offset);
3615		argslength = end_offset - argslength_offset + XDRWORD/*version*/;
3616		xb_add_32(error, &xbnew, argslength);
3617	}
3618	if (!error) {
3619		error = xb_seek(&xbnew, attrslength_offset);
3620		xb_add_32(error, &xbnew, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
3621	}
3622	nfsmerr_if(error);
3623
3624	/*
3625	 * For kernel_mount() call, use the existing mount flags (instead of the
3626	 * original flags) because flags like MNT_NOSUID and MNT_NODEV may have
3627	 * been silently enforced.
3628	 */
3629	mntflags = vnode_vfsvisflags(vp);
3630	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
3631
3632	/* do the mount */
3633	error = kernel_mount(fstype, dvp, vp, path, xb_buffer_base(&xbnew), argslength,
3634			mntflags, KERNEL_MOUNT_PERMIT_UNMOUNT | KERNEL_MOUNT_NOAUTH, ctx);
3635
3636nfsmerr:
3637	if (error)
3638		printf("nfs: mirror mount of %s on %s failed (%d)\n",
3639			mntfromname, path, error);
3640	/* clean up */
3641	xb_cleanup(&xbnew);
3642	if (referral)
3643		nfs_fs_locations_cleanup(&nfsls);
3644	if (path)
3645		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3646	if (mntfromname)
3647		FREE_ZONE(mntfromname, MAXPATHLEN, M_NAMEI);
3648	if (!error)
3649		nfs_ephemeral_mount_harvester_start();
3650	return (error);
3651}
3652
3653/*
3654 * trigger vnode functions
3655 */
3656
3657resolver_result_t
3658nfs_mirror_mount_trigger_resolve(
3659	vnode_t vp,
3660	const struct componentname *cnp,
3661	enum path_operation pop,
3662	__unused int flags,
3663	__unused void *data,
3664	vfs_context_t ctx)
3665{
3666	nfsnode_t np = VTONFS(vp);
3667	vnode_t pvp = NULLVP;
3668	int error = 0;
3669	resolver_result_t result;
3670
3671	/*
3672	 * We have a trigger node that doesn't have anything mounted on it yet.
3673	 * We'll do the mount if either:
3674	 * (a) this isn't the last component of the path OR
3675	 * (b) this is an op that looks like it should trigger the mount.
3676	 */
3677	if (cnp->cn_flags & ISLASTCN) {
3678		switch (pop) {
3679		case OP_MOUNT:
3680		case OP_UNMOUNT:
3681		case OP_STATFS:
3682		case OP_LINK:
3683		case OP_UNLINK:
3684		case OP_RENAME:
3685		case OP_MKNOD:
3686		case OP_MKFIFO:
3687		case OP_SYMLINK:
3688		case OP_ACCESS:
3689		case OP_GETATTR:
3690		case OP_MKDIR:
3691		case OP_RMDIR:
3692		case OP_REVOKE:
3693		case OP_GETXATTR:
3694		case OP_LISTXATTR:
3695			/* don't perform the mount for these operations */
3696			result = vfs_resolver_result(np->n_trigseq, RESOLVER_NOCHANGE, 0);
3697#ifdef NFS_TRIGGER_DEBUG
3698			NP(np, "nfs trigger RESOLVE: no change, last %d nameiop %d, seq %d",
3699				(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3700#endif
3701			return (result);
3702		case OP_OPEN:
3703		case OP_CHDIR:
3704		case OP_CHROOT:
3705		case OP_TRUNCATE:
3706		case OP_COPYFILE:
3707		case OP_PATHCONF:
3708		case OP_READLINK:
3709		case OP_SETATTR:
3710		case OP_EXCHANGEDATA:
3711		case OP_SEARCHFS:
3712		case OP_FSCTL:
3713		case OP_SETXATTR:
3714		case OP_REMOVEXATTR:
3715		default:
3716			/* go ahead and do the mount */
3717			break;
3718		}
3719	}
3720
3721	if (vnode_mountedhere(vp) != NULL) {
3722		/*
3723		 * Um... there's already something mounted.
3724		 * Been there.  Done that.  Let's just say it succeeded.
3725		 */
3726		error = 0;
3727		goto skipmount;
3728	}
3729
3730	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3731		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3732#ifdef NFS_TRIGGER_DEBUG
3733		NP(np, "nfs trigger RESOLVE: busy error %d, last %d nameiop %d, seq %d",
3734			error, (cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3735#endif
3736		return (result);
3737	}
3738
3739	pvp = vnode_getparent(vp);
3740	if (pvp == NULLVP)
3741		error = EINVAL;
3742	if (!error)
3743		error = nfs_mirror_mount_domount(pvp, vp, ctx);
3744skipmount:
3745	if (!error)
3746		np->n_trigseq++;
3747	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_RESOLVED, error);
3748#ifdef NFS_TRIGGER_DEBUG
3749	NP(np, "nfs trigger RESOLVE: %s %d, last %d nameiop %d, seq %d",
3750		error ? "error" : "resolved", error,
3751		(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
3752#endif
3753
3754	if (pvp != NULLVP)
3755		vnode_put(pvp);
3756	nfs_node_clear_busy(np);
3757	return (result);
3758}
3759
3760resolver_result_t
3761nfs_mirror_mount_trigger_unresolve(
3762	vnode_t vp,
3763	int flags,
3764	__unused void *data,
3765	vfs_context_t ctx)
3766{
3767	nfsnode_t np = VTONFS(vp);
3768	mount_t mp;
3769	int error;
3770	resolver_result_t result;
3771
3772	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3773		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3774#ifdef NFS_TRIGGER_DEBUG
3775		NP(np, "nfs trigger UNRESOLVE: busy error %d, seq %d", error, np->n_trigseq);
3776#endif
3777		return (result);
3778	}
3779
3780	mp = vnode_mountedhere(vp);
3781	if (!mp)
3782		error = EINVAL;
3783	if (!error)
3784		error = vfs_unmountbyfsid(&(vfs_statfs(mp)->f_fsid), flags, ctx);
3785	if (!error)
3786		np->n_trigseq++;
3787	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_UNRESOLVED, error);
3788#ifdef NFS_TRIGGER_DEBUG
3789	NP(np, "nfs trigger UNRESOLVE: %s %d, seq %d",
3790		error ? "error" : "unresolved", error, np->n_trigseq);
3791#endif
3792	nfs_node_clear_busy(np);
3793	return (result);
3794}
3795
3796resolver_result_t
3797nfs_mirror_mount_trigger_rearm(
3798	vnode_t vp,
3799	__unused int flags,
3800	__unused void *data,
3801	vfs_context_t ctx)
3802{
3803	nfsnode_t np = VTONFS(vp);
3804	int error;
3805	resolver_result_t result;
3806
3807	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
3808		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
3809#ifdef NFS_TRIGGER_DEBUG
3810		NP(np, "nfs trigger REARM: busy error %d, seq %d", error, np->n_trigseq);
3811#endif
3812		return (result);
3813	}
3814
3815	np->n_trigseq++;
3816	result = vfs_resolver_result(np->n_trigseq,
3817			vnode_mountedhere(vp) ? RESOLVER_RESOLVED : RESOLVER_UNRESOLVED, 0);
3818#ifdef NFS_TRIGGER_DEBUG
3819	NP(np, "nfs trigger REARM: %s, seq %d",
3820		vnode_mountedhere(vp) ? "resolved" : "unresolved", np->n_trigseq);
3821#endif
3822	nfs_node_clear_busy(np);
3823	return (result);
3824}
3825
3826/*
3827 * Periodically attempt to unmount ephemeral (mirror) mounts in an attempt to limit
3828 * the number of unused mounts.
3829 */
3830
/*
 * Harvest interval, passed to clock_interval_to_deadline() with a scale of
 * NSEC_PER_SEC (i.e. seconds).  Also used as the minimum age an ephemeral
 * mount must reach before the harvester will try to unmount it.
 */
#define NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL	120	/* how often the harvester runs */
/*
 * State carried through one vfs_iterate() scan by the harvester callback.
 * An fsid of all zeros means "no unmount candidate pending".
 */
struct nfs_ephemeral_mount_harvester_info {
	fsid_t		fsid;		/* FSID that we need to try to unmount */
	uint32_t	mountcount;	/* count of ephemeral mounts seen in scan */
 };
/* various globals for the harvester */
/* delayed thread call that kicks off a harvest; allocated on first start */
static thread_call_t nfs_ephemeral_mount_harvester_timer = NULL;
/* nonzero while the timer is armed; read and written under nfs_global_mutex */
static int nfs_ephemeral_mount_harvester_on = 0;

/* local prototype; the harvester thread calls this on itself when finished */
kern_return_t thread_terminate(thread_t);
3841
3842static int
3843nfs_ephemeral_mount_harvester_callback(mount_t mp, void *arg)
3844{
3845	struct nfs_ephemeral_mount_harvester_info *hinfo = arg;
3846	struct nfsmount *nmp;
3847	struct timeval now;
3848
3849	if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
3850		return (VFS_RETURNED);
3851	nmp = VFSTONFS(mp);
3852	if (!nmp || !NMFLAG(nmp, EPHEMERAL))
3853		return (VFS_RETURNED);
3854	hinfo->mountcount++;
3855
3856	/* avoid unmounting mounts that have been triggered within the last harvest interval */
3857	microtime(&now);
3858	if ((nmp->nm_mounttime >> 32) > ((uint32_t)now.tv_sec - NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL))
3859		return (VFS_RETURNED);
3860
3861	if (hinfo->fsid.val[0] || hinfo->fsid.val[1]) {
3862		/* attempt to unmount previously-found ephemeral mount */
3863		vfs_unmountbyfsid(&hinfo->fsid, 0, vfs_context_kernel());
3864		hinfo->fsid.val[0] = hinfo->fsid.val[1] = 0;
3865	}
3866
3867	/*
3868	 * We can't call unmount here since we hold a mount iter ref
3869	 * on mp so save its fsid for the next call iteration to unmount.
3870	 */
3871	hinfo->fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
3872	hinfo->fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
3873
3874	return (VFS_RETURNED);
3875}
3876
3877/*
3878 * Spawn a thread to do the ephemeral mount harvesting.
3879 */
3880static void
3881nfs_ephemeral_mount_harvester_timer_func(void)
3882{
3883	thread_t thd;
3884
3885	if (kernel_thread_start(nfs_ephemeral_mount_harvester, NULL, &thd) == KERN_SUCCESS)
3886		thread_deallocate(thd);
3887}
3888
3889/*
3890 * Iterate all mounts looking for NFS ephemeral mounts to try to unmount.
3891 */
3892void
3893nfs_ephemeral_mount_harvester(__unused void *arg, __unused wait_result_t wr)
3894{
3895	struct nfs_ephemeral_mount_harvester_info hinfo;
3896	uint64_t deadline;
3897
3898	hinfo.mountcount = 0;
3899	hinfo.fsid.val[0] = hinfo.fsid.val[1] = 0;
3900	vfs_iterate(VFS_ITERATE_TAIL_FIRST, nfs_ephemeral_mount_harvester_callback, &hinfo);
3901	if (hinfo.fsid.val[0] || hinfo.fsid.val[1]) {
3902		/* attempt to unmount last found ephemeral mount */
3903		vfs_unmountbyfsid(&hinfo.fsid, 0, vfs_context_kernel());
3904	}
3905
3906	lck_mtx_lock(nfs_global_mutex);
3907	if (!hinfo.mountcount) {
3908		/* no more ephemeral mounts - don't need timer */
3909		nfs_ephemeral_mount_harvester_on = 0;
3910	} else {
3911		/* re-arm the timer */
3912		clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
3913		thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
3914		nfs_ephemeral_mount_harvester_on = 1;
3915	}
3916	lck_mtx_unlock(nfs_global_mutex);
3917
3918	/* thread done */
3919	thread_terminate(current_thread());
3920}
3921
3922/*
3923 * Make sure the NFS ephemeral mount harvester timer is running.
3924 */
3925void
3926nfs_ephemeral_mount_harvester_start(void)
3927{
3928	uint64_t deadline;
3929
3930	lck_mtx_lock(nfs_global_mutex);
3931	if (nfs_ephemeral_mount_harvester_on) {
3932		lck_mtx_unlock(nfs_global_mutex);
3933		return;
3934	}
3935	if (nfs_ephemeral_mount_harvester_timer == NULL)
3936		nfs_ephemeral_mount_harvester_timer = thread_call_allocate((thread_call_func_t)nfs_ephemeral_mount_harvester_timer_func, NULL);
3937	clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
3938	thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
3939	nfs_ephemeral_mount_harvester_on = 1;
3940	lck_mtx_unlock(nfs_global_mutex);
3941}
3942
3943#endif
3944
3945/*
3946 * Send a MOUNT protocol MOUNT request to the server to get the initial file handle (and security).
3947 */
3948int
3949nfs3_mount_rpc(struct nfsmount *nmp, struct sockaddr *sa, int sotype, int nfsvers, char *path, vfs_context_t ctx, int timeo, fhandle_t *fh, struct nfs_sec *sec)
3950{
3951	int error = 0, slen, mntproto;
3952	thread_t thd = vfs_context_thread(ctx);
3953	kauth_cred_t cred = vfs_context_ucred(ctx);
3954	uint64_t xid = 0;
3955	struct nfsm_chain nmreq, nmrep;
3956	mbuf_t mreq;
3957	uint32_t mntvers, mntport, val;
3958	struct sockaddr_storage ss;
3959	struct sockaddr *saddr = (struct sockaddr*)&ss;
3960
3961	nfsm_chain_null(&nmreq);
3962	nfsm_chain_null(&nmrep);
3963
3964	mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
3965	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
3966	sec->count = 0;
3967
3968	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
3969	if (saddr->sa_family == AF_INET) {
3970		if (nmp->nm_mountport)
3971			((struct sockaddr_in*)saddr)->sin_port = htons(nmp->nm_mountport);
3972		mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
3973	} else {
3974		if (nmp->nm_mountport)
3975			((struct sockaddr_in6*)saddr)->sin6_port = htons(nmp->nm_mountport);
3976		mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
3977	}
3978
3979	while (!mntport) {
3980		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
3981		nfsmout_if(error);
3982		if (saddr->sa_family == AF_INET)
3983			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
3984		else
3985			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
3986		if (!mntport) {
3987			/* if not found and TCP, then retry with UDP */
3988			if (mntproto == IPPROTO_UDP) {
3989				error = EPROGUNAVAIL;
3990				break;
3991			}
3992			mntproto = IPPROTO_UDP;
3993			bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
3994		}
3995	}
3996	nfsmout_if(error || !mntport);
3997
3998	/* MOUNT protocol MOUNT request */
3999	slen = strlen(path);
4000	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
4001	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
4002	nfsm_chain_build_done(error, &nmreq);
4003	nfsmout_if(error);
4004	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4005			RPCPROG_MNT, mntvers, RPCMNT_MOUNT,
4006			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
4007	nfsmout_if(error);
4008	nmreq.nmc_mhead = NULL;
4009	error = nfs_aux_request(nmp, thd, saddr, NULL,
4010			((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
4011			mreq, R_XID32(xid), 1, timeo, &nmrep);
4012	nfsmout_if(error);
4013	nfsm_chain_get_32(error, &nmrep, val);
4014	if (!error && val)
4015		error = val;
4016	nfsm_chain_get_fh(error, &nmrep, nfsvers, fh);
4017	if (!error && (nfsvers > NFS_VER2)) {
4018		sec->count = NX_MAX_SEC_FLAVORS;
4019		error = nfsm_chain_get_secinfo(&nmrep, &sec->flavors[0], &sec->count);
4020	}
4021nfsmout:
4022	nfsm_chain_cleanup(&nmreq);
4023	nfsm_chain_cleanup(&nmrep);
4024	return (error);
4025}
4026
4027
4028/*
4029 * Send a MOUNT protocol UNMOUNT request to tell the server we've unmounted it.
4030 */
4031void
4032nfs3_umount_rpc(struct nfsmount *nmp, vfs_context_t ctx, int timeo)
4033{
4034	int error = 0, slen, mntproto;
4035	thread_t thd = vfs_context_thread(ctx);
4036	kauth_cred_t cred = vfs_context_ucred(ctx);
4037	char *path;
4038	uint64_t xid = 0;
4039	struct nfsm_chain nmreq, nmrep;
4040	mbuf_t mreq;
4041	uint32_t mntvers, mntport;
4042	struct sockaddr_storage ss;
4043	struct sockaddr *saddr = (struct sockaddr*)&ss;
4044
4045	if (!nmp->nm_saddr)
4046		return;
4047
4048	nfsm_chain_null(&nmreq);
4049	nfsm_chain_null(&nmrep);
4050
4051	mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
4052	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nmp->nm_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
4053	mntport = nmp->nm_mountport;
4054
4055	bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
4056	if (saddr->sa_family == AF_INET)
4057		((struct sockaddr_in*)saddr)->sin_port = htons(mntport);
4058	else
4059		((struct sockaddr_in6*)saddr)->sin6_port = htons(mntport);
4060
4061	while (!mntport) {
4062		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
4063  		nfsmout_if(error);
4064		if (saddr->sa_family == AF_INET)
4065			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
4066		else
4067			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
4068		/* if not found and mntvers > VER1, then retry with VER1 */
4069		if (!mntport) {
4070			if (mntvers > RPCMNT_VER1) {
4071				mntvers = RPCMNT_VER1;
4072			} else if (mntproto == IPPROTO_TCP) {
4073				mntproto = IPPROTO_UDP;
4074				mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
4075			} else {
4076				break;
4077			}
4078			bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
4079		}
4080	}
4081	nfsmout_if(!mntport);
4082
4083	/* MOUNT protocol UNMOUNT request */
4084	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
4085	while (*path && (*path != '/'))
4086		path++;
4087	slen = strlen(path);
4088	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
4089	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
4090	nfsm_chain_build_done(error, &nmreq);
4091	nfsmout_if(error);
4092	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4093			RPCPROG_MNT, RPCMNT_VER1, RPCMNT_UMOUNT,
4094			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
4095	nfsmout_if(error);
4096	nmreq.nmc_mhead = NULL;
4097	error = nfs_aux_request(nmp, thd, saddr, NULL,
4098		((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
4099		mreq, R_XID32(xid), 1, timeo, &nmrep);
4100nfsmout:
4101	nfsm_chain_cleanup(&nmreq);
4102	nfsm_chain_cleanup(&nmrep);
4103}
4104
4105/*
4106 * unmount system call
4107 */
4108int
4109nfs_vfs_unmount(
4110	mount_t mp,
4111	int mntflags,
4112	__unused vfs_context_t ctx)
4113{
4114	struct nfsmount *nmp;
4115	vnode_t vp;
4116	int error, flags = 0;
4117	struct timespec ts = { 1, 0 };
4118
4119	nmp = VFSTONFS(mp);
4120	lck_mtx_lock(&nmp->nm_lock);
4121	/*
4122	 * Set the flag indicating that an unmount attempt is in progress.
4123	 */
4124	nmp->nm_state |= NFSSTA_UNMOUNTING;
4125	/*
4126	 * During a force unmount we want to...
4127	 *   Mark that we are doing a force unmount.
4128	 *   Make the mountpoint soft.
4129	 */
4130	if (mntflags & MNT_FORCE) {
4131		flags |= FORCECLOSE;
4132		nmp->nm_state |= NFSSTA_FORCE;
4133		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
4134	}
4135	/*
4136	 * Wait for any in-progress monitored node scan to complete.
4137	 */
4138	while (nmp->nm_state & NFSSTA_MONITOR_SCAN)
4139		msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
4140	/*
4141	 * Goes something like this..
4142	 * - Call vflush() to clear out vnodes for this file system,
4143	 *   except for the swap files. Deal with them in 2nd pass.
4144	 * - Decrement reference on the vnode representing remote root.
4145	 * - Clean up the NFS mount structure.
4146	 */
4147	vp = NFSTOV(nmp->nm_dnp);
4148	lck_mtx_unlock(&nmp->nm_lock);
4149
4150	/*
4151	 * vflush will check for busy vnodes on mountpoint.
4152	 * Will do the right thing for MNT_FORCE. That is, we should
4153	 * not get EBUSY back.
4154	 */
4155	error = vflush(mp, vp, SKIPSWAP | flags);
4156	if (mntflags & MNT_FORCE) {
4157		error = vflush(mp, NULLVP, flags); /* locks vp in the process */
4158	} else {
4159		if (vnode_isinuse(vp, 1))
4160			error = EBUSY;
4161		else
4162			error = vflush(mp, vp, flags);
4163	}
4164	if (error) {
4165		lck_mtx_lock(&nmp->nm_lock);
4166		nmp->nm_state &= ~NFSSTA_UNMOUNTING;
4167		lck_mtx_unlock(&nmp->nm_lock);
4168		return (error);
4169	}
4170
4171	lck_mtx_lock(&nmp->nm_lock);
4172	nmp->nm_dnp = NULL;
4173	lck_mtx_unlock(&nmp->nm_lock);
4174
4175	/*
4176	 * Release the root vnode reference held by mountnfs()
4177	 */
4178	error = vnode_get(vp);
4179	vnode_rele(vp);
4180	if (!error)
4181		vnode_put(vp);
4182
4183	vflush(mp, NULLVP, FORCECLOSE);
4184
4185	nfs_mount_cleanup(nmp);
4186	return (0);
4187}
4188
4189/*
4190 * cleanup/destroy NFS fs locations structure
4191 */
4192void
4193nfs_fs_locations_cleanup(struct nfs_fs_locations *nfslsp)
4194{
4195	struct nfs_fs_location *fsl;
4196	struct nfs_fs_server *fss;
4197	struct nfs_fs_path *fsp;
4198	uint32_t loc, serv, addr, comp;
4199
4200	/* free up fs locations */
4201	if (!nfslsp->nl_numlocs || !nfslsp->nl_locations)
4202		return;
4203
4204	for (loc = 0; loc < nfslsp->nl_numlocs; loc++) {
4205		fsl = nfslsp->nl_locations[loc];
4206		if (!fsl)
4207			continue;
4208		if ((fsl->nl_servcount > 0) && fsl->nl_servers) {
4209			for (serv = 0; serv < fsl->nl_servcount; serv++) {
4210				fss = fsl->nl_servers[serv];
4211				if (!fss)
4212					continue;
4213				if ((fss->ns_addrcount > 0) && fss->ns_addresses) {
4214					for (addr = 0; addr < fss->ns_addrcount; addr++)
4215						FREE(fss->ns_addresses[addr], M_TEMP);
4216					FREE(fss->ns_addresses, M_TEMP);
4217				}
4218				FREE(fss->ns_name, M_TEMP);
4219				FREE(fss, M_TEMP);
4220			}
4221			FREE(fsl->nl_servers, M_TEMP);
4222		}
4223		fsp = &fsl->nl_path;
4224		if (fsp->np_compcount && fsp->np_components) {
4225			for (comp = 0; comp < fsp->np_compcount; comp++)
4226				if (fsp->np_components[comp])
4227					FREE(fsp->np_components[comp], M_TEMP);
4228			FREE(fsp->np_components, M_TEMP);
4229		}
4230		FREE(fsl, M_TEMP);
4231	}
4232	FREE(nfslsp->nl_locations, M_TEMP);
4233	nfslsp->nl_numlocs = 0;
4234	nfslsp->nl_locations = NULL;
4235}
4236
4237/*
4238 * cleanup/destroy an nfsmount
4239 */
/*
 * Tear down and free an nfsmount.
 *
 * In order: shuts down NFSv4 callbacks and RPCSEC_GSS contexts, stops the
 * socket thread and disconnects, detaches the nfsmount from its mount_t,
 * releases timers/addresses/lock-daemon/client-ID state, strips this mount
 * from every outstanding request, completes queued async I/O with ENXIO,
 * empties the per-mount node and open-owner lists, and finally frees the
 * remaining allocations and the nfsmount structure itself.
 *
 * nmp is freed by this function and must not be referenced afterwards.
 */
void
nfs_mount_cleanup(struct nfsmount *nmp)
{
	struct nfsreq *req, *treq;
	struct nfs_reqqhead iodq;	/* local list of async requests to finish */
	struct timespec ts = { 1, 0 };	/* 1-second msleep timeout */
	struct nfs_open_owner *noop, *nextnoop;
	nfsnode_t np;
	int docallback;

	/* stop callbacks */
	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid)
		nfs4_mount_callback_shutdown(nmp);

	/* Destroy any RPCSEC_GSS contexts */
	if (!TAILQ_EMPTY(&nmp->nm_gsscl))
		nfs_gss_clnt_ctx_unmount(nmp);

	/* mark the socket for termination */
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags |= NMSOCK_UNMOUNT;

	/* Have the socket thread send the unmount RPC, if requested/appropriate. */
	if ((nmp->nm_vers < NFS_VER4) && (nmp->nm_state & NFSSTA_MOUNTED) &&
	    !(nmp->nm_state & NFSSTA_FORCE) && NMFLAG(nmp, CALLUMNT))
		nfs_mount_sock_thread_wake(nmp);

	/* wait for the socket thread to terminate */
	/* (re-checks nm_sockthd after each wakeup or 1-second timeout) */
	while (nmp->nm_sockthd) {
		wakeup(&nmp->nm_sockthd);
		msleep(&nmp->nm_sockthd, &nmp->nm_lock, PZERO-1, "nfswaitsockthd", &ts);
	}

	lck_mtx_unlock(&nmp->nm_lock);

	/* tear down the socket */
	nfs_disconnect(nmp);

	/* detach this nfsmount from the VFS mount */
	if (nmp->nm_mountp)
		vfs_setfsprivate(nmp->nm_mountp, NULL);

	lck_mtx_lock(&nmp->nm_lock);

	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid) {
		/* clear out any pending delegation return requests */
		while ((np = TAILQ_FIRST(&nmp->nm_dreturnq))) {
			TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
			np->n_dreturn.tqe_next = NFSNOLIST;
		}
	}

	/* cancel any renew timer */
	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_renew_timer) {
		thread_call_cancel(nmp->nm_renew_timer);
		thread_call_free(nmp->nm_renew_timer);
	}

	/* free the server address and (pre-v4 only) the rquota address */
	if (nmp->nm_saddr)
		FREE(nmp->nm_saddr, M_SONAME);
	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_rqsaddr)
		FREE(nmp->nm_rqsaddr, M_SONAME);
	lck_mtx_unlock(&nmp->nm_lock);

	/* unregister from lockd unless locking was disabled or local-only */
	if (nmp->nm_state & NFSSTA_MOUNTED)
		switch (nmp->nm_lockmode) {
		case NFS_LOCK_MODE_DISABLED:
		case NFS_LOCK_MODE_LOCAL:
			break;
		case NFS_LOCK_MODE_ENABLED:
		default:
			if (nmp->nm_vers <= NFS_VER3)
				nfs_lockd_mount_unregister(nmp);
			break;
		}

	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_longid) {
		/* remove/deallocate the client ID data */
		lck_mtx_lock(nfs_global_mutex);
		TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link);
		if (nmp->nm_longid->nci_id)
			FREE(nmp->nm_longid->nci_id, M_TEMP);
		FREE(nmp->nm_longid, M_TEMP);
		lck_mtx_unlock(nfs_global_mutex);
	}

	/*
	 * Loop through outstanding request list and remove dangling
	 * references to defunct nfsmount struct
	 */
	TAILQ_INIT(&iodq);
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmp == nmp) {
			lck_mtx_lock(&req->r_mtx);
			req->r_nmp = NULL;
			/* r_mtx is released before nfsiod_mutex is taken below */
			lck_mtx_unlock(&req->r_mtx);
			if (req->r_callback.rcb_func) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(nfsiod_mutex);
				/* only queue it locally if it isn't already on an async chain */
				if (req->r_achain.tqe_next == NFSREQNOLIST)
					TAILQ_INSERT_TAIL(&iodq, req, r_achain);
				lck_mtx_unlock(nfsiod_mutex);
			}
			/* pull the request off this mount's resend queue, if it's there */
			lck_mtx_lock(&req->r_mtx);
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDQ) {
				if (req->r_rchain.tqe_next != NFSREQNOLIST) {
					TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
					req->r_rchain.tqe_next = NFSREQNOLIST;
				}
				req->r_flags &= ~R_RESENDQ;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_unlock(&req->r_mtx);
			/* wake anyone sleeping on this request */
			wakeup(req);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* finish any async I/O RPCs queued up */
	lck_mtx_lock(nfsiod_mutex);
	/* append the mount's own async queue to the local list */
	TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
	lck_mtx_unlock(nfsiod_mutex);
	TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
		TAILQ_REMOVE(&iodq, req, r_achain);
		req->r_achain.tqe_next = NFSREQNOLIST;
		lck_mtx_lock(&req->r_mtx);
		req->r_error = ENXIO;
		/* the callback is skipped for requests flagged R_WAITSENT */
		docallback = !(req->r_flags & R_WAITSENT);
		lck_mtx_unlock(&req->r_mtx);
		if (docallback)
			req->r_callback.rcb_func(req);
	}

	/* clean up common state */
	lck_mtx_lock(&nmp->nm_lock);
	/* unlink every node from the monitored-node list */
 	while ((np = LIST_FIRST(&nmp->nm_monlist))) {
 		LIST_REMOVE(np, n_monlink);
 		np->n_monlink.le_next = NFSNOLIST;
 	}
	/* unlink every open owner; destroy only those with no remaining references */
	TAILQ_FOREACH_SAFE(noop, &nmp->nm_open_owners, noo_link, nextnoop) {
		TAILQ_REMOVE(&nmp->nm_open_owners, noop, noo_link);
		noop->noo_flags &= ~NFS_OPEN_OWNER_LINK;
		if (noop->noo_refcnt)
			continue;
		nfs_open_owner_destroy(noop);
	}
	lck_mtx_unlock(&nmp->nm_lock);

	/* clean up NFSv4 state */
	if (nmp->nm_vers >= NFS_VER4) {
		lck_mtx_lock(&nmp->nm_lock);
		/* unlink every node from the delegations list */
		while ((np = TAILQ_FIRST(&nmp->nm_delegations))) {
			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
			np->n_dlink.tqe_next = NFSNOLIST;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	if (IS_VALID_CRED(nmp->nm_mcred))
		kauth_cred_unref(&nmp->nm_mcred);

	nfs_fs_locations_cleanup(&nmp->nm_locations);

	if (nmp->nm_realm)
		FREE(nmp->nm_realm, M_TEMP);
	if (nmp->nm_principal)
		FREE(nmp->nm_principal, M_TEMP);
	if (nmp->nm_sprinc)
		FREE(nmp->nm_sprinc, M_TEMP);

	if (nmp->nm_args)
		xb_free(nmp->nm_args);
	/* nm_lock is destroyed only now, after its last use above */
	lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp);
	if (nmp->nm_fh)
		FREE(nmp->nm_fh, M_TEMP);
	/* finally release the nfsmount itself */
	FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
}
4417
4418/*
4419 * Return root of a filesystem
4420 */
4421int
4422nfs_vfs_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t ctx)
4423{
4424	vnode_t vp;
4425	struct nfsmount *nmp;
4426	int error;
4427	u_int32_t vpid;
4428
4429	nmp = VFSTONFS(mp);
4430	if (!nmp || !nmp->nm_dnp)
4431		return (ENXIO);
4432	vp = NFSTOV(nmp->nm_dnp);
4433	vpid = vnode_vid(vp);
4434	while ((error = vnode_getwithvid(vp, vpid))) {
4435		/* vnode_get() may return ENOENT if the dir changes. */
4436		/* If that happens, just try it again, else return the error. */
4437		if ((error != ENOENT) || (vnode_vid(vp) == vpid))
4438			return (error);
4439		vpid = vnode_vid(vp);
4440	}
4441	*vpp = vp;
4442	return (0);
4443}
4444
4445/*
4446 * Do operations associated with quotas
4447 */
4448#if !QUOTA
4449int
4450nfs_vfs_quotactl(
4451	__unused mount_t mp,
4452	__unused int cmds,
4453	__unused uid_t uid,
4454	__unused caddr_t datap,
4455	__unused vfs_context_t context)
4456{
4457	return (ENOTSUP);
4458}
4459#else
4460
4461int
4462nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
4463{
4464	int error = 0, slen, timeo;
4465	int rqport = 0, rqproto, rqvers = (type == GRPQUOTA) ? RPCRQUOTA_EXT_VER : RPCRQUOTA_VER;
4466	thread_t thd = vfs_context_thread(ctx);
4467	kauth_cred_t cred = vfs_context_ucred(ctx);
4468	char *path;
4469	uint64_t xid = 0;
4470	struct nfsm_chain nmreq, nmrep;
4471	mbuf_t mreq;
4472	uint32_t val = 0, bsize = 0;
4473	struct sockaddr *rqsaddr;
4474	struct timeval now;
4475
4476	if (!nmp->nm_saddr)
4477		return (ENXIO);
4478
4479	if (NMFLAG(nmp, NOQUOTA))
4480		return (ENOTSUP);
4481
4482	if (!nmp->nm_rqsaddr)
4483		MALLOC(nmp->nm_rqsaddr, struct sockaddr *, sizeof(struct sockaddr_storage), M_SONAME, M_WAITOK|M_ZERO);
4484	if (!nmp->nm_rqsaddr)
4485		return (ENOMEM);
4486	rqsaddr = nmp->nm_rqsaddr;
4487	if (rqsaddr->sa_family == AF_INET6)
4488		rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
4489	else if (rqsaddr->sa_family == AF_INET)
4490		rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
4491
4492	timeo = NMFLAG(nmp, SOFT) ? 10 : 60;
4493	rqproto = IPPROTO_UDP; /* XXX should prefer TCP if mount is TCP */
4494
4495	/* check if we have a recently cached rquota port */
4496	microuptime(&now);
4497	if (!rqport || ((nmp->nm_rqsaddrstamp + 60) >= (uint32_t)now.tv_sec)) {
4498		/* send portmap request to get rquota port */
4499		bcopy(nmp->nm_saddr, rqsaddr, min(sizeof(struct sockaddr_storage), nmp->nm_saddr->sa_len));
4500		error = nfs_portmap_lookup(nmp, ctx, rqsaddr, NULL, RPCPROG_RQUOTA, rqvers, rqproto, timeo);
4501		if (error)
4502			return (error);
4503		if (rqsaddr->sa_family == AF_INET6)
4504			rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
4505		else if (rqsaddr->sa_family == AF_INET)
4506			rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
4507		else
4508			return (EIO);
4509		if (!rqport)
4510			return (ENOTSUP);
4511		microuptime(&now);
4512		nmp->nm_rqsaddrstamp = now.tv_sec;
4513	}
4514
4515	/* rquota request */
4516	nfsm_chain_null(&nmreq);
4517	nfsm_chain_null(&nmrep);
4518	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
4519	while (*path && (*path != '/'))
4520		path++;
4521	slen = strlen(path);
4522	nfsm_chain_build_alloc_init(error, &nmreq, 3 * NFSX_UNSIGNED + nfsm_rndup(slen));
4523	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
4524	if (type == GRPQUOTA)
4525		nfsm_chain_add_32(error, &nmreq, type);
4526	nfsm_chain_add_32(error, &nmreq, id);
4527	nfsm_chain_build_done(error, &nmreq);
4528	nfsmout_if(error);
4529	error = nfsm_rpchead2(nmp, (rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4530			RPCPROG_RQUOTA, rqvers, RPCRQUOTA_GET,
4531			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
4532	nfsmout_if(error);
4533	nmreq.nmc_mhead = NULL;
4534	error = nfs_aux_request(nmp, thd, rqsaddr, NULL,
4535			(rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
4536			mreq, R_XID32(xid), 0, timeo, &nmrep);
4537	nfsmout_if(error);
4538
4539	/* parse rquota response */
4540	nfsm_chain_get_32(error, &nmrep, val);
4541	if (!error && (val != RQUOTA_STAT_OK)) {
4542		if (val == RQUOTA_STAT_NOQUOTA)
4543			error = ENOENT;
4544		else if (val == RQUOTA_STAT_EPERM)
4545			error = EPERM;
4546		else
4547			error = EIO;
4548	}
4549	nfsm_chain_get_32(error, &nmrep, bsize);
4550	nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
4551	nfsm_chain_get_32(error, &nmrep, val);
4552	nfsmout_if(error);
4553	dqb->dqb_bhardlimit = (uint64_t)val * bsize;
4554	nfsm_chain_get_32(error, &nmrep, val);
4555	nfsmout_if(error);
4556	dqb->dqb_bsoftlimit = (uint64_t)val * bsize;
4557	nfsm_chain_get_32(error, &nmrep, val);
4558	nfsmout_if(error);
4559	dqb->dqb_curbytes = (uint64_t)val * bsize;
4560	nfsm_chain_get_32(error, &nmrep, dqb->dqb_ihardlimit);
4561	nfsm_chain_get_32(error, &nmrep, dqb->dqb_isoftlimit);
4562	nfsm_chain_get_32(error, &nmrep, dqb->dqb_curinodes);
4563	nfsm_chain_get_32(error, &nmrep, dqb->dqb_btime);
4564	nfsm_chain_get_32(error, &nmrep, dqb->dqb_itime);
4565	nfsmout_if(error);
4566	dqb->dqb_id = id;
4567nfsmout:
4568	nfsm_chain_cleanup(&nmreq);
4569	nfsm_chain_cleanup(&nmrep);
4570	return (error);
4571}
4572
4573int
4574nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
4575{
4576	nfsnode_t np;
4577	int error = 0, status, nfsvers, numops;
4578	u_int64_t xid;
4579	struct nfsm_chain nmreq, nmrep;
4580	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
4581	thread_t thd = vfs_context_thread(ctx);
4582	kauth_cred_t cred = vfs_context_ucred(ctx);
4583	struct nfsreq_secinfo_args si;
4584
4585	if (type != USRQUOTA)  /* NFSv4 only supports user quotas */
4586		return (ENOTSUP);
4587
4588	/* first check that the server supports any of the quota attributes */
4589	if (!NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
4590	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
4591	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED))
4592		return (ENOTSUP);
4593
4594	/*
4595	 * The credential passed to the server needs to have
4596	 * an effective uid that matches the given uid.
4597	 */
4598	if (id != kauth_cred_getuid(cred)) {
4599		struct posix_cred temp_pcred;
4600		posix_cred_t pcred = posix_cred_get(cred);
4601		bzero(&temp_pcred, sizeof(temp_pcred));
4602		temp_pcred.cr_uid = id;
4603		temp_pcred.cr_ngroups = pcred->cr_ngroups;
4604		bcopy(pcred->cr_groups, temp_pcred.cr_groups, sizeof(temp_pcred.cr_groups));
4605		cred = posix_cred_create(&temp_pcred);
4606		if (!IS_VALID_CRED(cred))
4607			return (ENOMEM);
4608	} else {
4609		kauth_cred_ref(cred);
4610	}
4611
4612	nfsvers = nmp->nm_vers;
4613	np = nmp->nm_dnp;
4614	if (!np)
4615		error = ENXIO;
4616	if (error || ((error = vnode_get(NFSTOV(np))))) {
4617		kauth_cred_unref(&cred);
4618		return(error);
4619	}
4620
4621	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
4622	nfsm_chain_null(&nmreq);
4623	nfsm_chain_null(&nmrep);
4624
4625	// PUTFH + GETATTR
4626	numops = 2;
4627	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
4628	nfsm_chain_add_compound_header(error, &nmreq, "quota", numops);
4629	numops--;
4630	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
4631	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
4632	numops--;
4633	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4634	NFS_CLEAR_ATTRIBUTES(bitmap);
4635	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_HARD);
4636	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_SOFT);
4637	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_USED);
4638	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
4639	nfsm_chain_build_done(error, &nmreq);
4640	nfsm_assert(error, (numops == 0), EPROTO);
4641	nfsmout_if(error);
4642	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);
4643	nfsm_chain_skip_tag(error, &nmrep);
4644	nfsm_chain_get_32(error, &nmrep, numops);
4645	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
4646	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4647	nfsm_assert(error, NFSTONMP(np), ENXIO);
4648	nfsmout_if(error);
4649	error = nfs4_parsefattr(&nmrep, NULL, NULL, NULL, dqb, NULL);
4650	nfsmout_if(error);
4651	nfsm_assert(error, NFSTONMP(np), ENXIO);
4652nfsmout:
4653	nfsm_chain_cleanup(&nmreq);
4654	nfsm_chain_cleanup(&nmrep);
4655	vnode_put(NFSTOV(np));
4656	kauth_cred_unref(&cred);
4657	return (error);
4658}
4659
4660int
4661nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t ctx)
4662{
4663	struct nfsmount *nmp;
4664	int cmd, type, error, nfsvers;
4665	uid_t euid = kauth_cred_getuid(vfs_context_ucred(ctx));
4666	struct dqblk *dqb = (struct dqblk*)datap;
4667
4668	if (!(nmp = VFSTONFS(mp)))
4669		return (ENXIO);
4670	nfsvers = nmp->nm_vers;
4671
4672	if (uid == ~0U)
4673		uid = euid;
4674
4675	/* we can only support Q_GETQUOTA */
4676	cmd = cmds >> SUBCMDSHIFT;
4677	switch (cmd) {
4678	case Q_GETQUOTA:
4679		break;
4680	case Q_QUOTAON:
4681	case Q_QUOTAOFF:
4682	case Q_SETQUOTA:
4683	case Q_SETUSE:
4684	case Q_SYNC:
4685	case Q_QUOTASTAT:
4686		return (ENOTSUP);
4687	default:
4688		return (EINVAL);
4689	}
4690
4691	type = cmds & SUBCMDMASK;
4692	if ((u_int)type >= MAXQUOTAS)
4693		return (EINVAL);
4694	if ((uid != euid) && ((error = vfs_context_suser(ctx))))
4695		return (error);
4696
4697	if (vfs_busy(mp, LK_NOWAIT))
4698		return (0);
4699	bzero(dqb, sizeof(*dqb));
4700	error = nmp->nm_funcs->nf_getquota(nmp, ctx, uid, type, dqb);
4701	vfs_unbusy(mp);
4702	return (error);
4703}
4704#endif
4705
4706/*
4707 * Flush out the buffer cache
4708 */
4709int nfs_sync_callout(vnode_t, void *);
4710
/* Arguments handed to nfs_sync_callout() for each vnode of a mount. */
struct nfs_sync_cargs {
	vfs_context_t	ctx;		/* context of the sync request (used for revoke and for the flushing thread) */
	int		waitfor;	/* wait mode passed through to nfs_flush() */
	int		error;		/* most recent nfs_flush() error, 0 if none */
};
4716
4717int
4718nfs_sync_callout(vnode_t vp, void *arg)
4719{
4720	struct nfs_sync_cargs *cargs = (struct nfs_sync_cargs*)arg;
4721	nfsnode_t np = VTONFS(vp);
4722	int error;
4723
4724	if (np->n_flag & NREVOKE) {
4725		vn_revoke(vp, REVOKEALL, cargs->ctx);
4726		return (VNODE_RETURNED);
4727	}
4728
4729	if (LIST_EMPTY(&np->n_dirtyblkhd))
4730		return (VNODE_RETURNED);
4731	if (np->n_wrbusy > 0)
4732		return (VNODE_RETURNED);
4733	if (np->n_bflag & (NBFLUSHINPROG|NBINVALINPROG))
4734		return (VNODE_RETURNED);
4735
4736	error = nfs_flush(np, cargs->waitfor, vfs_context_thread(cargs->ctx), 0);
4737	if (error)
4738		cargs->error = error;
4739
4740	return (VNODE_RETURNED);
4741}
4742
4743int
4744nfs_vfs_sync(mount_t mp, int waitfor, vfs_context_t ctx)
4745{
4746	struct nfs_sync_cargs cargs;
4747
4748	cargs.waitfor = waitfor;
4749	cargs.ctx = ctx;
4750	cargs.error = 0;
4751
4752	vnode_iterate(mp, 0, nfs_sync_callout, &cargs);
4753
4754	return (cargs.error);
4755}
4756
4757/*
4758 * NFS flat namespace lookup.
4759 * Currently unsupported.
4760 */
4761/*ARGSUSED*/
4762int
4763nfs_vfs_vget(
4764	__unused mount_t mp,
4765	__unused ino64_t ino,
4766	__unused vnode_t *vpp,
4767	__unused vfs_context_t ctx)
4768{
4769
4770	return (ENOTSUP);
4771}
4772
4773/*
4774 * At this point, this should never happen
4775 */
4776/*ARGSUSED*/
4777int
4778nfs_vfs_fhtovp(
4779	__unused mount_t mp,
4780	__unused int fhlen,
4781	__unused unsigned char *fhp,
4782	__unused vnode_t *vpp,
4783	__unused vfs_context_t ctx)
4784{
4785
4786	return (ENOTSUP);
4787}
4788
4789/*
4790 * Vnode pointer to File handle, should never happen either
4791 */
4792/*ARGSUSED*/
4793int
4794nfs_vfs_vptofh(
4795	__unused vnode_t vp,
4796	__unused int *fhlenp,
4797	__unused unsigned char *fhp,
4798	__unused vfs_context_t ctx)
4799{
4800
4801	return (ENOTSUP);
4802}
4803
4804/*
4805 * Vfs start routine, a no-op.
4806 */
4807/*ARGSUSED*/
4808int
4809nfs_vfs_start(
4810	__unused mount_t mp,
4811	__unused int flags,
4812	__unused vfs_context_t ctx)
4813{
4814
4815	return (0);
4816}
4817
4818/*
4819 * Build the mount info buffer for NFS_MOUNTINFO.
4820 */
4821int
4822nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb)
4823{
4824	struct xdrbuf xbinfo, xborig;
4825	char sotype[6];
4826	uint32_t origargsvers, origargslength;
4827	uint32_t infolength_offset, curargsopaquelength_offset, curargslength_offset, attrslength_offset, curargs_end_offset, end_offset;
4828	uint32_t miattrs[NFS_MIATTR_BITMAP_LEN];
4829	uint32_t miflags_mask[NFS_MIFLAG_BITMAP_LEN];
4830	uint32_t miflags[NFS_MIFLAG_BITMAP_LEN];
4831	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
4832	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
4833	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
4834	uint32_t loc, serv, addr, comp;
4835	int i, timeo, error = 0;
4836
4837	/* set up mount info attr and flag bitmaps */
4838	NFS_BITMAP_ZERO(miattrs, NFS_MIATTR_BITMAP_LEN);
4839	NFS_BITMAP_SET(miattrs, NFS_MIATTR_FLAGS);
4840	NFS_BITMAP_SET(miattrs, NFS_MIATTR_ORIG_ARGS);
4841	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_ARGS);
4842	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_LOC_INDEX);
4843	NFS_BITMAP_ZERO(miflags_mask, NFS_MIFLAG_BITMAP_LEN);
4844	NFS_BITMAP_ZERO(miflags, NFS_MIFLAG_BITMAP_LEN);
4845	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_DEAD);
4846	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_NOTRESP);
4847	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_RECOVERY);
4848	if (nmp->nm_state & NFSSTA_DEAD)
4849		NFS_BITMAP_SET(miflags, NFS_MIFLAG_DEAD);
4850	if ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO)) ||
4851	    ((nmp->nm_state & NFSSTA_LOCKTIMEO) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED)))
4852		NFS_BITMAP_SET(miflags, NFS_MIFLAG_NOTRESP);
4853	if (nmp->nm_state & NFSSTA_RECOVER)
4854		NFS_BITMAP_SET(miflags, NFS_MIFLAG_RECOVERY);
4855
4856	/* get original mount args length */
4857	xb_init_buffer(&xborig, nmp->nm_args, 2*XDRWORD);
4858	xb_get_32(error, &xborig, origargsvers); /* version */
4859	xb_get_32(error, &xborig, origargslength); /* args length */
4860	nfsmerr_if(error);
4861
4862	/* set up current mount attributes bitmap */
4863	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
4864	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
4865	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
4866	if (nmp->nm_vers >= NFS_VER4)
4867		NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_MINOR_VERSION);
4868	NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
4869	NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
4870	NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
4871	NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
4872	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
4873	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
4874	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
4875	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
4876	NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
4877	NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
4878	NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
4879	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
4880	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
4881	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
4882		NFS_BITMAP_SET(mattrs, NFS_MATTR_MOUNT_PORT);
4883	NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
4884	if (NMFLAG(nmp, SOFT))
4885		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
4886	if (nmp->nm_deadtimeout)
4887		NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
4888	if (nmp->nm_fh)
4889		NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
4890	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
4891	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
4892	if (origargsvers < NFS_ARGSVERSION_XDR)
4893		NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
4894	if (nmp->nm_realm)
4895		NFS_BITMAP_SET(mattrs, NFS_MATTR_REALM);
4896	if (nmp->nm_principal)
4897		NFS_BITMAP_SET(mattrs, NFS_MATTR_PRINCIPAL);
4898	if (nmp->nm_sprinc)
4899		NFS_BITMAP_SET(mattrs, NFS_MATTR_SVCPRINCIPAL);
4900
4901	/* set up current mount flags bitmap */
4902	/* first set the flags that we will be setting - either on OR off */
4903	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
4904	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
4905	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
4906	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
4907	if (nmp->nm_sotype == SOCK_DGRAM)
4908		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
4909	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
4910	if (nmp->nm_vers < NFS_VER4)
4911		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
4912	if (nmp->nm_vers >= NFS_VER3)
4913		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
4914	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
4915	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
4916	if (nmp->nm_vers >= NFS_VER4) {
4917		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_EPHEMERAL);
4918		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCALLBACK);
4919		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONAMEDATTR);
4920		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOACL);
4921		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_ACLONLY);
4922	}
4923	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NFC);
4924	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
4925	if (nmp->nm_vers < NFS_VER4)
4926		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTUDP);
4927	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTQUICK);
4928	/* now set the flags that should be set */
4929	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
4930	if (NMFLAG(nmp, SOFT))
4931		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
4932	if (NMFLAG(nmp, INTR))
4933		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
4934	if (NMFLAG(nmp, RESVPORT))
4935		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
4936	if ((nmp->nm_sotype == SOCK_DGRAM) && NMFLAG(nmp, NOCONNECT))
4937		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
4938	if (NMFLAG(nmp, DUMBTIMER))
4939		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
4940	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, CALLUMNT))
4941		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
4942	if ((nmp->nm_vers >= NFS_VER3) && NMFLAG(nmp, RDIRPLUS))
4943		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
4944	if (NMFLAG(nmp, NONEGNAMECACHE))
4945		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
4946	if (NMFLAG(nmp, MUTEJUKEBOX))
4947		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
4948	if (nmp->nm_vers >= NFS_VER4) {
4949		if (NMFLAG(nmp, EPHEMERAL))
4950			NFS_BITMAP_SET(mflags, NFS_MFLAG_EPHEMERAL);
4951		if (NMFLAG(nmp, NOCALLBACK))
4952			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCALLBACK);
4953		if (NMFLAG(nmp, NONAMEDATTR))
4954			NFS_BITMAP_SET(mflags, NFS_MFLAG_NONAMEDATTR);
4955		if (NMFLAG(nmp, NOACL))
4956			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOACL);
4957		if (NMFLAG(nmp, ACLONLY))
4958			NFS_BITMAP_SET(mflags, NFS_MFLAG_ACLONLY);
4959	}
4960	if (NMFLAG(nmp, NFC))
4961		NFS_BITMAP_SET(mflags, NFS_MFLAG_NFC);
4962	if (NMFLAG(nmp, NOQUOTA) || ((nmp->nm_vers >= NFS_VER4) &&
4963	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
4964	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
4965	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED)))
4966		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
4967	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, MNTUDP))
4968		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTUDP);
4969	if (NMFLAG(nmp, MNTQUICK))
4970		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTQUICK);
4971
4972	/* assemble info buffer: */
4973	xb_init_buffer(&xbinfo, NULL, 0);
4974	xb_add_32(error, &xbinfo, NFS_MOUNT_INFO_VERSION);
4975	infolength_offset = xb_offset(&xbinfo);
4976	xb_add_32(error, &xbinfo, 0);
4977	xb_add_bitmap(error, &xbinfo, miattrs, NFS_MIATTR_BITMAP_LEN);
4978	xb_add_bitmap(error, &xbinfo, miflags, NFS_MIFLAG_BITMAP_LEN);
4979	xb_add_32(error, &xbinfo, origargslength);
4980	if (!error)
4981		error = xb_add_bytes(&xbinfo, nmp->nm_args, origargslength, 0);
4982
4983	/* the opaque byte count for the current mount args values: */
4984	curargsopaquelength_offset = xb_offset(&xbinfo);
4985	xb_add_32(error, &xbinfo, 0);
4986
4987	/* Encode current mount args values */
4988	xb_add_32(error, &xbinfo, NFS_ARGSVERSION_XDR);
4989	curargslength_offset = xb_offset(&xbinfo);
4990	xb_add_32(error, &xbinfo, 0);
4991	xb_add_32(error, &xbinfo, NFS_XDRARGS_VERSION_0);
4992	xb_add_bitmap(error, &xbinfo, mattrs, NFS_MATTR_BITMAP_LEN);
4993	attrslength_offset = xb_offset(&xbinfo);
4994	xb_add_32(error, &xbinfo, 0);
4995	xb_add_bitmap(error, &xbinfo, mflags_mask, NFS_MFLAG_BITMAP_LEN);
4996	xb_add_bitmap(error, &xbinfo, mflags, NFS_MFLAG_BITMAP_LEN);
4997	xb_add_32(error, &xbinfo, nmp->nm_vers);		/* NFS_VERSION */
4998	if (nmp->nm_vers >= NFS_VER4)
4999		xb_add_32(error, &xbinfo, 0);			/* NFS_MINOR_VERSION */
5000	xb_add_32(error, &xbinfo, nmp->nm_rsize);		/* READ_SIZE */
5001	xb_add_32(error, &xbinfo, nmp->nm_wsize);		/* WRITE_SIZE */
5002	xb_add_32(error, &xbinfo, nmp->nm_readdirsize);		/* READDIR_SIZE */
5003	xb_add_32(error, &xbinfo, nmp->nm_readahead);		/* READAHEAD */
5004	xb_add_32(error, &xbinfo, nmp->nm_acregmin);		/* ATTRCACHE_REG_MIN */
5005	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MIN */
5006	xb_add_32(error, &xbinfo, nmp->nm_acregmax);		/* ATTRCACHE_REG_MAX */
5007	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MAX */
5008	xb_add_32(error, &xbinfo, nmp->nm_acdirmin);		/* ATTRCACHE_DIR_MIN */
5009	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MIN */
5010	xb_add_32(error, &xbinfo, nmp->nm_acdirmax);		/* ATTRCACHE_DIR_MAX */
5011	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MAX */
5012	xb_add_32(error, &xbinfo, nmp->nm_lockmode);		/* LOCK_MODE */
5013	if (nmp->nm_sec.count) {
5014		xb_add_32(error, &xbinfo, nmp->nm_sec.count);		/* SECURITY */
5015		nfsmerr_if(error);
5016		for (i=0; i < nmp->nm_sec.count; i++)
5017			xb_add_32(error, &xbinfo, nmp->nm_sec.flavors[i]);
5018	} else if (nmp->nm_servsec.count) {
5019		xb_add_32(error, &xbinfo, nmp->nm_servsec.count);	/* SECURITY */
5020		nfsmerr_if(error);
5021		for (i=0; i < nmp->nm_servsec.count; i++)
5022			xb_add_32(error, &xbinfo, nmp->nm_servsec.flavors[i]);
5023	} else {
5024		xb_add_32(error, &xbinfo, 1);				/* SECURITY */
5025		xb_add_32(error, &xbinfo, nmp->nm_auth);
5026	}
5027	xb_add_32(error, &xbinfo, nmp->nm_numgrps);		/* MAX_GROUP_LIST */
5028	nfsmerr_if(error);
5029	snprintf(sotype, sizeof(sotype), "%s%s", (nmp->nm_sotype == SOCK_DGRAM) ? "udp" : "tcp",
5030		nmp->nm_sofamily ? (nmp->nm_sofamily == AF_INET) ? "4" : "6" : "");
5031	xb_add_string(error, &xbinfo, sotype, strlen(sotype));	/* SOCKET_TYPE */
5032	xb_add_32(error, &xbinfo, ntohs(((struct sockaddr_in*)nmp->nm_saddr)->sin_port)); /* NFS_PORT */
5033	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
5034		xb_add_32(error, &xbinfo, nmp->nm_mountport);	/* MOUNT_PORT */
5035	timeo = (nmp->nm_timeo * 10) / NFS_HZ;
5036	xb_add_32(error, &xbinfo, timeo/10);			/* REQUEST_TIMEOUT */
5037	xb_add_32(error, &xbinfo, (timeo%10)*100000000);	/* REQUEST_TIMEOUT */
5038	if (NMFLAG(nmp, SOFT))
5039		xb_add_32(error, &xbinfo, nmp->nm_retry);	/* SOFT_RETRY_COUNT */
5040	if (nmp->nm_deadtimeout) {
5041		xb_add_32(error, &xbinfo, nmp->nm_deadtimeout);	/* DEAD_TIMEOUT */
5042		xb_add_32(error, &xbinfo, 0);			/* DEAD_TIMEOUT */
5043	}
5044	if (nmp->nm_fh)
5045		xb_add_fh(error, &xbinfo, &nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len); /* FH */
5046	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_numlocs);			/* FS_LOCATIONS */
5047	for (loc = 0; !error && (loc < nmp->nm_locations.nl_numlocs); loc++) {
5048		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servcount);
5049		for (serv = 0; !error && (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount); serv++) {
5050			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name,
5051				strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
5052			xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
5053			for (addr = 0; !error && (addr < nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
5054				xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
5055					strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
5056			xb_add_32(error, &xbinfo, 0); /* empty server info */
5057		}
5058		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount);
5059		for (comp = 0; !error && (comp < nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount); comp++)
5060			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp],
5061				strlen(nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp]));
5062		xb_add_32(error, &xbinfo, 0); /* empty fs location info */
5063	}
5064	xb_add_32(error, &xbinfo, vfs_flags(nmp->nm_mountp));		/* MNTFLAGS */
5065	if (origargsvers < NFS_ARGSVERSION_XDR)
5066		xb_add_string(error, &xbinfo, vfs_statfs(nmp->nm_mountp)->f_mntfromname,
5067			strlen(vfs_statfs(nmp->nm_mountp)->f_mntfromname));	/* MNTFROM */
5068	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REALM))
5069		xb_add_string(error, &xbinfo, nmp->nm_realm, strlen(nmp->nm_realm));
5070	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_PRINCIPAL))
5071		xb_add_string(error, &xbinfo, nmp->nm_principal, strlen(nmp->nm_principal));
5072	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SVCPRINCIPAL))
5073		xb_add_string(error, &xbinfo, nmp->nm_sprinc, strlen(nmp->nm_sprinc));
5074
5075	curargs_end_offset = xb_offset(&xbinfo);
5076
5077	/* NFS_MIATTR_CUR_LOC_INDEX */
5078	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_flags);
5079	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_loc);
5080	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_serv);
5081	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_addr);
5082
5083	xb_build_done(error, &xbinfo);
5084
5085	/* update opaque counts */
5086	end_offset = xb_offset(&xbinfo);
5087	if (!error) {
5088		error = xb_seek(&xbinfo, attrslength_offset);
5089		xb_add_32(error, &xbinfo, curargs_end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
5090	}
5091	if (!error) {
5092		error = xb_seek(&xbinfo, curargslength_offset);
5093		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
5094	}
5095	if (!error) {
5096		error = xb_seek(&xbinfo, curargsopaquelength_offset);
5097		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
5098	}
5099	if (!error) {
5100		error = xb_seek(&xbinfo, infolength_offset);
5101		xb_add_32(error, &xbinfo, end_offset - infolength_offset + XDRWORD/*version*/);
5102	}
5103	nfsmerr_if(error);
5104
5105	/* copy result xdrbuf to caller */
5106	*xb = xbinfo;
5107
5108	/* and mark the local copy as not needing cleanup */
5109	xbinfo.xb_flags &= ~XB_CLEANUP;
5110nfsmerr:
5111	xb_cleanup(&xbinfo);
5112	return (error);
5113}
5114
5115/*
5116 * Do that sysctl thang...
5117 */
5118int
5119nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
5120           user_addr_t newp, size_t newlen, vfs_context_t ctx)
5121{
5122	int error = 0, val, softnobrowse;
5123	struct sysctl_req *req = NULL;
5124	union union_vfsidctl vc;
5125	mount_t mp;
5126	struct nfsmount *nmp = NULL;
5127	struct vfsquery vq;
5128	boolean_t is_64_bit;
5129	fsid_t fsid;
5130	struct xdrbuf xb;
5131#if NFSSERVER
5132	struct nfs_exportfs *nxfs;
5133	struct nfs_export *nx;
5134	struct nfs_active_user_list *ulist;
5135	struct nfs_export_stat_desc stat_desc;
5136	struct nfs_export_stat_rec statrec;
5137	struct nfs_user_stat_node *unode, *unode_next;
5138	struct nfs_user_stat_desc ustat_desc;
5139	struct nfs_user_stat_user_rec ustat_rec;
5140	struct nfs_user_stat_path_rec upath_rec;
5141	uint bytes_avail, bytes_total, recs_copied;
5142	uint numExports, totlen, pos, numRecs, count;
5143#endif /* NFSSERVER */
5144
5145	/*
5146	 * All names at this level are terminal.
5147	 */
5148	if (namelen > 1)
5149		return (ENOTDIR);	/* overloaded */
5150
5151	is_64_bit = vfs_context_is64bit(ctx);
5152
5153	/* common code for "new style" VFS_CTL sysctl, get the mount. */
5154	switch (name[0]) {
5155	case VFS_CTL_TIMEO:
5156	case VFS_CTL_QUERY:
5157	case VFS_CTL_NOLOCKS:
5158		req = CAST_DOWN(struct sysctl_req *, oldp);
5159		error = SYSCTL_IN(req, &vc, is_64_bit? sizeof(vc.vc64):sizeof(vc.vc32));
5160		if (error)
5161			return (error);
5162		mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
5163		if (mp == NULL)
5164			return (ENOENT);
5165		nmp = VFSTONFS(mp);
5166		if (nmp == NULL)
5167			return (ENOENT);
5168		bzero(&vq, sizeof(vq));
5169		req->newidx = 0;
5170		if (is_64_bit) {
5171			req->newptr = vc.vc64.vc_ptr;
5172			req->newlen = (size_t)vc.vc64.vc_len;
5173		} else {
5174			req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr);
5175			req->newlen = vc.vc32.vc_len;
5176		}
5177	}
5178
5179	switch(name[0]) {
5180	case NFS_NFSSTATS:
5181		if (!oldp) {
5182			*oldlenp = sizeof nfsstats;
5183			return (0);
5184		}
5185
5186		if (*oldlenp < sizeof nfsstats) {
5187			*oldlenp = sizeof nfsstats;
5188			return (ENOMEM);
5189		}
5190
5191		error = copyout(&nfsstats, oldp, sizeof nfsstats);
5192		if (error)
5193			return (error);
5194
5195		if (newp && newlen != sizeof nfsstats)
5196			return (EINVAL);
5197
5198		if (newp)
5199			return copyin(newp, &nfsstats, sizeof nfsstats);
5200		return (0);
5201	case NFS_MOUNTINFO:
5202		/* read in the fsid */
5203		if (*oldlenp < sizeof(fsid))
5204			return (EINVAL);
5205		if ((error = copyin(oldp, &fsid, sizeof(fsid))))
5206			return (error);
5207		/* swizzle it back to host order */
5208		fsid.val[0] = ntohl(fsid.val[0]);
5209		fsid.val[1] = ntohl(fsid.val[1]);
5210		/* find mount and make sure it's NFS */
5211		if (((mp = vfs_getvfs(&fsid))) == NULL)
5212			return (ENOENT);
5213		if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
5214			return (EINVAL);
5215		if (((nmp = VFSTONFS(mp))) == NULL)
5216			return (ENOENT);
5217		xb_init(&xb, 0);
5218		if ((error = nfs_mountinfo_assemble(nmp, &xb)))
5219			return (error);
5220		if (*oldlenp < xb.xb_u.xb_buffer.xbb_len)
5221			error = ENOMEM;
5222		else
5223			error = copyout(xb_buffer_base(&xb), oldp, xb.xb_u.xb_buffer.xbb_len);
5224		*oldlenp = xb.xb_u.xb_buffer.xbb_len;
5225		xb_cleanup(&xb);
5226		break;
5227#if NFSSERVER
5228	case NFS_EXPORTSTATS:
5229		/* setup export stat descriptor */
5230		stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
5231
5232		if (!nfsrv_is_initialized()) {
5233			stat_desc.rec_count = 0;
5234			if (oldp && (*oldlenp >= sizeof(struct nfs_export_stat_desc)))
5235				error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5236			*oldlenp = sizeof(struct nfs_export_stat_desc);
5237			return (error);
5238		}
5239
5240		/* Count the number of exported directories */
5241		lck_rw_lock_shared(&nfsrv_export_rwlock);
5242		numExports = 0;
5243		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
5244			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
5245					numExports += 1;
5246
5247		/* update stat descriptor's export record count */
5248		stat_desc.rec_count = numExports;
5249
5250		/* calculate total size of required buffer */
5251		totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
5252
5253		/* Check caller's buffer */
5254		if (oldp == 0) {
5255			lck_rw_done(&nfsrv_export_rwlock);
5256			/* indicate required buffer len */
5257			*oldlenp = totlen;
5258			return (0);
5259		}
5260
5261		/* We require the caller's buffer to be at least large enough to hold the descriptor */
5262		if (*oldlenp < sizeof(struct nfs_export_stat_desc)) {
5263			lck_rw_done(&nfsrv_export_rwlock);
5264			/* indicate required buffer len */
5265			*oldlenp = totlen;
5266			return (ENOMEM);
5267		}
5268
5269		/* indicate required buffer len */
5270		*oldlenp = totlen;
5271
5272		/* check if export table is empty */
5273		if (!numExports) {
5274			lck_rw_done(&nfsrv_export_rwlock);
5275			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5276			return (error);
5277		}
5278
5279		/* calculate how many actual export stat records fit into caller's buffer */
5280		numRecs = (*oldlenp - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
5281
5282		if (!numRecs) {
5283			/* caller's buffer can only accomodate descriptor */
5284			lck_rw_done(&nfsrv_export_rwlock);
5285			stat_desc.rec_count = 0;
5286			error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
5287			return (error);
5288		}
5289
5290		/* adjust to actual number of records to copyout to caller's buffer */
5291		if (numRecs > numExports)
5292			numRecs = numExports;
5293
5294		/* set actual number of records we are returning */
5295		stat_desc.rec_count = numRecs;
5296
5297		/* first copy out the stat descriptor */
5298		pos = 0;
5299		error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
5300		if (error) {
5301			lck_rw_done(&nfsrv_export_rwlock);
5302			return (error);
5303		}
5304		pos += sizeof(struct nfs_export_stat_desc);
5305
5306		/* Loop through exported directories */
5307		count = 0;
5308		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
5309			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
5310
5311				if (count >= numRecs)
5312					break;
5313
5314				/* build exported filesystem path */
5315				snprintf(statrec.path, sizeof(statrec.path), "%s%s%s",
5316					nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
5317					nx->nx_path);
5318
5319				/* build the 64-bit export stat counters */
5320				statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
5321						nx->nx_stats.ops.lo;
5322				statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
5323						nx->nx_stats.bytes_read.lo;
5324				statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
5325						nx->nx_stats.bytes_written.lo;
5326				error = copyout(&statrec, oldp + pos, sizeof(statrec));
5327				if (error) {
5328					lck_rw_done(&nfsrv_export_rwlock);
5329					return (error);
5330				}
5331				/* advance buffer position */
5332				pos += sizeof(statrec);
5333			}
5334		}
5335		lck_rw_done(&nfsrv_export_rwlock);
5336		break;
5337	case NFS_USERSTATS:
5338		/* init structures used for copying out of kernel */
5339		ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
5340		ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
5341		upath_rec.rec_type = NFS_USER_STAT_PATH_REC;
5342
5343		/* initialize counters */
5344		bytes_total = sizeof(struct nfs_user_stat_desc);
5345		bytes_avail  = *oldlenp;
5346		recs_copied = 0;
5347
5348		if (!nfsrv_is_initialized()) /* NFS server not initialized, so no stats */
5349			goto ustat_skip;
5350
5351		/* reclaim old expired user nodes */
5352		nfsrv_active_user_list_reclaim();
5353
5354		/* reserve space for the buffer descriptor */
5355		if (bytes_avail >= sizeof(struct nfs_user_stat_desc))
5356			bytes_avail -= sizeof(struct nfs_user_stat_desc);
5357		else
5358			bytes_avail = 0;
5359
5360		/* put buffer position past the buffer descriptor */
5361		pos = sizeof(struct nfs_user_stat_desc);
5362
5363		/* Loop through exported directories */
5364		lck_rw_lock_shared(&nfsrv_export_rwlock);
5365		LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
5366			LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
5367				/* copy out path */
5368				if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
5369					snprintf(upath_rec.path, sizeof(upath_rec.path), "%s%s%s",
5370					    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
5371					    nx->nx_path);
5372
5373					error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
5374					if (error) {
5375						/* punt */
5376						goto ustat_done;
5377					}
5378
5379					pos += sizeof(struct nfs_user_stat_path_rec);
5380					bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
5381					recs_copied++;
5382				}
5383				else {
5384					/* Caller's buffer is exhausted */
5385					bytes_avail = 0;
5386				}
5387
5388				bytes_total += sizeof(struct nfs_user_stat_path_rec);
5389
5390				/* Scan through all user nodes of this export */
5391				ulist = &nx->nx_user_list;
5392				lck_mtx_lock(&ulist->user_mutex);
5393				for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
5394					unode_next = TAILQ_NEXT(unode, lru_link);
5395
5396					/* copy out node if there is space */
5397					if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
5398						/* prepare a user stat rec for copying out */
5399						ustat_rec.uid = unode->uid;
5400						bcopy(&unode->sock, &ustat_rec.sock, unode->sock.ss_len);
5401						ustat_rec.ops = unode->ops;
5402						ustat_rec.bytes_read = unode->bytes_read;
5403						ustat_rec.bytes_written = unode->bytes_written;
5404						ustat_rec.tm_start = unode->tm_start;
5405						ustat_rec.tm_last = unode->tm_last;
5406
5407						error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));
5408
5409						if (error) {
5410							/* punt */
5411							lck_mtx_unlock(&ulist->user_mutex);
5412							goto ustat_done;
5413						}
5414
5415						pos += sizeof(struct nfs_user_stat_user_rec);
5416						bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
5417						recs_copied++;
5418					}
5419					else {
5420						/* Caller's buffer is exhausted */
5421						bytes_avail = 0;
5422					}
5423					bytes_total += sizeof(struct nfs_user_stat_user_rec);
5424				}
5425				/* can unlock this export's list now */
5426				lck_mtx_unlock(&ulist->user_mutex);
5427			}
5428		}
5429
5430ustat_done:
5431		/* unlock the export table */
5432		lck_rw_done(&nfsrv_export_rwlock);
5433
5434ustat_skip:
5435		/* indicate number of actual records copied */
5436		ustat_desc.rec_count = recs_copied;
5437
5438		if (!error) {
5439			/* check if there was enough room for the buffer descriptor */
5440			if (*oldlenp >= sizeof(struct nfs_user_stat_desc))
5441				error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
5442			else
5443				error = ENOMEM;
5444
5445			/* always indicate required buffer size */
5446			*oldlenp = bytes_total;
5447		}
5448		break;
5449	case NFS_USERCOUNT:
5450		if (!oldp) {
5451			*oldlenp = sizeof(nfsrv_user_stat_node_count);
5452			return (0);
5453		}
5454
5455		if (*oldlenp < sizeof(nfsrv_user_stat_node_count)) {
5456			*oldlenp = sizeof(nfsrv_user_stat_node_count);
5457			return (ENOMEM);
5458		}
5459
5460		if (nfsrv_is_initialized()) {
5461			/* reclaim old expired user nodes */
5462			nfsrv_active_user_list_reclaim();
5463		}
5464
5465		error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
5466		break;
5467#endif /* NFSSERVER */
5468	case VFS_CTL_NOLOCKS:
5469 		if (req->oldptr != USER_ADDR_NULL) {
5470			lck_mtx_lock(&nmp->nm_lock);
5471			val = (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) ? 1 : 0;
5472			lck_mtx_unlock(&nmp->nm_lock);
5473 			error = SYSCTL_OUT(req, &val, sizeof(val));
5474 			if (error)
5475 				return (error);
5476 		}
5477 		if (req->newptr != USER_ADDR_NULL) {
5478 			error = SYSCTL_IN(req, &val, sizeof(val));
5479 			if (error)
5480 				return (error);
5481			lck_mtx_lock(&nmp->nm_lock);
5482			if (nmp->nm_lockmode == NFS_LOCK_MODE_LOCAL) {
5483				/* can't toggle locks when using local locks */
5484				error = EINVAL;
5485			} else if ((nmp->nm_vers >= NFS_VER4) && val) {
5486				/* can't disable locks for NFSv4 */
5487				error = EINVAL;
5488			} else if (val) {
5489				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
5490					nfs_lockd_mount_unregister(nmp);
5491				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
5492				nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
5493			} else {
5494				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED))
5495					nfs_lockd_mount_register(nmp);
5496				nmp->nm_lockmode = NFS_LOCK_MODE_ENABLED;
5497			}
5498			lck_mtx_unlock(&nmp->nm_lock);
5499 		}
5500		break;
5501	case VFS_CTL_QUERY:
5502		lck_mtx_lock(&nmp->nm_lock);
5503		/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
5504		softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
5505		if (!softnobrowse && (nmp->nm_state & NFSSTA_TIMEO))
5506			vq.vq_flags |= VQ_NOTRESP;
5507		if (!softnobrowse && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO) && !NMFLAG(nmp, MUTEJUKEBOX))
5508			vq.vq_flags |= VQ_NOTRESP;
5509		if (!softnobrowse && (nmp->nm_state & NFSSTA_LOCKTIMEO) &&
5510		    (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
5511			vq.vq_flags |= VQ_NOTRESP;
5512		if (nmp->nm_state & NFSSTA_DEAD)
5513			vq.vq_flags |= VQ_DEAD;
5514		lck_mtx_unlock(&nmp->nm_lock);
5515		error = SYSCTL_OUT(req, &vq, sizeof(vq));
5516		break;
5517 	case VFS_CTL_TIMEO:
5518 		if (req->oldptr != USER_ADDR_NULL) {
5519			lck_mtx_lock(&nmp->nm_lock);
5520			val = nmp->nm_tprintf_initial_delay;
5521			lck_mtx_unlock(&nmp->nm_lock);
5522 			error = SYSCTL_OUT(req, &val, sizeof(val));
5523 			if (error)
5524 				return (error);
5525 		}
5526 		if (req->newptr != USER_ADDR_NULL) {
5527 			error = SYSCTL_IN(req, &val, sizeof(val));
5528 			if (error)
5529 				return (error);
5530			lck_mtx_lock(&nmp->nm_lock);
5531 			if (val < 0)
5532 				nmp->nm_tprintf_initial_delay = 0;
5533			else
5534				nmp->nm_tprintf_initial_delay = val;
5535			lck_mtx_unlock(&nmp->nm_lock);
5536 		}
5537		break;
5538	default:
5539		return (ENOTSUP);
5540	}
5541	return (error);
5542}
5543