1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include "opt_bootp.h"
41#include "opt_nfsroot.h"
42#include "opt_kern_tls.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/clock.h>
50#include <sys/jail.h>
51#include <sys/limits.h>
52#include <sys/lock.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/mount.h>
57#include <sys/proc.h>
58#include <sys/socket.h>
59#include <sys/socketvar.h>
60#include <sys/sockio.h>
61#include <sys/sysctl.h>
62#include <sys/vnode.h>
63#include <sys/signalvar.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67#include <vm/uma.h>
68
69#include <net/if.h>
70#include <net/route.h>
71#include <net/route/route_ctl.h>
72#include <netinet/in.h>
73
74#include <fs/nfs/nfsport.h>
75#include <fs/nfsclient/nfsnode.h>
76#include <fs/nfsclient/nfsmount.h>
77#include <fs/nfsclient/nfs.h>
78#include <nfs/nfsdiskless.h>
79
80#include <rpc/rpcsec_tls.h>
81
82FEATURE(nfscl, "NFSv4 client");
83
84extern int nfscl_ticks;
85extern struct timeval nfsboottime;
86extern int nfsrv_useacl;
87extern int nfscl_debuglevel;
88extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
89extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
90extern struct mtx ncl_iod_mutex;
91NFSCLSTATEMUTEX;
92extern struct mtx nfsrv_dslock_mtx;
93
94MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
95MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
96
97SYSCTL_DECL(_vfs_nfs);
98static int nfs_ip_paranoia = 1;
99SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
100    &nfs_ip_paranoia, 0, "");
101static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
102SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
103        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
104/* how long between console messages "nfs server foo not responding" */
105static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
106SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
107        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
108#ifdef NFS_DEBUG
109int nfs_debug;
110SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
111    "Toggle debug flag");
112#endif
113
114static int	nfs_mountroot(struct mount *);
115static void	nfs_sec_name(char *, int *);
116static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
117		    struct nfs_args *argp, const char *, struct ucred *,
118		    struct thread *);
119static int	mountnfs(struct nfs_args *, struct mount *,
120		    struct sockaddr *, char *, u_char *, int, u_char *, int,
121		    u_char *, int, struct vnode **, struct ucred *,
122		    struct thread *, int, int, int, uint32_t, char *);
123static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
124		    struct sockaddr_storage *, int *, off_t *,
125		    struct timeval *);
126static vfs_mount_t nfs_mount;
127static vfs_cmount_t nfs_cmount;
128static vfs_unmount_t nfs_unmount;
129static vfs_root_t nfs_root;
130static vfs_statfs_t nfs_statfs;
131static vfs_sync_t nfs_sync;
132static vfs_sysctl_t nfs_sysctl;
133static vfs_purge_t nfs_purge;
134
135/*
136 * nfs vfs operations.
137 */
138static struct vfsops nfs_vfsops = {
139	.vfs_init =		ncl_init,
140	.vfs_mount =		nfs_mount,
141	.vfs_cmount =		nfs_cmount,
142	.vfs_root =		vfs_cache_root,
143	.vfs_cachedroot =	nfs_root,
144	.vfs_statfs =		nfs_statfs,
145	.vfs_sync =		nfs_sync,
146	.vfs_uninit =		ncl_uninit,
147	.vfs_unmount =		nfs_unmount,
148	.vfs_sysctl =		nfs_sysctl,
149	.vfs_purge =		nfs_purge,
150};
151VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
152
153/* So that loader and kldload(2) can find us, wherever we are.. */
154MODULE_VERSION(nfs, 1);
155MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
156MODULE_DEPEND(nfs, krpc, 1, 1, 1);
157MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
158MODULE_DEPEND(nfs, xdr, 1, 1, 1);
159
160/*
161 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
162 * can be shared by both NFS clients. It is declared here so that it
163 * will be defined for kernels built without NFS_ROOT, although it
164 * isn't used in that case.
165 */
166#if !defined(NFS_ROOT)
167struct nfs_diskless	nfs_diskless = { { { 0 } } };
168struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
169int			nfs_diskless_valid = 0;
170#endif
171
172SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
173    &nfs_diskless_valid, 0,
174    "Has the diskless struct been filled correctly");
175
176SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
177    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
178
179SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
180    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
181    "%Ssockaddr_in", "Diskless root nfs address");
182
183void		newnfsargs_ntoh(struct nfs_args *);
184static int	nfs_mountdiskless(char *,
185		    struct sockaddr_in *, struct nfs_args *,
186		    struct thread *, struct vnode **, struct mount *);
187static void	nfs_convert_diskless(void);
188static void	nfs_convert_oargs(struct nfs_args *args,
189		    struct onfs_args *oargs);
190
191int
192newnfs_iosize(struct nfsmount *nmp)
193{
194	int iosize, maxio;
195
196	/* First, set the upper limit for iosize */
197	if (nmp->nm_flag & NFSMNT_NFSV4) {
198		maxio = NFS_MAXBSIZE;
199	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
200		if (nmp->nm_sotype == SOCK_DGRAM)
201			maxio = NFS_MAXDGRAMDATA;
202		else
203			maxio = NFS_MAXBSIZE;
204	} else {
205		maxio = NFS_V2MAXDATA;
206	}
207	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208		nmp->nm_rsize = maxio;
209	if (nmp->nm_rsize > NFS_MAXBSIZE)
210		nmp->nm_rsize = NFS_MAXBSIZE;
211	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212		nmp->nm_readdirsize = maxio;
213	if (nmp->nm_readdirsize > nmp->nm_rsize)
214		nmp->nm_readdirsize = nmp->nm_rsize;
215	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216		nmp->nm_wsize = maxio;
217	if (nmp->nm_wsize > NFS_MAXBSIZE)
218		nmp->nm_wsize = NFS_MAXBSIZE;
219
220	/*
221	 * Calculate the size used for io buffers.  Use the larger
222	 * of the two sizes to minimise nfs requests but make sure
223	 * that it is at least one VM page to avoid wasting buffer
224	 * space.  It must also be at least NFS_DIRBLKSIZ, since
225	 * that is the buffer size used for directories.
226	 */
227	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228	iosize = imax(iosize, PAGE_SIZE);
229	iosize = imax(iosize, NFS_DIRBLKSIZ);
230	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231	return (iosize);
232}
233
234static void
235nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236{
237
238	args->version = NFS_ARGSVERSION;
239	args->addr = oargs->addr;
240	args->addrlen = oargs->addrlen;
241	args->sotype = oargs->sotype;
242	args->proto = oargs->proto;
243	args->fh = oargs->fh;
244	args->fhsize = oargs->fhsize;
245	args->flags = oargs->flags;
246	args->wsize = oargs->wsize;
247	args->rsize = oargs->rsize;
248	args->readdirsize = oargs->readdirsize;
249	args->timeo = oargs->timeo;
250	args->retrans = oargs->retrans;
251	args->readahead = oargs->readahead;
252	args->hostname = oargs->hostname;
253}
254
255static void
256nfs_convert_diskless(void)
257{
258
259	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260		sizeof(struct ifaliasreq));
261	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262		sizeof(struct sockaddr_in));
263	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265		nfsv3_diskless.root_fhsize = NFSX_MYFH;
266		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267	} else {
268		nfsv3_diskless.root_fhsize = NFSX_V2FH;
269		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270	}
271	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272		sizeof(struct sockaddr_in));
273	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274	nfsv3_diskless.root_time = nfs_diskless.root_time;
275	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276		MAXHOSTNAMELEN);
277	nfs_diskless_valid = 3;
278}
279
280/*
281 * nfs statfs call
282 */
283static int
284nfs_statfs(struct mount *mp, struct statfs *sbp)
285{
286	struct vnode *vp;
287	struct thread *td;
288	struct nfsmount *nmp = VFSTONFS(mp);
289	struct nfsvattr nfsva;
290	struct nfsfsinfo fs;
291	struct nfsstatfs sb;
292	int error = 0, attrflag, gotfsinfo = 0, ret;
293	struct nfsnode *np;
294
295	td = curthread;
296
297	error = vfs_busy(mp, MBF_NOWAIT);
298	if (error)
299		return (error);
300	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
301	if (error) {
302		vfs_unbusy(mp);
303		return (error);
304	}
305	vp = NFSTOV(np);
306	mtx_lock(&nmp->nm_mtx);
307	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
308		mtx_unlock(&nmp->nm_mtx);
309		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
310		    &attrflag, NULL);
311		if (!error)
312			gotfsinfo = 1;
313	} else
314		mtx_unlock(&nmp->nm_mtx);
315	if (!error)
316		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
317		    &attrflag, NULL);
318	if (error != 0)
319		NFSCL_DEBUG(2, "statfs=%d\n", error);
320	if (attrflag == 0) {
321		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
322		    td->td_ucred, td, &nfsva, NULL, NULL);
323		if (ret) {
324			/*
325			 * Just set default values to get things going.
326			 */
327			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
328			nfsva.na_vattr.va_type = VDIR;
329			nfsva.na_vattr.va_mode = 0777;
330			nfsva.na_vattr.va_nlink = 100;
331			nfsva.na_vattr.va_uid = (uid_t)0;
332			nfsva.na_vattr.va_gid = (gid_t)0;
333			nfsva.na_vattr.va_fileid = 2;
334			nfsva.na_vattr.va_gen = 1;
335			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
336			nfsva.na_vattr.va_size = 512 * 1024;
337		}
338	}
339	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
340	if (!error) {
341	    mtx_lock(&nmp->nm_mtx);
342	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
343		nfscl_loadfsinfo(nmp, &fs);
344	    nfscl_loadsbinfo(nmp, &sb, sbp);
345	    sbp->f_iosize = newnfs_iosize(nmp);
346	    mtx_unlock(&nmp->nm_mtx);
347	    if (sbp != &mp->mnt_stat) {
348		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
349		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
350	    }
351	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
352	} else if (NFS_ISV4(vp)) {
353		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
354	}
355	vput(vp);
356	vfs_unbusy(mp);
357	return (error);
358}
359
360/*
361 * nfs version 3 fsinfo rpc call
362 */
363int
364ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
365    struct thread *td)
366{
367	struct nfsfsinfo fs;
368	struct nfsvattr nfsva;
369	int error, attrflag;
370
371	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
372	if (!error) {
373		if (attrflag)
374			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
375			    1);
376		mtx_lock(&nmp->nm_mtx);
377		nfscl_loadfsinfo(nmp, &fs);
378		mtx_unlock(&nmp->nm_mtx);
379	}
380	return (error);
381}
382
383/*
384 * Mount a remote root fs via. nfs. This depends on the info in the
385 * nfs_diskless structure that has been filled in properly by some primary
386 * bootstrap.
387 * It goes something like this:
388 * - do enough of "ifconfig" by calling ifioctl() so that the system
389 *   can talk to the server
390 * - If nfs_diskless.mygateway is filled in, use that address as
391 *   a default gateway.
392 * - build the rootfs mount point and call mountnfs() to do the rest.
393 *
394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
395 * structure, as well as other global NFS client variables here, as
396 * nfs_mountroot() will be called once in the boot before any other NFS
397 * client activity occurs.
398 */
399static int
400nfs_mountroot(struct mount *mp)
401{
402	struct thread *td = curthread;
403	struct nfsv3_diskless *nd = &nfsv3_diskless;
404	struct socket *so;
405	struct vnode *vp;
406	struct ifreq ir;
407	int error;
408	u_long l;
409	char buf[128];
410	char *cp;
411
412#if defined(BOOTP_NFSROOT) && defined(BOOTP)
413	bootpc_init();		/* use bootp to get nfs_diskless filled in */
414#elif defined(NFS_ROOT)
415	nfs_setup_diskless();
416#endif
417
418	if (nfs_diskless_valid == 0)
419		return (-1);
420	if (nfs_diskless_valid == 1)
421		nfs_convert_diskless();
422
423	/*
424	 * Do enough of ifconfig(8) so that the critical net interface can
425	 * talk to the server.
426	 */
427	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428	    td->td_ucred, td);
429	if (error)
430		panic("nfs_mountroot: socreate(%04x): %d",
431			nd->myif.ifra_addr.sa_family, error);
432
433#if 0 /* XXX Bad idea */
434	/*
435	 * We might not have been told the right interface, so we pass
436	 * over the first ten interfaces of the same kind, until we get
437	 * one of them configured.
438	 */
439
440	for (i = strlen(nd->myif.ifra_name) - 1;
441		nd->myif.ifra_name[i] >= '0' &&
442		nd->myif.ifra_name[i] <= '9';
443		nd->myif.ifra_name[i] ++) {
444		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445		if(!error)
446			break;
447	}
448#endif
449	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450	if (error)
451		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453		ir.ifr_mtu = strtol(cp, NULL, 10);
454		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455		freeenv(cp);
456		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457		if (error)
458			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459	}
460	soclose(so);
461
462	/*
463	 * If the gateway field is filled in, set it as the default route.
464	 * Note that pxeboot will set a default route of 0 if the route
465	 * is not set by the DHCP server.  Check also for a value of 0
466	 * to avoid panicking inappropriately in that situation.
467	 */
468	if (nd->mygateway.sin_len != 0 &&
469	    nd->mygateway.sin_addr.s_addr != 0) {
470		struct sockaddr_in mask, sin;
471		struct epoch_tracker et;
472		struct rt_addrinfo info;
473		struct rib_cmd_info rc;
474
475		bzero((caddr_t)&mask, sizeof(mask));
476		sin = mask;
477		sin.sin_family = AF_INET;
478		sin.sin_len = sizeof(sin);
479                /* XXX MRT use table 0 for this sort of thing */
480		NET_EPOCH_ENTER(et);
481		CURVNET_SET(TD_TO_VNET(td));
482
483		bzero((caddr_t)&info, sizeof(info));
484		info.rti_flags = RTF_UP | RTF_GATEWAY;
485		info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
486		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
487		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
488
489		error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
490		CURVNET_RESTORE();
491		NET_EPOCH_EXIT(et);
492		if (error)
493			panic("nfs_mountroot: RTM_ADD: %d", error);
494	}
495
496	/*
497	 * Create the rootfs mount point.
498	 */
499	nd->root_args.fh = nd->root_fh;
500	nd->root_args.fhsize = nd->root_fhsize;
501	l = ntohl(nd->root_saddr.sin_addr.s_addr);
502	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
503		(l >> 24) & 0xff, (l >> 16) & 0xff,
504		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
505	printf("NFS ROOT: %s\n", buf);
506	nd->root_args.hostname = buf;
507	if ((error = nfs_mountdiskless(buf,
508	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
509		return (error);
510	}
511
512	/*
513	 * This is not really an nfs issue, but it is much easier to
514	 * set hostname here and then let the "/etc/rc.xxx" files
515	 * mount the right /var based upon its preset value.
516	 */
517	mtx_lock(&prison0.pr_mtx);
518	strlcpy(prison0.pr_hostname, nd->my_hostnam,
519	    sizeof(prison0.pr_hostname));
520	mtx_unlock(&prison0.pr_mtx);
521	inittodr(ntohl(nd->root_time));
522	return (0);
523}
524
525/*
526 * Internal version of mount system call for diskless setup.
527 */
528static int
529nfs_mountdiskless(char *path,
530    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
531    struct vnode **vpp, struct mount *mp)
532{
533	struct sockaddr *nam;
534	int dirlen, error;
535	char *dirpath;
536
537	/*
538	 * Find the directory path in "path", which also has the server's
539	 * name/ip address in it.
540	 */
541	dirpath = strchr(path, ':');
542	if (dirpath != NULL)
543		dirlen = strlen(++dirpath);
544	else
545		dirlen = 0;
546	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
547	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
548	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
549	    NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL)) != 0) {
550		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
551		return (error);
552	}
553	return (0);
554}
555
556static void
557nfs_sec_name(char *sec, int *flagsp)
558{
559	if (!strcmp(sec, "krb5"))
560		*flagsp |= NFSMNT_KERB;
561	else if (!strcmp(sec, "krb5i"))
562		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
563	else if (!strcmp(sec, "krb5p"))
564		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
565}
566
567static void
568nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
569    const char *hostname, struct ucred *cred, struct thread *td)
570{
571	int adjsock;
572	char *p;
573
574	/*
575	 * Set read-only flag if requested; otherwise, clear it if this is
576	 * an update.  If this is not an update, then either the read-only
577	 * flag is already clear, or this is a root mount and it was set
578	 * intentionally at some previous point.
579	 */
580	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
581		MNT_ILOCK(mp);
582		mp->mnt_flag |= MNT_RDONLY;
583		MNT_IUNLOCK(mp);
584	} else if (mp->mnt_flag & MNT_UPDATE) {
585		MNT_ILOCK(mp);
586		mp->mnt_flag &= ~MNT_RDONLY;
587		MNT_IUNLOCK(mp);
588	}
589
590	/*
591	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
592	 * no sense in that context.  Also, set up appropriate retransmit
593	 * and soft timeout behavior.
594	 */
595	if (argp->sotype == SOCK_STREAM) {
596		nmp->nm_flag &= ~NFSMNT_NOCONN;
597		nmp->nm_timeo = NFS_MAXTIMEO;
598		if ((argp->flags & NFSMNT_NFSV4) != 0)
599			nmp->nm_retry = INT_MAX;
600		else
601			nmp->nm_retry = NFS_RETRANS_TCP;
602	}
603
604	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
605	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
606		argp->flags &= ~NFSMNT_RDIRPLUS;
607		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
608	}
609
610	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
611	if (nmp->nm_minorvers == 0) {
612		argp->flags &= ~NFSMNT_ONEOPENOWN;
613		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
614	}
615
616	/* Re-bind if rsrvd port requested and wasn't on one */
617	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
618		  && (argp->flags & NFSMNT_RESVPORT);
619	/* Also re-bind if we're switching to/from a connected UDP socket */
620	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
621		    (argp->flags & NFSMNT_NOCONN));
622
623	/* Update flags atomically.  Don't change the lock bits. */
624	nmp->nm_flag = argp->flags | nmp->nm_flag;
625
626	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
627		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
628		if (nmp->nm_timeo < NFS_MINTIMEO)
629			nmp->nm_timeo = NFS_MINTIMEO;
630		else if (nmp->nm_timeo > NFS_MAXTIMEO)
631			nmp->nm_timeo = NFS_MAXTIMEO;
632	}
633
634	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
635		nmp->nm_retry = argp->retrans;
636		if (nmp->nm_retry > NFS_MAXREXMIT)
637			nmp->nm_retry = NFS_MAXREXMIT;
638	}
639
640	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
641		nmp->nm_wsize = argp->wsize;
642		/*
643		 * Clip at the power of 2 below the size. There is an
644		 * issue (not isolated) that causes intermittent page
645		 * faults if this is not done.
646		 */
647		if (nmp->nm_wsize > NFS_FABLKSIZE)
648			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
649		else
650			nmp->nm_wsize = NFS_FABLKSIZE;
651	}
652
653	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
654		nmp->nm_rsize = argp->rsize;
655		/*
656		 * Clip at the power of 2 below the size. There is an
657		 * issue (not isolated) that causes intermittent page
658		 * faults if this is not done.
659		 */
660		if (nmp->nm_rsize > NFS_FABLKSIZE)
661			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
662		else
663			nmp->nm_rsize = NFS_FABLKSIZE;
664	}
665
666	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
667		nmp->nm_readdirsize = argp->readdirsize;
668	}
669
670	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
671		nmp->nm_acregmin = argp->acregmin;
672	else
673		nmp->nm_acregmin = NFS_MINATTRTIMO;
674	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
675		nmp->nm_acregmax = argp->acregmax;
676	else
677		nmp->nm_acregmax = NFS_MAXATTRTIMO;
678	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
679		nmp->nm_acdirmin = argp->acdirmin;
680	else
681		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
682	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
683		nmp->nm_acdirmax = argp->acdirmax;
684	else
685		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
686	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
687		nmp->nm_acdirmin = nmp->nm_acdirmax;
688	if (nmp->nm_acregmin > nmp->nm_acregmax)
689		nmp->nm_acregmin = nmp->nm_acregmax;
690
691	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
692		if (argp->readahead <= NFS_MAXRAHEAD)
693			nmp->nm_readahead = argp->readahead;
694		else
695			nmp->nm_readahead = NFS_MAXRAHEAD;
696	}
697	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
698		if (argp->wcommitsize < nmp->nm_wsize)
699			nmp->nm_wcommitsize = nmp->nm_wsize;
700		else
701			nmp->nm_wcommitsize = argp->wcommitsize;
702	}
703
704	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
705		    (nmp->nm_soproto != argp->proto));
706
707	if (nmp->nm_client != NULL && adjsock) {
708		int haslock = 0, error = 0;
709
710		if (nmp->nm_sotype == SOCK_STREAM) {
711			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
712			if (!error)
713				haslock = 1;
714		}
715		if (!error) {
716		    newnfs_disconnect(&nmp->nm_sockreq);
717		    if (haslock)
718			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
719		    nmp->nm_sotype = argp->sotype;
720		    nmp->nm_soproto = argp->proto;
721		    if (nmp->nm_sotype == SOCK_DGRAM)
722			while (newnfs_connect(nmp, &nmp->nm_sockreq,
723			    cred, td, 0, false)) {
724				printf("newnfs_args: retrying connect\n");
725				(void) nfs_catnap(PSOCK, 0, "nfscon");
726			}
727		}
728	} else {
729		nmp->nm_sotype = argp->sotype;
730		nmp->nm_soproto = argp->proto;
731	}
732
733	if (hostname != NULL) {
734		strlcpy(nmp->nm_hostname, hostname,
735		    sizeof(nmp->nm_hostname));
736		p = strchr(nmp->nm_hostname, ':');
737		if (p != NULL)
738			*p = '\0';
739	}
740}
741
/*
 * Names of the mount options accepted by nfs_mount(), which hands this
 * NULL-terminated list to vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from",
	"nfs_args",
	"noac",
	"noatime",
	"noexec",
	"suiddir",
	"nosuid",
	"nosymfollow",
	"union",
	"noclusterr",
	"noclusterw",
	"multilabel",
	"acls",
	"force",
	"update",
	"async",
	"noconn",
	"nolockd",
	"conn",
	"lockd",
	"intr",
	"rdirplus",
	"readdirsize",
	"soft",
	"hard",
	"mntudp",
	"tcp",
	"udp",
	"wsize",
	"rsize",
	"retrans",
	"actimeo",
	"acregmin",
	"acregmax",
	"acdirmin",
	"acdirmax",
	"resvport",
	"readahead",
	"hostname",
	"timeo",
	"timeout",
	"addr",
	"fh",
	"nfsv3",
	"sec",
	"principal",
	"nfsv4",
	"gssname",
	"allgssname",
	"dirpath",
	"minorversion",
	"nametimeo",
	"negnametimeo",
	"nocto",
	"noncontigwr",
	"pnfs",
	"wcommitsize",
	"oneopenown",
	"tls",
	"tlscertname",
	NULL
};
753
754/*
755 * Parse the "from" mountarg, passed by the generic mount(8) program
756 * or the mountroot code.  This is used when rerooting into NFS.
757 *
758 * Note that the "hostname" is actually a "hostname:/share/path" string.
759 */
760static int
761nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
762    struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
763{
764	char *nam, *delimp, *hostp, *spec;
765	int error, have_bracket = 0, offset, rv, speclen;
766	struct sockaddr_in *sin;
767	size_t len;
768
769	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
770	if (error != 0)
771		return (error);
772	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
773
774	/*
775	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
776	 */
777	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
778	    *(delimp + 1) == ':') {
779		hostp = spec + 1;
780		spec = delimp + 2;
781		have_bracket = 1;
782	} else if ((delimp = strrchr(spec, ':')) != NULL) {
783		hostp = spec;
784		spec = delimp + 1;
785	} else if ((delimp = strrchr(spec, '@')) != NULL) {
786		printf("%s: path@server syntax is deprecated, "
787		    "use server:path\n", __func__);
788		hostp = delimp + 1;
789	} else {
790		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
791		free(nam, M_TEMP);
792		return (EINVAL);
793	}
794	*delimp = '\0';
795
796	/*
797	 * If there has been a trailing slash at mounttime it seems
798	 * that some mountd implementations fail to remove the mount
799	 * entries from their mountlist while unmounting.
800	 */
801	for (speclen = strlen(spec);
802	    speclen > 1 && spec[speclen - 1] == '/';
803	    speclen--)
804		spec[speclen - 1] = '\0';
805	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
806		printf("%s: %s:%s: name too long", __func__, hostp, spec);
807		free(nam, M_TEMP);
808		return (EINVAL);
809	}
810	/* Make both '@' and ':' notations equal */
811	if (*hostp != '\0') {
812		len = strlen(hostp);
813		offset = 0;
814		if (have_bracket)
815			nam[offset++] = '[';
816		memmove(nam + offset, hostp, len);
817		if (have_bracket)
818			nam[len + offset++] = ']';
819		nam[len + offset++] = ':';
820		memmove(nam + len + offset, spec, speclen);
821		nam[len + speclen + offset] = '\0';
822	} else
823		nam[0] = '\0';
824
825	/*
826	 * XXX: IPv6
827	 */
828	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
829	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
830	if (rv != 1) {
831		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
832		    __func__, hostp, rv);
833		free(nam, M_TEMP);
834		free(sin, M_SONAME);
835		return (EINVAL);
836	}
837
838	sin->sin_len = sizeof(*sin);
839	sin->sin_family = AF_INET;
840	/*
841	 * XXX: hardcoded port number.
842	 */
843	sin->sin_port = htons(2049);
844
845	*hostnamep = strdup(nam, M_NEWNFSMNT);
846	*sinp = sin;
847	strlcpy(dirpath, spec, dirpathsize);
848	*dirlenp = strlen(dirpath);
849
850	free(nam, M_TEMP);
851	return (0);
852}
853
854/*
855 * VFS Operations.
856 *
857 * mount system call
858 * It seems a bit dumb to copyinstr() the host and path here and then
859 * bcopy() them in mountnfs(), but I wanted to detect errors before
860 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
861 * an error after that means that I have to release the mbuf.
862 */
863/* ARGSUSED */
864static int
865nfs_mount(struct mount *mp)
866{
867	struct nfs_args args = {
868	    .version = NFS_ARGSVERSION,
869	    .addr = NULL,
870	    .addrlen = sizeof (struct sockaddr_in),
871	    .sotype = SOCK_STREAM,
872	    .proto = 0,
873	    .fh = NULL,
874	    .fhsize = 0,
875	    .flags = NFSMNT_RESVPORT,
876	    .wsize = NFS_WSIZE,
877	    .rsize = NFS_RSIZE,
878	    .readdirsize = NFS_READDIRSIZE,
879	    .timeo = 10,
880	    .retrans = NFS_RETRANS,
881	    .readahead = NFS_DEFRAHEAD,
882	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
883	    .hostname = NULL,
884	    .acregmin = NFS_MINATTRTIMO,
885	    .acregmax = NFS_MAXATTRTIMO,
886	    .acdirmin = NFS_MINDIRATTRTIMO,
887	    .acdirmax = NFS_MAXDIRATTRTIMO,
888	};
889	int error = 0, ret, len;
890	struct sockaddr *nam = NULL;
891	struct vnode *vp;
892	struct thread *td;
893	char *hst;
894	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
895	char *cp, *opt, *name, *secname, *tlscertname;
896	int nametimeo = NFS_DEFAULT_NAMETIMEO;
897	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
898	int minvers = 0;
899	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
900	    krbnamelen, srvkrbnamelen;
901	size_t hstlen;
902	uint32_t newflag;
903
904	has_nfs_args_opt = 0;
905	has_nfs_from_opt = 0;
906	newflag = 0;
907	tlscertname = NULL;
908	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
909	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
910		error = EINVAL;
911		goto out;
912	}
913
914	td = curthread;
915	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
916	    nfs_diskless_valid != 0) {
917		error = nfs_mountroot(mp);
918		goto out;
919	}
920
921	nfscl_init();
922
923	/*
924	 * The old mount_nfs program passed the struct nfs_args
925	 * from userspace to kernel.  The new mount_nfs program
926	 * passes string options via nmount() from userspace to kernel
927	 * and we populate the struct nfs_args in the kernel.
928	 */
929	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
930		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
931		    sizeof(args));
932		if (error != 0)
933			goto out;
934
935		if (args.version != NFS_ARGSVERSION) {
936			error = EPROGMISMATCH;
937			goto out;
938		}
939		has_nfs_args_opt = 1;
940	}
941
942	/* Handle the new style options. */
943	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
944		args.acdirmin = args.acdirmax =
945		    args.acregmin = args.acregmax = 0;
946		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
947		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
948	}
949	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
950		args.flags |= NFSMNT_NOCONN;
951	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
952		args.flags &= ~NFSMNT_NOCONN;
953	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
954		args.flags |= NFSMNT_NOLOCKD;
955	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
956		args.flags &= ~NFSMNT_NOLOCKD;
957	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
958		args.flags |= NFSMNT_INT;
959	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
960		args.flags |= NFSMNT_RDIRPLUS;
961	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
962		args.flags |= NFSMNT_RESVPORT;
963	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
964		args.flags &= ~NFSMNT_RESVPORT;
965	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
966		args.flags |= NFSMNT_SOFT;
967	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
968		args.flags &= ~NFSMNT_SOFT;
969	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
970		args.sotype = SOCK_DGRAM;
971	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
972		args.sotype = SOCK_DGRAM;
973	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
974		args.sotype = SOCK_STREAM;
975	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
976		args.flags |= NFSMNT_NFSV3;
977	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
978		args.flags |= NFSMNT_NFSV4;
979		args.sotype = SOCK_STREAM;
980	}
981	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
982		args.flags |= NFSMNT_ALLGSSNAME;
983	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
984		args.flags |= NFSMNT_NOCTO;
985	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
986		args.flags |= NFSMNT_NONCONTIGWR;
987	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
988		args.flags |= NFSMNT_PNFS;
989	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
990		args.flags |= NFSMNT_ONEOPENOWN;
991	if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
992		newflag |= NFSMNT_TLS;
993	if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
994	    0) {
995		/*
996		 * tlscertname with "key.pem" appended to it forms a file
997		 * name.  As such, the maximum allowable strlen(tlscertname) is
998		 * NAME_MAX - 7. However, "len" includes the nul termination
999		 * byte so it can be up to NAME_MAX - 6.
1000		 */
1001		if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1002			vfs_mount_error(mp, "invalid tlscertname");
1003			error = EINVAL;
1004			goto out;
1005		}
1006		tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1007		strlcpy(tlscertname, opt, len);
1008	}
1009	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1010		if (opt == NULL) {
1011			vfs_mount_error(mp, "illegal readdirsize");
1012			error = EINVAL;
1013			goto out;
1014		}
1015		ret = sscanf(opt, "%d", &args.readdirsize);
1016		if (ret != 1 || args.readdirsize <= 0) {
1017			vfs_mount_error(mp, "illegal readdirsize: %s",
1018			    opt);
1019			error = EINVAL;
1020			goto out;
1021		}
1022		args.flags |= NFSMNT_READDIRSIZE;
1023	}
1024	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1025		if (opt == NULL) {
1026			vfs_mount_error(mp, "illegal readahead");
1027			error = EINVAL;
1028			goto out;
1029		}
1030		ret = sscanf(opt, "%d", &args.readahead);
1031		if (ret != 1 || args.readahead <= 0) {
1032			vfs_mount_error(mp, "illegal readahead: %s",
1033			    opt);
1034			error = EINVAL;
1035			goto out;
1036		}
1037		args.flags |= NFSMNT_READAHEAD;
1038	}
1039	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1040		if (opt == NULL) {
1041			vfs_mount_error(mp, "illegal wsize");
1042			error = EINVAL;
1043			goto out;
1044		}
1045		ret = sscanf(opt, "%d", &args.wsize);
1046		if (ret != 1 || args.wsize <= 0) {
1047			vfs_mount_error(mp, "illegal wsize: %s",
1048			    opt);
1049			error = EINVAL;
1050			goto out;
1051		}
1052		args.flags |= NFSMNT_WSIZE;
1053	}
1054	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1055		if (opt == NULL) {
1056			vfs_mount_error(mp, "illegal rsize");
1057			error = EINVAL;
1058			goto out;
1059		}
1060		ret = sscanf(opt, "%d", &args.rsize);
1061		if (ret != 1 || args.rsize <= 0) {
1062			vfs_mount_error(mp, "illegal wsize: %s",
1063			    opt);
1064			error = EINVAL;
1065			goto out;
1066		}
1067		args.flags |= NFSMNT_RSIZE;
1068	}
1069	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1070		if (opt == NULL) {
1071			vfs_mount_error(mp, "illegal retrans");
1072			error = EINVAL;
1073			goto out;
1074		}
1075		ret = sscanf(opt, "%d", &args.retrans);
1076		if (ret != 1 || args.retrans <= 0) {
1077			vfs_mount_error(mp, "illegal retrans: %s",
1078			    opt);
1079			error = EINVAL;
1080			goto out;
1081		}
1082		args.flags |= NFSMNT_RETRANS;
1083	}
1084	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1085		ret = sscanf(opt, "%d", &args.acregmin);
1086		if (ret != 1 || args.acregmin < 0) {
1087			vfs_mount_error(mp, "illegal actimeo: %s",
1088			    opt);
1089			error = EINVAL;
1090			goto out;
1091		}
1092		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1093		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1094		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1095	}
1096	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1097		ret = sscanf(opt, "%d", &args.acregmin);
1098		if (ret != 1 || args.acregmin < 0) {
1099			vfs_mount_error(mp, "illegal acregmin: %s",
1100			    opt);
1101			error = EINVAL;
1102			goto out;
1103		}
1104		args.flags |= NFSMNT_ACREGMIN;
1105	}
1106	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1107		ret = sscanf(opt, "%d", &args.acregmax);
1108		if (ret != 1 || args.acregmax < 0) {
1109			vfs_mount_error(mp, "illegal acregmax: %s",
1110			    opt);
1111			error = EINVAL;
1112			goto out;
1113		}
1114		args.flags |= NFSMNT_ACREGMAX;
1115	}
1116	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1117		ret = sscanf(opt, "%d", &args.acdirmin);
1118		if (ret != 1 || args.acdirmin < 0) {
1119			vfs_mount_error(mp, "illegal acdirmin: %s",
1120			    opt);
1121			error = EINVAL;
1122			goto out;
1123		}
1124		args.flags |= NFSMNT_ACDIRMIN;
1125	}
1126	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1127		ret = sscanf(opt, "%d", &args.acdirmax);
1128		if (ret != 1 || args.acdirmax < 0) {
1129			vfs_mount_error(mp, "illegal acdirmax: %s",
1130			    opt);
1131			error = EINVAL;
1132			goto out;
1133		}
1134		args.flags |= NFSMNT_ACDIRMAX;
1135	}
1136	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1137		ret = sscanf(opt, "%d", &args.wcommitsize);
1138		if (ret != 1 || args.wcommitsize < 0) {
1139			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1140			error = EINVAL;
1141			goto out;
1142		}
1143		args.flags |= NFSMNT_WCOMMITSIZE;
1144	}
1145	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1146		ret = sscanf(opt, "%d", &args.timeo);
1147		if (ret != 1 || args.timeo <= 0) {
1148			vfs_mount_error(mp, "illegal timeo: %s",
1149			    opt);
1150			error = EINVAL;
1151			goto out;
1152		}
1153		args.flags |= NFSMNT_TIMEO;
1154	}
1155	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1156		ret = sscanf(opt, "%d", &args.timeo);
1157		if (ret != 1 || args.timeo <= 0) {
1158			vfs_mount_error(mp, "illegal timeout: %s",
1159			    opt);
1160			error = EINVAL;
1161			goto out;
1162		}
1163		args.flags |= NFSMNT_TIMEO;
1164	}
1165	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1166		ret = sscanf(opt, "%d", &nametimeo);
1167		if (ret != 1 || nametimeo < 0) {
1168			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1169			error = EINVAL;
1170			goto out;
1171		}
1172	}
1173	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1174	    == 0) {
1175		ret = sscanf(opt, "%d", &negnametimeo);
1176		if (ret != 1 || negnametimeo < 0) {
1177			vfs_mount_error(mp, "illegal negnametimeo: %s",
1178			    opt);
1179			error = EINVAL;
1180			goto out;
1181		}
1182	}
1183	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1184	    0) {
1185		ret = sscanf(opt, "%d", &minvers);
1186		if (ret != 1 || minvers < 0 || minvers > 2 ||
1187		    (args.flags & NFSMNT_NFSV4) == 0) {
1188			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1189			error = EINVAL;
1190			goto out;
1191		}
1192	}
1193	if (vfs_getopt(mp->mnt_optnew, "sec",
1194		(void **) &secname, NULL) == 0)
1195		nfs_sec_name(secname, &args.flags);
1196
1197	if (mp->mnt_flag & MNT_UPDATE) {
1198		struct nfsmount *nmp = VFSTONFS(mp);
1199
1200		if (nmp == NULL) {
1201			error = EIO;
1202			goto out;
1203		}
1204
1205		/*
1206		 * If a change from TCP->UDP is done and there are thread(s)
1207		 * that have I/O RPC(s) in progress with a transfer size
1208		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1209		 * hung, retrying the RPC(s) forever. Usually these threads
1210		 * will be seen doing an uninterruptible sleep on wait channel
1211		 * "nfsreq".
1212		 */
1213		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1214			tprintf(td->td_proc, LOG_WARNING,
1215	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1216
1217		/*
1218		 * When doing an update, we can't change version,
1219		 * security, switch lockd strategies, change cookie
1220		 * translation or switch oneopenown.
1221		 */
1222		args.flags = (args.flags &
1223		    ~(NFSMNT_NFSV3 |
1224		      NFSMNT_NFSV4 |
1225		      NFSMNT_KERB |
1226		      NFSMNT_INTEGRITY |
1227		      NFSMNT_PRIVACY |
1228		      NFSMNT_ONEOPENOWN |
1229		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1230		    (nmp->nm_flag &
1231			(NFSMNT_NFSV3 |
1232			 NFSMNT_NFSV4 |
1233			 NFSMNT_KERB |
1234			 NFSMNT_INTEGRITY |
1235			 NFSMNT_PRIVACY |
1236			 NFSMNT_ONEOPENOWN |
1237			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1238		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1239		goto out;
1240	}
1241
1242	/*
1243	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1244	 * or no-connection mode for those protocols that support
1245	 * no-connection mode (the flag will be cleared later for protocols
1246	 * that do not support no-connection mode).  This will allow a client
1247	 * to receive replies from a different IP then the request was
1248	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1249	 * not 0.
1250	 */
1251	if (nfs_ip_paranoia == 0)
1252		args.flags |= NFSMNT_NOCONN;
1253
1254	if (has_nfs_args_opt != 0) {
1255		/*
1256		 * In the 'nfs_args' case, the pointers in the args
1257		 * structure are in userland - we copy them in here.
1258		 */
1259		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1260			vfs_mount_error(mp, "Bad file handle");
1261			error = EINVAL;
1262			goto out;
1263		}
1264		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1265		    args.fhsize);
1266		if (error != 0)
1267			goto out;
1268		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1269		if (error != 0)
1270			goto out;
1271		bzero(&hst[hstlen], MNAMELEN - hstlen);
1272		args.hostname = hst;
1273		/* getsockaddr() call must be after above copyin() calls */
1274		error = getsockaddr(&nam, args.addr, args.addrlen);
1275		if (error != 0)
1276			goto out;
1277	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1278	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1279	    sizeof(dirpath), &dirlen) == 0) {
1280		has_nfs_from_opt = 1;
1281		bcopy(args.hostname, hst, MNAMELEN);
1282		hst[MNAMELEN - 1] = '\0';
1283
1284		/*
1285		 * This only works with NFSv4 for now.
1286		 */
1287		args.fhsize = 0;
1288		args.flags |= NFSMNT_NFSV4;
1289		args.sotype = SOCK_STREAM;
1290	} else {
1291		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1292		    &args.fhsize) == 0) {
1293			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1294				vfs_mount_error(mp, "Bad file handle");
1295				error = EINVAL;
1296				goto out;
1297			}
1298			bcopy(args.fh, nfh, args.fhsize);
1299		} else {
1300			args.fhsize = 0;
1301		}
1302		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1303		    (void **)&args.hostname, &len);
1304		if (args.hostname == NULL) {
1305			vfs_mount_error(mp, "Invalid hostname");
1306			error = EINVAL;
1307			goto out;
1308		}
1309		if (len >= MNAMELEN) {
1310			vfs_mount_error(mp, "Hostname too long");
1311			error = EINVAL;
1312			goto out;
1313		}
1314		bcopy(args.hostname, hst, len);
1315		hst[len] = '\0';
1316	}
1317
1318	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1319		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1320	else {
1321		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1322		cp = strchr(srvkrbname, ':');
1323		if (cp != NULL)
1324			*cp = '\0';
1325	}
1326	srvkrbnamelen = strlen(srvkrbname);
1327
1328	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1329		strlcpy(krbname, name, sizeof (krbname));
1330	else
1331		krbname[0] = '\0';
1332	krbnamelen = strlen(krbname);
1333
1334	if (has_nfs_from_opt == 0) {
1335		if (vfs_getopt(mp->mnt_optnew,
1336		    "dirpath", (void **)&name, NULL) == 0)
1337			strlcpy(dirpath, name, sizeof (dirpath));
1338		else
1339			dirpath[0] = '\0';
1340		dirlen = strlen(dirpath);
1341	}
1342
1343	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1344		if (vfs_getopt(mp->mnt_optnew, "addr",
1345		    (void **)&args.addr, &args.addrlen) == 0) {
1346			if (args.addrlen > SOCK_MAXADDRLEN) {
1347				error = ENAMETOOLONG;
1348				goto out;
1349			}
1350			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1351			bcopy(args.addr, nam, args.addrlen);
1352			nam->sa_len = args.addrlen;
1353		} else {
1354			vfs_mount_error(mp, "No server address");
1355			error = EINVAL;
1356			goto out;
1357		}
1358	}
1359
1360	args.fh = nfh;
1361	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1362	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1363	    nametimeo, negnametimeo, minvers, newflag, tlscertname);
1364out:
1365	if (!error) {
1366		MNT_ILOCK(mp);
1367		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1368		    MNTK_USES_BCACHE;
1369		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1370			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1371		MNT_IUNLOCK(mp);
1372	}
1373	free(hst, M_TEMP);
1374	return (error);
1375}
1376
1377/*
1378 * VFS Operations.
1379 *
1380 * mount system call
1381 * It seems a bit dumb to copyinstr() the host and path here and then
1382 * bcopy() them in mountnfs(), but I wanted to detect errors before
1383 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1384 * an error after that means that I have to release the mbuf.
1385 */
1386/* ARGSUSED */
1387static int
1388nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1389{
1390	int error;
1391	struct nfs_args args;
1392
1393	error = copyin(data, &args, sizeof (struct nfs_args));
1394	if (error)
1395		return error;
1396
1397	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1398
1399	error = kernel_mount(ma, flags);
1400	return (error);
1401}
1402
1403/*
1404 * Common code for mount and mountroot
1405 */
1406static int
1407mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1408    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1409    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1410    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1411    int minvers, uint32_t newflag, char *tlscertname)
1412{
1413	struct nfsmount *nmp;
1414	struct nfsnode *np;
1415	int error, trycnt, ret;
1416	struct nfsvattr nfsva;
1417	struct nfsclclient *clp;
1418	struct nfsclds *dsp, *tdsp;
1419	uint32_t lease;
1420	static u_int64_t clval = 0;
1421#ifdef KERN_TLS
1422	u_int maxlen;
1423#endif
1424
1425	NFSCL_DEBUG(3, "in mnt\n");
1426	clp = NULL;
1427	if (mp->mnt_flag & MNT_UPDATE) {
1428		nmp = VFSTONFS(mp);
1429		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1430		free(nam, M_SONAME);
1431		free(tlscertname, M_NEWNFSMNT);
1432		return (0);
1433	} else {
1434		/* NFS-over-TLS requires that rpctls be functioning. */
1435		if ((newflag & NFSMNT_TLS) != 0) {
1436			error = EINVAL;
1437#ifdef KERN_TLS
1438			/* KERN_TLS is only supported for TCP. */
1439			if (argp->sotype == SOCK_STREAM &&
1440			    rpctls_getinfo(&maxlen, true, false))
1441				error = 0;
1442#endif
1443			if (error != 0) {
1444				free(nam, M_SONAME);
1445				free(tlscertname, M_NEWNFSMNT);
1446				return (error);
1447			}
1448		}
1449		nmp = malloc(sizeof (struct nfsmount) +
1450		    krbnamelen + dirlen + srvkrbnamelen + 2,
1451		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1452		nmp->nm_tlscertname = tlscertname;
1453		nmp->nm_newflag = newflag;
1454		TAILQ_INIT(&nmp->nm_bufq);
1455		TAILQ_INIT(&nmp->nm_sess);
1456		if (clval == 0)
1457			clval = (u_int64_t)nfsboottime.tv_sec;
1458		nmp->nm_clval = clval++;
1459		nmp->nm_krbnamelen = krbnamelen;
1460		nmp->nm_dirpathlen = dirlen;
1461		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1462		if (td->td_ucred->cr_uid != (uid_t)0) {
1463			/*
1464			 * nm_uid is used to get KerberosV credentials for
1465			 * the nfsv4 state handling operations if there is
1466			 * no host based principal set. Use the uid of
1467			 * this user if not root, since they are doing the
1468			 * mount. I don't think setting this for root will
1469			 * work, since root normally does not have user
1470			 * credentials in a credentials cache.
1471			 */
1472			nmp->nm_uid = td->td_ucred->cr_uid;
1473		} else {
1474			/*
1475			 * Just set to -1, so it won't be used.
1476			 */
1477			nmp->nm_uid = (uid_t)-1;
1478		}
1479
1480		/* Copy and null terminate all the names */
1481		if (nmp->nm_krbnamelen > 0) {
1482			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1483			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1484		}
1485		if (nmp->nm_dirpathlen > 0) {
1486			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1487			    nmp->nm_dirpathlen);
1488			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1489			    + 1] = '\0';
1490		}
1491		if (nmp->nm_srvkrbnamelen > 0) {
1492			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1493			    nmp->nm_srvkrbnamelen);
1494			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1495			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1496		}
1497		nmp->nm_sockreq.nr_cred = crhold(cred);
1498		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1499		mp->mnt_data = nmp;
1500		nmp->nm_getinfo = nfs_getnlminfo;
1501		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1502	}
1503	vfs_getnewfsid(mp);
1504	nmp->nm_mountp = mp;
1505	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1506
1507	/*
1508	 * Since nfs_decode_args() might optionally set them, these
1509	 * need to be set to defaults before the call, so that the
1510	 * optional settings aren't overwritten.
1511	 */
1512	nmp->nm_nametimeo = nametimeo;
1513	nmp->nm_negnametimeo = negnametimeo;
1514	nmp->nm_timeo = NFS_TIMEO;
1515	nmp->nm_retry = NFS_RETRANS;
1516	nmp->nm_readahead = NFS_DEFRAHEAD;
1517
1518	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1519	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1520	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1521		nmp->nm_wcommitsize *= 2;
1522	nmp->nm_wcommitsize *= 256;
1523
1524	if ((argp->flags & NFSMNT_NFSV4) != 0)
1525		nmp->nm_minorvers = minvers;
1526	else
1527		nmp->nm_minorvers = 0;
1528
1529	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1530
1531	/*
1532	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1533	 * high, depending on whether we end up with negative offsets in
1534	 * the client or server somewhere.  2GB-1 may be safer.
1535	 *
1536	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1537	 * that we can handle until we find out otherwise.
1538	 */
1539	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1540		nmp->nm_maxfilesize = 0xffffffffLL;
1541	else
1542		nmp->nm_maxfilesize = OFF_MAX;
1543
1544	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1545		nmp->nm_wsize = NFS_WSIZE;
1546		nmp->nm_rsize = NFS_RSIZE;
1547		nmp->nm_readdirsize = NFS_READDIRSIZE;
1548	}
1549	nmp->nm_numgrps = NFS_MAXGRPS;
1550	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1551	if (nmp->nm_tprintf_delay < 0)
1552		nmp->nm_tprintf_delay = 0;
1553	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1554	if (nmp->nm_tprintf_initial_delay < 0)
1555		nmp->nm_tprintf_initial_delay = 0;
1556	nmp->nm_fhsize = argp->fhsize;
1557	if (nmp->nm_fhsize > 0)
1558		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1559	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1560	nmp->nm_nam = nam;
1561	/* Set up the sockets and per-host congestion */
1562	nmp->nm_sotype = argp->sotype;
1563	nmp->nm_soproto = argp->proto;
1564	nmp->nm_sockreq.nr_prog = NFS_PROG;
1565	if ((argp->flags & NFSMNT_NFSV4))
1566		nmp->nm_sockreq.nr_vers = NFS_VER4;
1567	else if ((argp->flags & NFSMNT_NFSV3))
1568		nmp->nm_sockreq.nr_vers = NFS_VER3;
1569	else
1570		nmp->nm_sockreq.nr_vers = NFS_VER2;
1571
1572	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false)))
1573		goto bad;
1574	/* For NFSv4.1, get the clientid now. */
1575	if (nmp->nm_minorvers > 0) {
1576		NFSCL_DEBUG(3, "at getcl\n");
1577		error = nfscl_getcl(mp, cred, td, 0, &clp);
1578		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1579		if (error != 0)
1580			goto bad;
1581	}
1582
1583	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1584	    nmp->nm_dirpathlen > 0) {
1585		NFSCL_DEBUG(3, "in dirp\n");
1586		/*
1587		 * If the fhsize on the mount point == 0 for V4, the mount
1588		 * path needs to be looked up.
1589		 */
1590		trycnt = 3;
1591		do {
1592			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1593			    cred, td);
1594			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1595			if (error)
1596				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1597		} while (error && --trycnt > 0);
1598		if (error)
1599			goto bad;
1600	}
1601
1602	/*
1603	 * A reference count is needed on the nfsnode representing the
1604	 * remote root.  If this object is not persistent, then backward
1605	 * traversals of the mount point (i.e. "..") will not work if
1606	 * the nfsnode gets flushed out of the cache. Ufs does not have
1607	 * this problem, because one can identify root inodes by their
1608	 * number == UFS_ROOTINO (2).
1609	 */
1610	if (nmp->nm_fhsize > 0) {
1611		/*
1612		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1613		 * non-zero for the root vnode. f_iosize will be set correctly
1614		 * by nfs_statfs() before any I/O occurs.
1615		 */
1616		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1617		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1618		    LK_EXCLUSIVE);
1619		if (error)
1620			goto bad;
1621		*vpp = NFSTOV(np);
1622
1623		/*
1624		 * Get file attributes and transfer parameters for the
1625		 * mountpoint.  This has the side effect of filling in
1626		 * (*vpp)->v_type with the correct value.
1627		 */
1628		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1629		    cred, td, &nfsva, NULL, &lease);
1630		if (ret) {
1631			/*
1632			 * Just set default values to get things going.
1633			 */
1634			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1635			nfsva.na_vattr.va_type = VDIR;
1636			nfsva.na_vattr.va_mode = 0777;
1637			nfsva.na_vattr.va_nlink = 100;
1638			nfsva.na_vattr.va_uid = (uid_t)0;
1639			nfsva.na_vattr.va_gid = (gid_t)0;
1640			nfsva.na_vattr.va_fileid = 2;
1641			nfsva.na_vattr.va_gen = 1;
1642			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1643			nfsva.na_vattr.va_size = 512 * 1024;
1644			lease = 60;
1645		}
1646		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1647		if (nmp->nm_minorvers > 0) {
1648			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1649			NFSLOCKCLSTATE();
1650			clp->nfsc_renew = NFSCL_RENEW(lease);
1651			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1652			clp->nfsc_clientidrev++;
1653			if (clp->nfsc_clientidrev == 0)
1654				clp->nfsc_clientidrev++;
1655			NFSUNLOCKCLSTATE();
1656			/*
1657			 * Mount will succeed, so the renew thread can be
1658			 * started now.
1659			 */
1660			nfscl_start_renewthread(clp);
1661			nfscl_clientrelease(clp);
1662		}
1663		if (argp->flags & NFSMNT_NFSV3)
1664			ncl_fsinfo(nmp, *vpp, cred, td);
1665
1666		/* Mark if the mount point supports NFSv4 ACLs. */
1667		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1668		    ret == 0 &&
1669		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1670			MNT_ILOCK(mp);
1671			mp->mnt_flag |= MNT_NFS4ACLS;
1672			MNT_IUNLOCK(mp);
1673		}
1674
1675		/*
1676		 * Lose the lock but keep the ref.
1677		 */
1678		NFSVOPUNLOCK(*vpp);
1679		vfs_cache_root_set(mp, *vpp);
1680		return (0);
1681	}
1682	error = EIO;
1683
1684bad:
1685	if (clp != NULL)
1686		nfscl_clientrelease(clp);
1687	newnfs_disconnect(&nmp->nm_sockreq);
1688	crfree(nmp->nm_sockreq.nr_cred);
1689	if (nmp->nm_sockreq.nr_auth != NULL)
1690		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1691	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1692	mtx_destroy(&nmp->nm_mtx);
1693	if (nmp->nm_clp != NULL) {
1694		NFSLOCKCLSTATE();
1695		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1696		NFSUNLOCKCLSTATE();
1697		free(nmp->nm_clp, M_NFSCLCLIENT);
1698	}
1699	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1700		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1701		    dsp->nfsclds_sockp != NULL)
1702			newnfs_disconnect(dsp->nfsclds_sockp);
1703		nfscl_freenfsclds(dsp);
1704	}
1705	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1706	free(nmp, M_NEWNFSMNT);
1707	free(nam, M_SONAME);
1708	return (error);
1709}
1710
1711/*
1712 * unmount system call
1713 */
1714static int
1715nfs_unmount(struct mount *mp, int mntflags)
1716{
1717	struct thread *td;
1718	struct nfsmount *nmp;
1719	int error, flags = 0, i, trycnt = 0;
1720	struct nfsclds *dsp, *tdsp;
1721
1722	td = curthread;
1723
1724	if (mntflags & MNT_FORCE)
1725		flags |= FORCECLOSE;
1726	nmp = VFSTONFS(mp);
1727	error = 0;
1728	/*
1729	 * Goes something like this..
1730	 * - Call vflush() to clear out vnodes for this filesystem
1731	 * - Close the socket
1732	 * - Free up the data structures
1733	 */
1734	/* In the forced case, cancel any outstanding requests. */
1735	if (mntflags & MNT_FORCE) {
1736		NFSDDSLOCK();
1737		if (nfsv4_findmirror(nmp) != NULL)
1738			error = ENXIO;
1739		NFSDDSUNLOCK();
1740		if (error)
1741			goto out;
1742		error = newnfs_nmcancelreqs(nmp);
1743		if (error)
1744			goto out;
1745		/* For a forced close, get rid of the renew thread now */
1746		nfscl_umount(nmp, td);
1747	}
1748	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1749	do {
1750		error = vflush(mp, 1, flags, td);
1751		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1752			(void) nfs_catnap(PSOCK, error, "newndm");
1753	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1754	if (error)
1755		goto out;
1756
1757	/*
1758	 * We are now committed to the unmount.
1759	 */
1760	if ((mntflags & MNT_FORCE) == 0)
1761		nfscl_umount(nmp, td);
1762	else {
1763		mtx_lock(&nmp->nm_mtx);
1764		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1765		mtx_unlock(&nmp->nm_mtx);
1766	}
1767	/* Make sure no nfsiods are assigned to this mount. */
1768	NFSLOCKIOD();
1769	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1770		if (ncl_iodmount[i] == nmp) {
1771			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1772			ncl_iodmount[i] = NULL;
1773		}
1774	NFSUNLOCKIOD();
1775
1776	/*
1777	 * We can now set mnt_data to NULL and wait for
1778	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1779	 */
1780	mtx_lock(&mountlist_mtx);
1781	mtx_lock(&nmp->nm_mtx);
1782	mp->mnt_data = NULL;
1783	mtx_unlock(&mountlist_mtx);
1784	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1785		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1786	mtx_unlock(&nmp->nm_mtx);
1787
1788	newnfs_disconnect(&nmp->nm_sockreq);
1789	crfree(nmp->nm_sockreq.nr_cred);
1790	free(nmp->nm_nam, M_SONAME);
1791	if (nmp->nm_sockreq.nr_auth != NULL)
1792		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1793	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1794	mtx_destroy(&nmp->nm_mtx);
1795	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1796		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1797		    dsp->nfsclds_sockp != NULL)
1798			newnfs_disconnect(dsp->nfsclds_sockp);
1799		nfscl_freenfsclds(dsp);
1800	}
1801	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1802	free(nmp, M_NEWNFSMNT);
1803out:
1804	return (error);
1805}
1806
1807/*
1808 * Return root of a filesystem
1809 */
1810static int
1811nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1812{
1813	struct vnode *vp;
1814	struct nfsmount *nmp;
1815	struct nfsnode *np;
1816	int error;
1817
1818	nmp = VFSTONFS(mp);
1819	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1820	if (error)
1821		return error;
1822	vp = NFSTOV(np);
1823	/*
1824	 * Get transfer parameters and attributes for root vnode once.
1825	 */
1826	mtx_lock(&nmp->nm_mtx);
1827	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1828		mtx_unlock(&nmp->nm_mtx);
1829		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1830	} else
1831		mtx_unlock(&nmp->nm_mtx);
1832	if (vp->v_type == VNON)
1833	    vp->v_type = VDIR;
1834	vp->v_vflag |= VV_ROOT;
1835	*vpp = vp;
1836	return (0);
1837}
1838
1839/*
1840 * Flush out the buffer cache
1841 */
1842/* ARGSUSED */
1843static int
1844nfs_sync(struct mount *mp, int waitfor)
1845{
1846	struct vnode *vp, *mvp;
1847	struct thread *td;
1848	int error, allerror = 0;
1849
1850	td = curthread;
1851
1852	MNT_ILOCK(mp);
1853	/*
1854	 * If a forced dismount is in progress, return from here so that
1855	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1856	 * calling VFS_UNMOUNT().
1857	 */
1858	if (NFSCL_FORCEDISM(mp)) {
1859		MNT_IUNLOCK(mp);
1860		return (EBADF);
1861	}
1862	MNT_IUNLOCK(mp);
1863
1864	/*
1865	 * Force stale buffer cache information to be flushed.
1866	 */
1867loop:
1868	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1869		/* XXX Racy bv_cnt check. */
1870		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1871		    waitfor == MNT_LAZY) {
1872			VI_UNLOCK(vp);
1873			continue;
1874		}
1875		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
1876			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1877			goto loop;
1878		}
1879		error = VOP_FSYNC(vp, waitfor, td);
1880		if (error)
1881			allerror = error;
1882		NFSVOPUNLOCK(vp);
1883		vrele(vp);
1884	}
1885	return (allerror);
1886}
1887
1888static int
1889nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1890{
1891	struct nfsmount *nmp = VFSTONFS(mp);
1892	struct vfsquery vq;
1893	int error;
1894
1895	bzero(&vq, sizeof(vq));
1896	switch (op) {
1897#if 0
1898	case VFS_CTL_NOLOCKS:
1899		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1900 		if (req->oldptr != NULL) {
1901 			error = SYSCTL_OUT(req, &val, sizeof(val));
1902 			if (error)
1903 				return (error);
1904 		}
1905 		if (req->newptr != NULL) {
1906 			error = SYSCTL_IN(req, &val, sizeof(val));
1907 			if (error)
1908 				return (error);
1909			if (val)
1910				nmp->nm_flag |= NFSMNT_NOLOCKS;
1911			else
1912				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1913 		}
1914		break;
1915#endif
1916	case VFS_CTL_QUERY:
1917		mtx_lock(&nmp->nm_mtx);
1918		if (nmp->nm_state & NFSSTA_TIMEO)
1919			vq.vq_flags |= VQ_NOTRESP;
1920		mtx_unlock(&nmp->nm_mtx);
1921#if 0
1922		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1923		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1924			vq.vq_flags |= VQ_NOTRESPLOCK;
1925#endif
1926		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1927		break;
1928 	case VFS_CTL_TIMEO:
1929 		if (req->oldptr != NULL) {
1930 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1931 			    sizeof(nmp->nm_tprintf_initial_delay));
1932 			if (error)
1933 				return (error);
1934 		}
1935 		if (req->newptr != NULL) {
1936			error = vfs_suser(mp, req->td);
1937			if (error)
1938				return (error);
1939 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1940 			    sizeof(nmp->nm_tprintf_initial_delay));
1941 			if (error)
1942 				return (error);
1943 			if (nmp->nm_tprintf_initial_delay < 0)
1944 				nmp->nm_tprintf_initial_delay = 0;
1945 		}
1946		break;
1947	default:
1948		return (ENOTSUP);
1949	}
1950	return (0);
1951}
1952
1953/*
1954 * Purge any RPCs in progress, so that they will all return errors.
1955 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
1956 * forced dismount.
1957 */
1958static void
1959nfs_purge(struct mount *mp)
1960{
1961	struct nfsmount *nmp = VFSTONFS(mp);
1962
1963	newnfs_nmcancelreqs(nmp);
1964}
1965
1966/*
1967 * Extract the information needed by the nlm from the nfs vnode.
1968 */
1969static void
1970nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1971    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1972    struct timeval *timeop)
1973{
1974	struct nfsmount *nmp;
1975	struct nfsnode *np = VTONFS(vp);
1976
1977	nmp = VFSTONFS(vp->v_mount);
1978	if (fhlenp != NULL)
1979		*fhlenp = (size_t)np->n_fhp->nfh_len;
1980	if (fhp != NULL)
1981		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1982	if (sp != NULL)
1983		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1984	if (is_v3p != NULL)
1985		*is_v3p = NFS_ISV3(vp);
1986	if (sizep != NULL)
1987		*sizep = np->n_size;
1988	if (timeop != NULL) {
1989		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1990		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1991	}
1992}
1993
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name plus its terminating NUL fits in the *blen bytes
 * that remain at *buf.  After a write, *buf and *blen are moved past the
 * characters that were added.  The nmp argument is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	/* Skip an option that is not set or that will not fit. */
	if (testval == 0 || *blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != strlen(opt))
		printf("EEK!!\n");
	*blen -= written;
	*buf += written;
}
2011
/*
 * Append "<opt>=<optval>" to the output buffer.
 *
 * Nothing is attempted unless the *blen bytes that remain at *buf can hold
 * at least the option name, the '=' and a terminating NUL.  When the whole
 * string fits, *buf and *blen are moved past it.  When the value does not
 * fit, the partial text is discarded by terminating the buffer at *buf
 * again, so that the result never ends in a cut-off (and therefore wrong)
 * number; *buf and *blen are left unchanged in that case.
 * The nmp argument is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/* Truncated: drop the fragment instead of reporting it. */
		**buf = '\0';
	}
}
2029
2030/*
2031 * Load the option flags and values into the buffer.
2032 */
2033void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2034{
2035	char *buf;
2036	size_t blen;
2037
2038	buf = buffer;
2039	blen = buflen;
2040	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2041	    &blen);
2042	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2043		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2044		    &blen);
2045		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2046		    &buf, &blen);
2047		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2048		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2049	}
2050	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2051	    &blen);
2052	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2053	    "nfsv2", &buf, &blen);
2054	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2055	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2056	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2057	    &buf, &blen);
2058	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2059	    &blen);
2060	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2061	    &buf, &blen);
2062	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2063	    &blen);
2064	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2065	    &blen);
2066	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2067	    &blen);
2068	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2069	    &blen);
2070	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2071	    &blen);
2072	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2073	    ",noncontigwr", &buf, &blen);
2074	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2075	    0, ",lockd", &buf, &blen);
2076	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2077	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2078	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2079	    &buf, &blen);
2080	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2081	    &buf, &blen);
2082	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2083	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2084	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2085	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2086	    &buf, &blen);
2087	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2088	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2089	    &buf, &blen);
2090	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2091	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2092	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2093	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2094	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2095	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2096	    &blen);
2097	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2098	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2099	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2100	    &blen);
2101	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2102	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2103	    &blen);
2104	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2105	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2106}
2107