nfs_clvfsops.c revision 298788
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 298788 2016-04-29 16:07:25Z pfg $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83extern int nfscl_debuglevel;
84extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86extern struct mtx ncl_iod_mutex;
87NFSCLSTATEMUTEX;
88
89MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
90MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
91
92SYSCTL_DECL(_vfs_nfs);
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95    &nfs_ip_paranoia, 0, "");
96static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99/* how long between console messages "nfs server foo not responding" */
100static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103#ifdef NFS_DEBUG
104int nfs_debug;
105SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
106    "Toggle debug flag");
107#endif
108
109static int	nfs_mountroot(struct mount *);
110static void	nfs_sec_name(char *, int *);
111static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
112		    struct nfs_args *argp, const char *, struct ucred *,
113		    struct thread *);
114static int	mountnfs(struct nfs_args *, struct mount *,
115		    struct sockaddr *, char *, u_char *, int, u_char *, int,
116		    u_char *, int, struct vnode **, struct ucred *,
117		    struct thread *, int, int, int);
118static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
119		    struct sockaddr_storage *, int *, off_t *,
120		    struct timeval *);
121static vfs_mount_t nfs_mount;
122static vfs_cmount_t nfs_cmount;
123static vfs_unmount_t nfs_unmount;
124static vfs_root_t nfs_root;
125static vfs_statfs_t nfs_statfs;
126static vfs_sync_t nfs_sync;
127static vfs_sysctl_t nfs_sysctl;
128static vfs_purge_t nfs_purge;
129
130/*
131 * nfs vfs operations.
132 */
133static struct vfsops nfs_vfsops = {
134	.vfs_init =		ncl_init,
135	.vfs_mount =		nfs_mount,
136	.vfs_cmount =		nfs_cmount,
137	.vfs_root =		nfs_root,
138	.vfs_statfs =		nfs_statfs,
139	.vfs_sync =		nfs_sync,
140	.vfs_uninit =		ncl_uninit,
141	.vfs_unmount =		nfs_unmount,
142	.vfs_sysctl =		nfs_sysctl,
143	.vfs_purge =		nfs_purge,
144};
145VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
146
147/* So that loader and kldload(2) can find us, wherever we are.. */
148MODULE_VERSION(nfs, 1);
149MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
150MODULE_DEPEND(nfs, krpc, 1, 1, 1);
151MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
152MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
153
154/*
155 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
156 * can be shared by both NFS clients. It is declared here so that it
157 * will be defined for kernels built without NFS_ROOT, although it
158 * isn't used in that case.
159 */
160#if !defined(NFS_ROOT)
161struct nfs_diskless	nfs_diskless = { { { 0 } } };
162struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
163int			nfs_diskless_valid = 0;
164#endif
165
166SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
167    &nfs_diskless_valid, 0,
168    "Has the diskless struct been filled correctly");
169
170SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
171    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
172
173SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
174    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
175    "%Ssockaddr_in", "Diskless root nfs address");
176
177
void		newnfsargs_ntoh(struct nfs_args *argp);

/* Helpers used only for the diskless (NFS root) boot path. */
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185
186int
187newnfs_iosize(struct nfsmount *nmp)
188{
189	int iosize, maxio;
190
191	/* First, set the upper limit for iosize */
192	if (nmp->nm_flag & NFSMNT_NFSV4) {
193		maxio = NFS_MAXBSIZE;
194	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195		if (nmp->nm_sotype == SOCK_DGRAM)
196			maxio = NFS_MAXDGRAMDATA;
197		else
198			maxio = NFS_MAXBSIZE;
199	} else {
200		maxio = NFS_V2MAXDATA;
201	}
202	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203		nmp->nm_rsize = maxio;
204	if (nmp->nm_rsize > NFS_MAXBSIZE)
205		nmp->nm_rsize = NFS_MAXBSIZE;
206	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207		nmp->nm_readdirsize = maxio;
208	if (nmp->nm_readdirsize > nmp->nm_rsize)
209		nmp->nm_readdirsize = nmp->nm_rsize;
210	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211		nmp->nm_wsize = maxio;
212	if (nmp->nm_wsize > NFS_MAXBSIZE)
213		nmp->nm_wsize = NFS_MAXBSIZE;
214
215	/*
216	 * Calculate the size used for io buffers.  Use the larger
217	 * of the two sizes to minimise nfs requests but make sure
218	 * that it is at least one VM page to avoid wasting buffer
219	 * space.  It must also be at least NFS_DIRBLKSIZ, since
220	 * that is the buffer size used for directories.
221	 */
222	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
223	iosize = imax(iosize, PAGE_SIZE);
224	iosize = imax(iosize, NFS_DIRBLKSIZ);
225	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
226	return (iosize);
227}
228
229static void
230nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
231{
232
233	args->version = NFS_ARGSVERSION;
234	args->addr = oargs->addr;
235	args->addrlen = oargs->addrlen;
236	args->sotype = oargs->sotype;
237	args->proto = oargs->proto;
238	args->fh = oargs->fh;
239	args->fhsize = oargs->fhsize;
240	args->flags = oargs->flags;
241	args->wsize = oargs->wsize;
242	args->rsize = oargs->rsize;
243	args->readdirsize = oargs->readdirsize;
244	args->timeo = oargs->timeo;
245	args->retrans = oargs->retrans;
246	args->readahead = oargs->readahead;
247	args->hostname = oargs->hostname;
248}
249
250static void
251nfs_convert_diskless(void)
252{
253
254	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
255		sizeof(struct ifaliasreq));
256	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
257		sizeof(struct sockaddr_in));
258	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
259	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
260		nfsv3_diskless.root_fhsize = NFSX_MYFH;
261		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
262	} else {
263		nfsv3_diskless.root_fhsize = NFSX_V2FH;
264		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
265	}
266	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
267		sizeof(struct sockaddr_in));
268	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
269	nfsv3_diskless.root_time = nfs_diskless.root_time;
270	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
271		MAXHOSTNAMELEN);
272	nfs_diskless_valid = 3;
273}
274
275/*
276 * nfs statfs call
277 */
278static int
279nfs_statfs(struct mount *mp, struct statfs *sbp)
280{
281	struct vnode *vp;
282	struct thread *td;
283	struct nfsmount *nmp = VFSTONFS(mp);
284	struct nfsvattr nfsva;
285	struct nfsfsinfo fs;
286	struct nfsstatfs sb;
287	int error = 0, attrflag, gotfsinfo = 0, ret;
288	struct nfsnode *np;
289
290	td = curthread;
291
292	error = vfs_busy(mp, MBF_NOWAIT);
293	if (error)
294		return (error);
295	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
296	if (error) {
297		vfs_unbusy(mp);
298		return (error);
299	}
300	vp = NFSTOV(np);
301	mtx_lock(&nmp->nm_mtx);
302	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
303		mtx_unlock(&nmp->nm_mtx);
304		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
305		    &attrflag, NULL);
306		if (!error)
307			gotfsinfo = 1;
308	} else
309		mtx_unlock(&nmp->nm_mtx);
310	if (!error)
311		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
312		    &attrflag, NULL);
313	if (error != 0)
314		NFSCL_DEBUG(2, "statfs=%d\n", error);
315	if (attrflag == 0) {
316		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
317		    td->td_ucred, td, &nfsva, NULL, NULL);
318		if (ret) {
319			/*
320			 * Just set default values to get things going.
321			 */
322			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
323			nfsva.na_vattr.va_type = VDIR;
324			nfsva.na_vattr.va_mode = 0777;
325			nfsva.na_vattr.va_nlink = 100;
326			nfsva.na_vattr.va_uid = (uid_t)0;
327			nfsva.na_vattr.va_gid = (gid_t)0;
328			nfsva.na_vattr.va_fileid = 2;
329			nfsva.na_vattr.va_gen = 1;
330			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
331			nfsva.na_vattr.va_size = 512 * 1024;
332		}
333	}
334	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
335	if (!error) {
336	    mtx_lock(&nmp->nm_mtx);
337	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
338		nfscl_loadfsinfo(nmp, &fs);
339	    nfscl_loadsbinfo(nmp, &sb, sbp);
340	    sbp->f_iosize = newnfs_iosize(nmp);
341	    mtx_unlock(&nmp->nm_mtx);
342	    if (sbp != &mp->mnt_stat) {
343		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
344		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
345	    }
346	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
347	} else if (NFS_ISV4(vp)) {
348		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
349	}
350	vput(vp);
351	vfs_unbusy(mp);
352	return (error);
353}
354
355/*
356 * nfs version 3 fsinfo rpc call
357 */
358int
359ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
360    struct thread *td)
361{
362	struct nfsfsinfo fs;
363	struct nfsvattr nfsva;
364	int error, attrflag;
365
366	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
367	if (!error) {
368		if (attrflag)
369			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
370			    1);
371		mtx_lock(&nmp->nm_mtx);
372		nfscl_loadfsinfo(nmp, &fs);
373		mtx_unlock(&nmp->nm_mtx);
374	}
375	return (error);
376}
377
378/*
379 * Mount a remote root fs via. nfs. This depends on the info in the
380 * nfs_diskless structure that has been filled in properly by some primary
381 * bootstrap.
382 * It goes something like this:
383 * - do enough of "ifconfig" by calling ifioctl() so that the system
384 *   can talk to the server
385 * - If nfs_diskless.mygateway is filled in, use that address as
386 *   a default gateway.
387 * - build the rootfs mount point and call mountnfs() to do the rest.
388 *
389 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
390 * structure, as well as other global NFS client variables here, as
391 * nfs_mountroot() will be called once in the boot before any other NFS
392 * client activity occurs.
393 */
394static int
395nfs_mountroot(struct mount *mp)
396{
397	struct thread *td = curthread;
398	struct nfsv3_diskless *nd = &nfsv3_diskless;
399	struct socket *so;
400	struct vnode *vp;
401	struct ifreq ir;
402	int error;
403	u_long l;
404	char buf[128];
405	char *cp;
406
407#if defined(BOOTP_NFSROOT) && defined(BOOTP)
408	bootpc_init();		/* use bootp to get nfs_diskless filled in */
409#elif defined(NFS_ROOT)
410	nfs_setup_diskless();
411#endif
412
413	if (nfs_diskless_valid == 0)
414		return (-1);
415	if (nfs_diskless_valid == 1)
416		nfs_convert_diskless();
417
418	/*
419	 * XXX splnet, so networks will receive...
420	 */
421	splnet();
422
423	/*
424	 * Do enough of ifconfig(8) so that the critical net interface can
425	 * talk to the server.
426	 */
427	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428	    td->td_ucred, td);
429	if (error)
430		panic("nfs_mountroot: socreate(%04x): %d",
431			nd->myif.ifra_addr.sa_family, error);
432
433#if 0 /* XXX Bad idea */
434	/*
435	 * We might not have been told the right interface, so we pass
436	 * over the first ten interfaces of the same kind, until we get
437	 * one of them configured.
438	 */
439
440	for (i = strlen(nd->myif.ifra_name) - 1;
441		nd->myif.ifra_name[i] >= '0' &&
442		nd->myif.ifra_name[i] <= '9';
443		nd->myif.ifra_name[i] ++) {
444		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445		if(!error)
446			break;
447	}
448#endif
449	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450	if (error)
451		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453		ir.ifr_mtu = strtol(cp, NULL, 10);
454		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455		freeenv(cp);
456		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457		if (error)
458			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459	}
460	soclose(so);
461
462	/*
463	 * If the gateway field is filled in, set it as the default route.
464	 * Note that pxeboot will set a default route of 0 if the route
465	 * is not set by the DHCP server.  Check also for a value of 0
466	 * to avoid panicking inappropriately in that situation.
467	 */
468	if (nd->mygateway.sin_len != 0 &&
469	    nd->mygateway.sin_addr.s_addr != 0) {
470		struct sockaddr_in mask, sin;
471
472		bzero((caddr_t)&mask, sizeof(mask));
473		sin = mask;
474		sin.sin_family = AF_INET;
475		sin.sin_len = sizeof(sin);
476                /* XXX MRT use table 0 for this sort of thing */
477		CURVNET_SET(TD_TO_VNET(td));
478		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
479		    (struct sockaddr *)&nd->mygateway,
480		    (struct sockaddr *)&mask,
481		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
482		CURVNET_RESTORE();
483		if (error)
484			panic("nfs_mountroot: RTM_ADD: %d", error);
485	}
486
487	/*
488	 * Create the rootfs mount point.
489	 */
490	nd->root_args.fh = nd->root_fh;
491	nd->root_args.fhsize = nd->root_fhsize;
492	l = ntohl(nd->root_saddr.sin_addr.s_addr);
493	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
494		(l >> 24) & 0xff, (l >> 16) & 0xff,
495		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
496	printf("NFS ROOT: %s\n", buf);
497	nd->root_args.hostname = buf;
498	if ((error = nfs_mountdiskless(buf,
499	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500		return (error);
501	}
502
503	/*
504	 * This is not really an nfs issue, but it is much easier to
505	 * set hostname here and then let the "/etc/rc.xxx" files
506	 * mount the right /var based upon its preset value.
507	 */
508	mtx_lock(&prison0.pr_mtx);
509	strlcpy(prison0.pr_hostname, nd->my_hostnam,
510	    sizeof(prison0.pr_hostname));
511	mtx_unlock(&prison0.pr_mtx);
512	inittodr(ntohl(nd->root_time));
513	return (0);
514}
515
516/*
517 * Internal version of mount system call for diskless setup.
518 */
519static int
520nfs_mountdiskless(char *path,
521    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
522    struct vnode **vpp, struct mount *mp)
523{
524	struct sockaddr *nam;
525	int dirlen, error;
526	char *dirpath;
527
528	/*
529	 * Find the directory path in "path", which also has the server's
530	 * name/ip address in it.
531	 */
532	dirpath = strchr(path, ':');
533	if (dirpath != NULL)
534		dirlen = strlen(++dirpath);
535	else
536		dirlen = 0;
537	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
538	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
539	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
540	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
541		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
542		return (error);
543	}
544	return (0);
545}
546
547static void
548nfs_sec_name(char *sec, int *flagsp)
549{
550	if (!strcmp(sec, "krb5"))
551		*flagsp |= NFSMNT_KERB;
552	else if (!strcmp(sec, "krb5i"))
553		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
554	else if (!strcmp(sec, "krb5p"))
555		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
556}
557
558static void
559nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
560    const char *hostname, struct ucred *cred, struct thread *td)
561{
562	int s;
563	int adjsock;
564	char *p;
565
566	s = splnet();
567
568	/*
569	 * Set read-only flag if requested; otherwise, clear it if this is
570	 * an update.  If this is not an update, then either the read-only
571	 * flag is already clear, or this is a root mount and it was set
572	 * intentionally at some previous point.
573	 */
574	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
575		MNT_ILOCK(mp);
576		mp->mnt_flag |= MNT_RDONLY;
577		MNT_IUNLOCK(mp);
578	} else if (mp->mnt_flag & MNT_UPDATE) {
579		MNT_ILOCK(mp);
580		mp->mnt_flag &= ~MNT_RDONLY;
581		MNT_IUNLOCK(mp);
582	}
583
584	/*
585	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
586	 * no sense in that context.  Also, set up appropriate retransmit
587	 * and soft timeout behavior.
588	 */
589	if (argp->sotype == SOCK_STREAM) {
590		nmp->nm_flag &= ~NFSMNT_NOCONN;
591		nmp->nm_timeo = NFS_MAXTIMEO;
592		if ((argp->flags & NFSMNT_NFSV4) != 0)
593			nmp->nm_retry = INT_MAX;
594		else
595			nmp->nm_retry = NFS_RETRANS_TCP;
596	}
597
598	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
599	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
600		argp->flags &= ~NFSMNT_RDIRPLUS;
601		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
602	}
603
604	/* Re-bind if rsrvd port requested and wasn't on one */
605	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
606		  && (argp->flags & NFSMNT_RESVPORT);
607	/* Also re-bind if we're switching to/from a connected UDP socket */
608	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
609		    (argp->flags & NFSMNT_NOCONN));
610
611	/* Update flags atomically.  Don't change the lock bits. */
612	nmp->nm_flag = argp->flags | nmp->nm_flag;
613	splx(s);
614
615	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
616		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
617		if (nmp->nm_timeo < NFS_MINTIMEO)
618			nmp->nm_timeo = NFS_MINTIMEO;
619		else if (nmp->nm_timeo > NFS_MAXTIMEO)
620			nmp->nm_timeo = NFS_MAXTIMEO;
621	}
622
623	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
624		nmp->nm_retry = argp->retrans;
625		if (nmp->nm_retry > NFS_MAXREXMIT)
626			nmp->nm_retry = NFS_MAXREXMIT;
627	}
628
629	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
630		nmp->nm_wsize = argp->wsize;
631		/*
632		 * Clip at the power of 2 below the size. There is an
633		 * issue (not isolated) that causes intermittent page
634		 * faults if this is not done.
635		 */
636		if (nmp->nm_wsize > NFS_FABLKSIZE)
637			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
638		else
639			nmp->nm_wsize = NFS_FABLKSIZE;
640	}
641
642	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
643		nmp->nm_rsize = argp->rsize;
644		/*
645		 * Clip at the power of 2 below the size. There is an
646		 * issue (not isolated) that causes intermittent page
647		 * faults if this is not done.
648		 */
649		if (nmp->nm_rsize > NFS_FABLKSIZE)
650			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
651		else
652			nmp->nm_rsize = NFS_FABLKSIZE;
653	}
654
655	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
656		nmp->nm_readdirsize = argp->readdirsize;
657	}
658
659	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
660		nmp->nm_acregmin = argp->acregmin;
661	else
662		nmp->nm_acregmin = NFS_MINATTRTIMO;
663	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
664		nmp->nm_acregmax = argp->acregmax;
665	else
666		nmp->nm_acregmax = NFS_MAXATTRTIMO;
667	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
668		nmp->nm_acdirmin = argp->acdirmin;
669	else
670		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
671	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
672		nmp->nm_acdirmax = argp->acdirmax;
673	else
674		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
675	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
676		nmp->nm_acdirmin = nmp->nm_acdirmax;
677	if (nmp->nm_acregmin > nmp->nm_acregmax)
678		nmp->nm_acregmin = nmp->nm_acregmax;
679
680	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
681		if (argp->readahead <= NFS_MAXRAHEAD)
682			nmp->nm_readahead = argp->readahead;
683		else
684			nmp->nm_readahead = NFS_MAXRAHEAD;
685	}
686	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
687		if (argp->wcommitsize < nmp->nm_wsize)
688			nmp->nm_wcommitsize = nmp->nm_wsize;
689		else
690			nmp->nm_wcommitsize = argp->wcommitsize;
691	}
692
693	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
694		    (nmp->nm_soproto != argp->proto));
695
696	if (nmp->nm_client != NULL && adjsock) {
697		int haslock = 0, error = 0;
698
699		if (nmp->nm_sotype == SOCK_STREAM) {
700			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
701			if (!error)
702				haslock = 1;
703		}
704		if (!error) {
705		    newnfs_disconnect(&nmp->nm_sockreq);
706		    if (haslock)
707			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
708		    nmp->nm_sotype = argp->sotype;
709		    nmp->nm_soproto = argp->proto;
710		    if (nmp->nm_sotype == SOCK_DGRAM)
711			while (newnfs_connect(nmp, &nmp->nm_sockreq,
712			    cred, td, 0)) {
713				printf("newnfs_args: retrying connect\n");
714				(void) nfs_catnap(PSOCK, 0, "nfscon");
715			}
716		}
717	} else {
718		nmp->nm_sotype = argp->sotype;
719		nmp->nm_soproto = argp->proto;
720	}
721
722	if (hostname != NULL) {
723		strlcpy(nmp->nm_hostname, hostname,
724		    sizeof(nmp->nm_hostname));
725		p = strchr(nmp->nm_hostname, ':');
726		if (p != NULL)
727			*p = '\0';
728	}
729}
730
/*
 * NULL-terminated list of the option names accepted by nfs_mount(); it is
 * handed to vfs_filteropt() there.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion", "nametimeo", "negnametimeo",
	"nocto", "noncontigwr", "pnfs", "wcommitsize",
	NULL
};
742
743/*
744 * VFS Operations.
745 *
746 * mount system call
747 * It seems a bit dumb to copyinstr() the host and path here and then
748 * bcopy() them in mountnfs(), but I wanted to detect errors before
749 * doing the sockargs() call because sockargs() allocates an mbuf and
750 * an error after that means that I have to release the mbuf.
751 */
752/* ARGSUSED */
753static int
754nfs_mount(struct mount *mp)
755{
756	struct nfs_args args = {
757	    .version = NFS_ARGSVERSION,
758	    .addr = NULL,
759	    .addrlen = sizeof (struct sockaddr_in),
760	    .sotype = SOCK_STREAM,
761	    .proto = 0,
762	    .fh = NULL,
763	    .fhsize = 0,
764	    .flags = NFSMNT_RESVPORT,
765	    .wsize = NFS_WSIZE,
766	    .rsize = NFS_RSIZE,
767	    .readdirsize = NFS_READDIRSIZE,
768	    .timeo = 10,
769	    .retrans = NFS_RETRANS,
770	    .readahead = NFS_DEFRAHEAD,
771	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
772	    .hostname = NULL,
773	    .acregmin = NFS_MINATTRTIMO,
774	    .acregmax = NFS_MAXATTRTIMO,
775	    .acdirmin = NFS_MINDIRATTRTIMO,
776	    .acdirmax = NFS_MAXDIRATTRTIMO,
777	};
778	int error = 0, ret, len;
779	struct sockaddr *nam = NULL;
780	struct vnode *vp;
781	struct thread *td;
782	char hst[MNAMELEN];
783	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
784	char *cp, *opt, *name, *secname;
785	int nametimeo = NFS_DEFAULT_NAMETIMEO;
786	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
787	int minvers = 0;
788	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
789	size_t hstlen;
790
791	has_nfs_args_opt = 0;
792	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
793		error = EINVAL;
794		goto out;
795	}
796
797	td = curthread;
798	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
799		error = nfs_mountroot(mp);
800		goto out;
801	}
802
803	nfscl_init();
804
805	/*
806	 * The old mount_nfs program passed the struct nfs_args
807	 * from userspace to kernel.  The new mount_nfs program
808	 * passes string options via nmount() from userspace to kernel
809	 * and we populate the struct nfs_args in the kernel.
810	 */
811	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
812		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
813		    sizeof(args));
814		if (error != 0)
815			goto out;
816
817		if (args.version != NFS_ARGSVERSION) {
818			error = EPROGMISMATCH;
819			goto out;
820		}
821		has_nfs_args_opt = 1;
822	}
823
824	/* Handle the new style options. */
825	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
826		args.acdirmin = args.acdirmax =
827		    args.acregmin = args.acregmax = 0;
828		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
829		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
830	}
831	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
832		args.flags |= NFSMNT_NOCONN;
833	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
834		args.flags &= ~NFSMNT_NOCONN;
835	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
836		args.flags |= NFSMNT_NOLOCKD;
837	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
838		args.flags &= ~NFSMNT_NOLOCKD;
839	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
840		args.flags |= NFSMNT_INT;
841	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
842		args.flags |= NFSMNT_RDIRPLUS;
843	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
844		args.flags |= NFSMNT_RESVPORT;
845	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
846		args.flags &= ~NFSMNT_RESVPORT;
847	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
848		args.flags |= NFSMNT_SOFT;
849	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
850		args.flags &= ~NFSMNT_SOFT;
851	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
852		args.sotype = SOCK_DGRAM;
853	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
854		args.sotype = SOCK_DGRAM;
855	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
856		args.sotype = SOCK_STREAM;
857	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
858		args.flags |= NFSMNT_NFSV3;
859	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
860		args.flags |= NFSMNT_NFSV4;
861		args.sotype = SOCK_STREAM;
862	}
863	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
864		args.flags |= NFSMNT_ALLGSSNAME;
865	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
866		args.flags |= NFSMNT_NOCTO;
867	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
868		args.flags |= NFSMNT_NONCONTIGWR;
869	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
870		args.flags |= NFSMNT_PNFS;
871	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
872		if (opt == NULL) {
873			vfs_mount_error(mp, "illegal readdirsize");
874			error = EINVAL;
875			goto out;
876		}
877		ret = sscanf(opt, "%d", &args.readdirsize);
878		if (ret != 1 || args.readdirsize <= 0) {
879			vfs_mount_error(mp, "illegal readdirsize: %s",
880			    opt);
881			error = EINVAL;
882			goto out;
883		}
884		args.flags |= NFSMNT_READDIRSIZE;
885	}
886	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
887		if (opt == NULL) {
888			vfs_mount_error(mp, "illegal readahead");
889			error = EINVAL;
890			goto out;
891		}
892		ret = sscanf(opt, "%d", &args.readahead);
893		if (ret != 1 || args.readahead <= 0) {
894			vfs_mount_error(mp, "illegal readahead: %s",
895			    opt);
896			error = EINVAL;
897			goto out;
898		}
899		args.flags |= NFSMNT_READAHEAD;
900	}
901	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
902		if (opt == NULL) {
903			vfs_mount_error(mp, "illegal wsize");
904			error = EINVAL;
905			goto out;
906		}
907		ret = sscanf(opt, "%d", &args.wsize);
908		if (ret != 1 || args.wsize <= 0) {
909			vfs_mount_error(mp, "illegal wsize: %s",
910			    opt);
911			error = EINVAL;
912			goto out;
913		}
914		args.flags |= NFSMNT_WSIZE;
915	}
916	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
917		if (opt == NULL) {
918			vfs_mount_error(mp, "illegal rsize");
919			error = EINVAL;
920			goto out;
921		}
922		ret = sscanf(opt, "%d", &args.rsize);
923		if (ret != 1 || args.rsize <= 0) {
924			vfs_mount_error(mp, "illegal wsize: %s",
925			    opt);
926			error = EINVAL;
927			goto out;
928		}
929		args.flags |= NFSMNT_RSIZE;
930	}
931	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
932		if (opt == NULL) {
933			vfs_mount_error(mp, "illegal retrans");
934			error = EINVAL;
935			goto out;
936		}
937		ret = sscanf(opt, "%d", &args.retrans);
938		if (ret != 1 || args.retrans <= 0) {
939			vfs_mount_error(mp, "illegal retrans: %s",
940			    opt);
941			error = EINVAL;
942			goto out;
943		}
944		args.flags |= NFSMNT_RETRANS;
945	}
946	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
947		ret = sscanf(opt, "%d", &args.acregmin);
948		if (ret != 1 || args.acregmin < 0) {
949			vfs_mount_error(mp, "illegal actimeo: %s",
950			    opt);
951			error = EINVAL;
952			goto out;
953		}
954		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
955		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
956		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
957	}
958	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
959		ret = sscanf(opt, "%d", &args.acregmin);
960		if (ret != 1 || args.acregmin < 0) {
961			vfs_mount_error(mp, "illegal acregmin: %s",
962			    opt);
963			error = EINVAL;
964			goto out;
965		}
966		args.flags |= NFSMNT_ACREGMIN;
967	}
968	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
969		ret = sscanf(opt, "%d", &args.acregmax);
970		if (ret != 1 || args.acregmax < 0) {
971			vfs_mount_error(mp, "illegal acregmax: %s",
972			    opt);
973			error = EINVAL;
974			goto out;
975		}
976		args.flags |= NFSMNT_ACREGMAX;
977	}
978	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
979		ret = sscanf(opt, "%d", &args.acdirmin);
980		if (ret != 1 || args.acdirmin < 0) {
981			vfs_mount_error(mp, "illegal acdirmin: %s",
982			    opt);
983			error = EINVAL;
984			goto out;
985		}
986		args.flags |= NFSMNT_ACDIRMIN;
987	}
988	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
989		ret = sscanf(opt, "%d", &args.acdirmax);
990		if (ret != 1 || args.acdirmax < 0) {
991			vfs_mount_error(mp, "illegal acdirmax: %s",
992			    opt);
993			error = EINVAL;
994			goto out;
995		}
996		args.flags |= NFSMNT_ACDIRMAX;
997	}
998	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
999		ret = sscanf(opt, "%d", &args.wcommitsize);
1000		if (ret != 1 || args.wcommitsize < 0) {
1001			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1002			error = EINVAL;
1003			goto out;
1004		}
1005		args.flags |= NFSMNT_WCOMMITSIZE;
1006	}
1007	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1008		ret = sscanf(opt, "%d", &args.timeo);
1009		if (ret != 1 || args.timeo <= 0) {
1010			vfs_mount_error(mp, "illegal timeo: %s",
1011			    opt);
1012			error = EINVAL;
1013			goto out;
1014		}
1015		args.flags |= NFSMNT_TIMEO;
1016	}
1017	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1018		ret = sscanf(opt, "%d", &args.timeo);
1019		if (ret != 1 || args.timeo <= 0) {
1020			vfs_mount_error(mp, "illegal timeout: %s",
1021			    opt);
1022			error = EINVAL;
1023			goto out;
1024		}
1025		args.flags |= NFSMNT_TIMEO;
1026	}
1027	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1028		ret = sscanf(opt, "%d", &nametimeo);
1029		if (ret != 1 || nametimeo < 0) {
1030			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1031			error = EINVAL;
1032			goto out;
1033		}
1034	}
1035	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1036	    == 0) {
1037		ret = sscanf(opt, "%d", &negnametimeo);
1038		if (ret != 1 || negnametimeo < 0) {
1039			vfs_mount_error(mp, "illegal negnametimeo: %s",
1040			    opt);
1041			error = EINVAL;
1042			goto out;
1043		}
1044	}
1045	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1046	    0) {
1047		ret = sscanf(opt, "%d", &minvers);
1048		if (ret != 1 || minvers < 0 || minvers > 1 ||
1049		    (args.flags & NFSMNT_NFSV4) == 0) {
1050			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1051			error = EINVAL;
1052			goto out;
1053		}
1054	}
1055	if (vfs_getopt(mp->mnt_optnew, "sec",
1056		(void **) &secname, NULL) == 0)
1057		nfs_sec_name(secname, &args.flags);
1058
1059	if (mp->mnt_flag & MNT_UPDATE) {
1060		struct nfsmount *nmp = VFSTONFS(mp);
1061
1062		if (nmp == NULL) {
1063			error = EIO;
1064			goto out;
1065		}
1066
1067		/*
1068		 * If a change from TCP->UDP is done and there are thread(s)
1069		 * that have I/O RPC(s) in progress with a transfer size
1070		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1071		 * hung, retrying the RPC(s) forever. Usually these threads
1072		 * will be seen doing an uninterruptible sleep on wait channel
1073		 * "nfsreq".
1074		 */
1075		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1076			tprintf(td->td_proc, LOG_WARNING,
1077	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1078
1079		/*
1080		 * When doing an update, we can't change version,
1081		 * security, switch lockd strategies or change cookie
1082		 * translation
1083		 */
1084		args.flags = (args.flags &
1085		    ~(NFSMNT_NFSV3 |
1086		      NFSMNT_NFSV4 |
1087		      NFSMNT_KERB |
1088		      NFSMNT_INTEGRITY |
1089		      NFSMNT_PRIVACY |
1090		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1091		    (nmp->nm_flag &
1092			(NFSMNT_NFSV3 |
1093			 NFSMNT_NFSV4 |
1094			 NFSMNT_KERB |
1095			 NFSMNT_INTEGRITY |
1096			 NFSMNT_PRIVACY |
1097			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1098		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1099		goto out;
1100	}
1101
1102	/*
1103	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1104	 * or no-connection mode for those protocols that support
1105	 * no-connection mode (the flag will be cleared later for protocols
1106	 * that do not support no-connection mode).  This will allow a client
1107	 * to receive replies from a different IP then the request was
1108	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1109	 * not 0.
1110	 */
1111	if (nfs_ip_paranoia == 0)
1112		args.flags |= NFSMNT_NOCONN;
1113
1114	if (has_nfs_args_opt != 0) {
1115		/*
1116		 * In the 'nfs_args' case, the pointers in the args
1117		 * structure are in userland - we copy them in here.
1118		 */
1119		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1120			vfs_mount_error(mp, "Bad file handle");
1121			error = EINVAL;
1122			goto out;
1123		}
1124		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1125		    args.fhsize);
1126		if (error != 0)
1127			goto out;
1128		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1129		if (error != 0)
1130			goto out;
1131		bzero(&hst[hstlen], MNAMELEN - hstlen);
1132		args.hostname = hst;
1133		/* sockargs() call must be after above copyin() calls */
1134		error = getsockaddr(&nam, (caddr_t)args.addr,
1135		    args.addrlen);
1136		if (error != 0)
1137			goto out;
1138	} else {
1139		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1140		    &args.fhsize) == 0) {
1141			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1142				vfs_mount_error(mp, "Bad file handle");
1143				error = EINVAL;
1144				goto out;
1145			}
1146			bcopy(args.fh, nfh, args.fhsize);
1147		} else {
1148			args.fhsize = 0;
1149		}
1150		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1151		    (void **)&args.hostname, &len);
1152		if (args.hostname == NULL) {
1153			vfs_mount_error(mp, "Invalid hostname");
1154			error = EINVAL;
1155			goto out;
1156		}
1157		bcopy(args.hostname, hst, MNAMELEN);
1158		hst[MNAMELEN - 1] = '\0';
1159	}
1160
1161	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1162		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1163	else {
1164		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1165		cp = strchr(srvkrbname, ':');
1166		if (cp != NULL)
1167			*cp = '\0';
1168	}
1169	srvkrbnamelen = strlen(srvkrbname);
1170
1171	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1172		strlcpy(krbname, name, sizeof (krbname));
1173	else
1174		krbname[0] = '\0';
1175	krbnamelen = strlen(krbname);
1176
1177	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1178		strlcpy(dirpath, name, sizeof (dirpath));
1179	else
1180		dirpath[0] = '\0';
1181	dirlen = strlen(dirpath);
1182
1183	if (has_nfs_args_opt == 0) {
1184		if (vfs_getopt(mp->mnt_optnew, "addr",
1185		    (void **)&args.addr, &args.addrlen) == 0) {
1186			if (args.addrlen > SOCK_MAXADDRLEN) {
1187				error = ENAMETOOLONG;
1188				goto out;
1189			}
1190			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1191			bcopy(args.addr, nam, args.addrlen);
1192			nam->sa_len = args.addrlen;
1193		} else {
1194			vfs_mount_error(mp, "No server address");
1195			error = EINVAL;
1196			goto out;
1197		}
1198	}
1199
1200	args.fh = nfh;
1201	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1202	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1203	    nametimeo, negnametimeo, minvers);
1204out:
1205	if (!error) {
1206		MNT_ILOCK(mp);
1207		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1208		    MNTK_USES_BCACHE;
1209		MNT_IUNLOCK(mp);
1210	}
1211	return (error);
1212}
1213
1214
1215/*
1216 * VFS Operations.
1217 *
1218 * mount system call
1219 * It seems a bit dumb to copyinstr() the host and path here and then
1220 * bcopy() them in mountnfs(), but I wanted to detect errors before
1221 * doing the sockargs() call because sockargs() allocates an mbuf and
1222 * an error after that means that I have to release the mbuf.
1223 */
1224/* ARGSUSED */
1225static int
1226nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1227{
1228	int error;
1229	struct nfs_args args;
1230
1231	error = copyin(data, &args, sizeof (struct nfs_args));
1232	if (error)
1233		return error;
1234
1235	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1236
1237	error = kernel_mount(ma, flags);
1238	return (error);
1239}
1240
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in a new struct nfsmount for "mp", applies the
 * mount arguments through nfs_decode_args(), connects to the server
 * with newnfs_connect() and, when a root file handle is known, gets the
 * root nfsnode via ncl_nget() and returns its vnode in *vpp (unlocked,
 * but still referenced).
 *
 * On success 0 is returned and "nam" has been stored in nmp->nm_nam.
 * On failure an errno value is returned (EIO when no root file handle
 * is available after the optional NFSv4 path lookup) and both the
 * nfsmount structure and "nam" are freed here.
 *
 * If MNT_UPDATE is set in mp->mnt_flag, nothing is mounted: a message
 * is printed, "nam" is freed and 0 is returned.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Seeded from nfsboottime on first use, then bumped for every mount. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* nmp is assigned here but not used before returning. */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The three names are stored after the structure; the
		 * allocation is zeroed (M_ZERO).  The extra 2 bytes are
		 * presumably for the separating NUL bytes -- the exact
		 * layout is defined by the nm_krbname/NFSMNT_DIRPATH()/
		 * NFSMNT_SRVKRBNAME() macros, which are not visible here.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* The socket request keeps its own reference on cred. */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/*
	 * For V2 (neither V3 nor V4 flag set) the transfer sizes are
	 * forced to the NFS_* defaults, overriding whatever
	 * nfs_decode_args() stored above.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	/* Copies a full MNAMELEN bytes, so hst must be at least that large. */
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, napping after each failed one. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			/*
			 * clp was obtained by nfscl_getcl() above; record
			 * the lease times and bump the clientid revision,
			 * skipping the value 0 on wrap-around.
			 */
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* No root file handle could be determined. */
	error = EIO;

bad:
	/*
	 * Error exit: release everything set up above.
	 * NOTE(review): mp->mnt_data was set to nmp earlier and is not
	 * cleared here before nmp is freed -- confirm that callers do not
	 * use it after a failed mount.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1523
1524/*
1525 * unmount system call
1526 */
1527static int
1528nfs_unmount(struct mount *mp, int mntflags)
1529{
1530	struct thread *td;
1531	struct nfsmount *nmp;
1532	int error, flags = 0, i, trycnt = 0;
1533	struct nfsclds *dsp, *tdsp;
1534
1535	td = curthread;
1536
1537	if (mntflags & MNT_FORCE)
1538		flags |= FORCECLOSE;
1539	nmp = VFSTONFS(mp);
1540	/*
1541	 * Goes something like this..
1542	 * - Call vflush() to clear out vnodes for this filesystem
1543	 * - Close the socket
1544	 * - Free up the data structures
1545	 */
1546	/* In the forced case, cancel any outstanding requests. */
1547	if (mntflags & MNT_FORCE) {
1548		error = newnfs_nmcancelreqs(nmp);
1549		if (error)
1550			goto out;
1551		/* For a forced close, get rid of the renew thread now */
1552		nfscl_umount(nmp, td);
1553	}
1554	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1555	do {
1556		error = vflush(mp, 1, flags, td);
1557		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1558			(void) nfs_catnap(PSOCK, error, "newndm");
1559	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1560	if (error)
1561		goto out;
1562
1563	/*
1564	 * We are now committed to the unmount.
1565	 */
1566	if ((mntflags & MNT_FORCE) == 0)
1567		nfscl_umount(nmp, td);
1568	/* Make sure no nfsiods are assigned to this mount. */
1569	mtx_lock(&ncl_iod_mutex);
1570	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1571		if (ncl_iodmount[i] == nmp) {
1572			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1573			ncl_iodmount[i] = NULL;
1574		}
1575	mtx_unlock(&ncl_iod_mutex);
1576	newnfs_disconnect(&nmp->nm_sockreq);
1577	crfree(nmp->nm_sockreq.nr_cred);
1578	FREE(nmp->nm_nam, M_SONAME);
1579	if (nmp->nm_sockreq.nr_auth != NULL)
1580		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1581	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1582	mtx_destroy(&nmp->nm_mtx);
1583	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1584		nfscl_freenfsclds(dsp);
1585	FREE(nmp, M_NEWNFSMNT);
1586out:
1587	return (error);
1588}
1589
1590/*
1591 * Return root of a filesystem
1592 */
1593static int
1594nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1595{
1596	struct vnode *vp;
1597	struct nfsmount *nmp;
1598	struct nfsnode *np;
1599	int error;
1600
1601	nmp = VFSTONFS(mp);
1602	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1603	if (error)
1604		return error;
1605	vp = NFSTOV(np);
1606	/*
1607	 * Get transfer parameters and attributes for root vnode once.
1608	 */
1609	mtx_lock(&nmp->nm_mtx);
1610	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1611		mtx_unlock(&nmp->nm_mtx);
1612		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1613	} else
1614		mtx_unlock(&nmp->nm_mtx);
1615	if (vp->v_type == VNON)
1616	    vp->v_type = VDIR;
1617	vp->v_vflag |= VV_ROOT;
1618	*vpp = vp;
1619	return (0);
1620}
1621
1622/*
1623 * Flush out the buffer cache
1624 */
1625/* ARGSUSED */
1626static int
1627nfs_sync(struct mount *mp, int waitfor)
1628{
1629	struct vnode *vp, *mvp;
1630	struct thread *td;
1631	int error, allerror = 0;
1632
1633	td = curthread;
1634
1635	MNT_ILOCK(mp);
1636	/*
1637	 * If a forced dismount is in progress, return from here so that
1638	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1639	 * calling VFS_UNMOUNT().
1640	 */
1641	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1642		MNT_IUNLOCK(mp);
1643		return (EBADF);
1644	}
1645	MNT_IUNLOCK(mp);
1646
1647	/*
1648	 * Force stale buffer cache information to be flushed.
1649	 */
1650loop:
1651	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1652		/* XXX Racy bv_cnt check. */
1653		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1654		    waitfor == MNT_LAZY) {
1655			VI_UNLOCK(vp);
1656			continue;
1657		}
1658		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1659			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1660			goto loop;
1661		}
1662		error = VOP_FSYNC(vp, waitfor, td);
1663		if (error)
1664			allerror = error;
1665		NFSVOPUNLOCK(vp, 0);
1666		vrele(vp);
1667	}
1668	return (allerror);
1669}
1670
1671static int
1672nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1673{
1674	struct nfsmount *nmp = VFSTONFS(mp);
1675	struct vfsquery vq;
1676	int error;
1677
1678	bzero(&vq, sizeof(vq));
1679	switch (op) {
1680#if 0
1681	case VFS_CTL_NOLOCKS:
1682		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1683 		if (req->oldptr != NULL) {
1684 			error = SYSCTL_OUT(req, &val, sizeof(val));
1685 			if (error)
1686 				return (error);
1687 		}
1688 		if (req->newptr != NULL) {
1689 			error = SYSCTL_IN(req, &val, sizeof(val));
1690 			if (error)
1691 				return (error);
1692			if (val)
1693				nmp->nm_flag |= NFSMNT_NOLOCKS;
1694			else
1695				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1696 		}
1697		break;
1698#endif
1699	case VFS_CTL_QUERY:
1700		mtx_lock(&nmp->nm_mtx);
1701		if (nmp->nm_state & NFSSTA_TIMEO)
1702			vq.vq_flags |= VQ_NOTRESP;
1703		mtx_unlock(&nmp->nm_mtx);
1704#if 0
1705		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1706		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1707			vq.vq_flags |= VQ_NOTRESPLOCK;
1708#endif
1709		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1710		break;
1711 	case VFS_CTL_TIMEO:
1712 		if (req->oldptr != NULL) {
1713 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1714 			    sizeof(nmp->nm_tprintf_initial_delay));
1715 			if (error)
1716 				return (error);
1717 		}
1718 		if (req->newptr != NULL) {
1719			error = vfs_suser(mp, req->td);
1720			if (error)
1721				return (error);
1722 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1723 			    sizeof(nmp->nm_tprintf_initial_delay));
1724 			if (error)
1725 				return (error);
1726 			if (nmp->nm_tprintf_initial_delay < 0)
1727 				nmp->nm_tprintf_initial_delay = 0;
1728 		}
1729		break;
1730	default:
1731		return (ENOTSUP);
1732	}
1733	return (0);
1734}
1735
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of newnfs_nmcancelreqs() is not
 * reported, since this operation has no return value.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1748
1749/*
1750 * Extract the information needed by the nlm from the nfs vnode.
1751 */
1752static void
1753nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1754    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1755    struct timeval *timeop)
1756{
1757	struct nfsmount *nmp;
1758	struct nfsnode *np = VTONFS(vp);
1759
1760	nmp = VFSTONFS(vp->v_mount);
1761	if (fhlenp != NULL)
1762		*fhlenp = (size_t)np->n_fhp->nfh_len;
1763	if (fhp != NULL)
1764		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1765	if (sp != NULL)
1766		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1767	if (is_v3p != NULL)
1768		*is_v3p = NFS_ISV3(vp);
1769	if (sizep != NULL)
1770		*sizep = np->n_size;
1771	if (timeop != NULL) {
1772		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1773		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1774	}
1775}
1776
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name (plus its terminating NUL) fits in the space
 * that is left.  *buf and *blen are advanced past the text written;
 * otherwise they are left untouched.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the size check above; complain if it does. */
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1794
/*
 * This function prints out an option's integer value as "opt=value".
 *
 * The text is appended at *buf when at least the option name and one
 * more character fit in the *blen bytes left.  If the complete
 * "opt=value" string fits, *buf and *blen are advanced past it.  If the
 * value turns out not to fit, the partial output is discarded (the
 * buffer is re-terminated at *buf) so that the result never contains a
 * truncated, and therefore wrong, number; *buf and *blen are then left
 * unchanged.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: drop the partially written option. */
			**buf = '\0';
		}
	}
}
1812
1813/*
1814 * Load the option flags and values into the buffer.
1815 */
1816void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1817{
1818	char *buf;
1819	size_t blen;
1820
1821	buf = buffer;
1822	blen = buflen;
1823	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1824	    &blen);
1825	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1826		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1827		    &blen);
1828		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1829		    &buf, &blen);
1830	}
1831	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1832	    &blen);
1833	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1834	    "nfsv2", &buf, &blen);
1835	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1836	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1837	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1838	    &buf, &blen);
1839	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1840	    &buf, &blen);
1841	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1842	    &blen);
1843	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1844	    &blen);
1845	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1846	    &blen);
1847	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1848	    &blen);
1849	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1850	    &blen);
1851	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1852	    ",noncontigwr", &buf, &blen);
1853	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1854	    0, ",lockd", &buf, &blen);
1855	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1856	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1857	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1858	    &buf, &blen);
1859	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1860	    &buf, &blen);
1861	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1862	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1863	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1864	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1865	    &buf, &blen);
1866	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1867	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1868	    &buf, &blen);
1869	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1870	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1871	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1872	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1873	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1874	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1875	    &blen);
1876	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1877	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1878	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1879	    &blen);
1880	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1881	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1882	    &blen);
1883	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1884	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1885}
1886
1887