nfs_clvfsops.c revision 301564
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 301564 2016-06-07 20:00:20Z cem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
/* Declare the "nfscl" kernel feature, described as the NFSv4 client. */
FEATURE(nfscl, "NFSv4 client");

/* State that is defined in other NFS client source files. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* Allocation types for NFS request headers and NFS mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");

/* Read/write integer knobs published under the _vfs_nfs sysctl node. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the next two SYSCTL_INT()s pass the same macro that is
 * used as the variable's initial value in the argument position where the
 * other entries in this file pass OID_AUTO -- confirm this is intentional.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/*
 * Interval between repeated "nfs server foo not responding" console
 * messages.
 */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
/* The debug toggle only exists in kernels compiled with NFS_DEBUG. */
#ifdef NFS_DEBUG
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
108
/*
 * Forward declarations: file-local helpers first, then the VFS entry
 * points that are installed in nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;

/*
 * NFS VFS operations vector.  Initialization and teardown are delegated
 * to ncl_init/ncl_uninit; every other slot points at a static function
 * declared above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the vector under the file system name "nfs". */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/*
 * Module version and dependencies, so that the loader and kldload(2) can
 * find us wherever we are.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
153
/*
 * These structures are now defined in sys/nfs/nfs_diskless.c so that they
 * can be shared by both NFS clients.  They are defined here as well, zero
 * filled, for kernels built without NFS_ROOT so that the references below
 * still resolve, although they are not used in that case.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only sysctl views of the diskless boot state. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");


/* Prototypes for the argument conversion and diskless mount helpers. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185
186int
187newnfs_iosize(struct nfsmount *nmp)
188{
189	int iosize, maxio;
190
191	/* First, set the upper limit for iosize */
192	if (nmp->nm_flag & NFSMNT_NFSV4) {
193		maxio = NFS_MAXBSIZE;
194	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195		if (nmp->nm_sotype == SOCK_DGRAM)
196			maxio = NFS_MAXDGRAMDATA;
197		else
198			maxio = NFS_MAXBSIZE;
199	} else {
200		maxio = NFS_V2MAXDATA;
201	}
202	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203		nmp->nm_rsize = maxio;
204	if (nmp->nm_rsize > NFS_MAXBSIZE)
205		nmp->nm_rsize = NFS_MAXBSIZE;
206	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207		nmp->nm_readdirsize = maxio;
208	if (nmp->nm_readdirsize > nmp->nm_rsize)
209		nmp->nm_readdirsize = nmp->nm_rsize;
210	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211		nmp->nm_wsize = maxio;
212	if (nmp->nm_wsize > NFS_MAXBSIZE)
213		nmp->nm_wsize = NFS_MAXBSIZE;
214
215	/*
216	 * Calculate the size used for io buffers.  Use the larger
217	 * of the two sizes to minimise nfs requests but make sure
218	 * that it is at least one VM page to avoid wasting buffer
219	 * space.  It must also be at least NFS_DIRBLKSIZ, since
220	 * that is the buffer size used for directories.
221	 */
222	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
223	iosize = imax(iosize, PAGE_SIZE);
224	iosize = imax(iosize, NFS_DIRBLKSIZ);
225	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
226	return (iosize);
227}
228
229static void
230nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
231{
232
233	args->version = NFS_ARGSVERSION;
234	args->addr = oargs->addr;
235	args->addrlen = oargs->addrlen;
236	args->sotype = oargs->sotype;
237	args->proto = oargs->proto;
238	args->fh = oargs->fh;
239	args->fhsize = oargs->fhsize;
240	args->flags = oargs->flags;
241	args->wsize = oargs->wsize;
242	args->rsize = oargs->rsize;
243	args->readdirsize = oargs->readdirsize;
244	args->timeo = oargs->timeo;
245	args->retrans = oargs->retrans;
246	args->readahead = oargs->readahead;
247	args->hostname = oargs->hostname;
248}
249
250static void
251nfs_convert_diskless(void)
252{
253
254	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
255		sizeof(struct ifaliasreq));
256	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
257		sizeof(struct sockaddr_in));
258	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
259	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
260		nfsv3_diskless.root_fhsize = NFSX_MYFH;
261		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
262	} else {
263		nfsv3_diskless.root_fhsize = NFSX_V2FH;
264		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
265	}
266	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
267		sizeof(struct sockaddr_in));
268	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
269	nfsv3_diskless.root_time = nfs_diskless.root_time;
270	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
271		MAXHOSTNAMELEN);
272	nfs_diskless_valid = 3;
273}
274
275/*
276 * nfs statfs call
277 */
278static int
279nfs_statfs(struct mount *mp, struct statfs *sbp)
280{
281	struct vnode *vp;
282	struct thread *td;
283	struct nfsmount *nmp = VFSTONFS(mp);
284	struct nfsvattr nfsva;
285	struct nfsfsinfo fs;
286	struct nfsstatfs sb;
287	int error = 0, attrflag, gotfsinfo = 0, ret;
288	struct nfsnode *np;
289
290	td = curthread;
291
292	error = vfs_busy(mp, MBF_NOWAIT);
293	if (error)
294		return (error);
295	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
296	if (error) {
297		vfs_unbusy(mp);
298		return (error);
299	}
300	vp = NFSTOV(np);
301	mtx_lock(&nmp->nm_mtx);
302	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
303		mtx_unlock(&nmp->nm_mtx);
304		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
305		    &attrflag, NULL);
306		if (!error)
307			gotfsinfo = 1;
308	} else
309		mtx_unlock(&nmp->nm_mtx);
310	if (!error)
311		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
312		    &attrflag, NULL);
313	if (error != 0)
314		NFSCL_DEBUG(2, "statfs=%d\n", error);
315	if (attrflag == 0) {
316		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
317		    td->td_ucred, td, &nfsva, NULL, NULL);
318		if (ret) {
319			/*
320			 * Just set default values to get things going.
321			 */
322			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
323			nfsva.na_vattr.va_type = VDIR;
324			nfsva.na_vattr.va_mode = 0777;
325			nfsva.na_vattr.va_nlink = 100;
326			nfsva.na_vattr.va_uid = (uid_t)0;
327			nfsva.na_vattr.va_gid = (gid_t)0;
328			nfsva.na_vattr.va_fileid = 2;
329			nfsva.na_vattr.va_gen = 1;
330			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
331			nfsva.na_vattr.va_size = 512 * 1024;
332		}
333	}
334	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
335	if (!error) {
336	    mtx_lock(&nmp->nm_mtx);
337	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
338		nfscl_loadfsinfo(nmp, &fs);
339	    nfscl_loadsbinfo(nmp, &sb, sbp);
340	    sbp->f_iosize = newnfs_iosize(nmp);
341	    mtx_unlock(&nmp->nm_mtx);
342	    if (sbp != &mp->mnt_stat) {
343		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
344		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
345	    }
346	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
347	} else if (NFS_ISV4(vp)) {
348		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
349	}
350	vput(vp);
351	vfs_unbusy(mp);
352	return (error);
353}
354
355/*
356 * nfs version 3 fsinfo rpc call
357 */
358int
359ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
360    struct thread *td)
361{
362	struct nfsfsinfo fs;
363	struct nfsvattr nfsva;
364	int error, attrflag;
365
366	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
367	if (!error) {
368		if (attrflag)
369			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
370			    1);
371		mtx_lock(&nmp->nm_mtx);
372		nfscl_loadfsinfo(nmp, &fs);
373		mtx_unlock(&nmp->nm_mtx);
374	}
375	return (error);
376}
377
378/*
379 * Mount a remote root fs via. nfs. This depends on the info in the
380 * nfs_diskless structure that has been filled in properly by some primary
381 * bootstrap.
382 * It goes something like this:
383 * - do enough of "ifconfig" by calling ifioctl() so that the system
384 *   can talk to the server
385 * - If nfs_diskless.mygateway is filled in, use that address as
386 *   a default gateway.
387 * - build the rootfs mount point and call mountnfs() to do the rest.
388 *
389 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
390 * structure, as well as other global NFS client variables here, as
391 * nfs_mountroot() will be called once in the boot before any other NFS
392 * client activity occurs.
393 */
394static int
395nfs_mountroot(struct mount *mp)
396{
397	struct thread *td = curthread;
398	struct nfsv3_diskless *nd = &nfsv3_diskless;
399	struct socket *so;
400	struct vnode *vp;
401	struct ifreq ir;
402	int error;
403	u_long l;
404	char buf[128];
405	char *cp;
406
407#if defined(BOOTP_NFSROOT) && defined(BOOTP)
408	bootpc_init();		/* use bootp to get nfs_diskless filled in */
409#elif defined(NFS_ROOT)
410	nfs_setup_diskless();
411#endif
412
413	if (nfs_diskless_valid == 0)
414		return (-1);
415	if (nfs_diskless_valid == 1)
416		nfs_convert_diskless();
417
418	/*
419	 * XXX splnet, so networks will receive...
420	 */
421	splnet();
422
423	/*
424	 * Do enough of ifconfig(8) so that the critical net interface can
425	 * talk to the server.
426	 */
427	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428	    td->td_ucred, td);
429	if (error)
430		panic("nfs_mountroot: socreate(%04x): %d",
431			nd->myif.ifra_addr.sa_family, error);
432
433#if 0 /* XXX Bad idea */
434	/*
435	 * We might not have been told the right interface, so we pass
436	 * over the first ten interfaces of the same kind, until we get
437	 * one of them configured.
438	 */
439
440	for (i = strlen(nd->myif.ifra_name) - 1;
441		nd->myif.ifra_name[i] >= '0' &&
442		nd->myif.ifra_name[i] <= '9';
443		nd->myif.ifra_name[i] ++) {
444		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445		if(!error)
446			break;
447	}
448#endif
449	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450	if (error)
451		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453		ir.ifr_mtu = strtol(cp, NULL, 10);
454		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455		freeenv(cp);
456		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457		if (error)
458			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459	}
460	soclose(so);
461
462	/*
463	 * If the gateway field is filled in, set it as the default route.
464	 * Note that pxeboot will set a default route of 0 if the route
465	 * is not set by the DHCP server.  Check also for a value of 0
466	 * to avoid panicking inappropriately in that situation.
467	 */
468	if (nd->mygateway.sin_len != 0 &&
469	    nd->mygateway.sin_addr.s_addr != 0) {
470		struct sockaddr_in mask, sin;
471
472		bzero((caddr_t)&mask, sizeof(mask));
473		sin = mask;
474		sin.sin_family = AF_INET;
475		sin.sin_len = sizeof(sin);
476                /* XXX MRT use table 0 for this sort of thing */
477		CURVNET_SET(TD_TO_VNET(td));
478		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
479		    (struct sockaddr *)&nd->mygateway,
480		    (struct sockaddr *)&mask,
481		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
482		CURVNET_RESTORE();
483		if (error)
484			panic("nfs_mountroot: RTM_ADD: %d", error);
485	}
486
487	/*
488	 * Create the rootfs mount point.
489	 */
490	nd->root_args.fh = nd->root_fh;
491	nd->root_args.fhsize = nd->root_fhsize;
492	l = ntohl(nd->root_saddr.sin_addr.s_addr);
493	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
494		(l >> 24) & 0xff, (l >> 16) & 0xff,
495		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
496	printf("NFS ROOT: %s\n", buf);
497	nd->root_args.hostname = buf;
498	if ((error = nfs_mountdiskless(buf,
499	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500		return (error);
501	}
502
503	/*
504	 * This is not really an nfs issue, but it is much easier to
505	 * set hostname here and then let the "/etc/rc.xxx" files
506	 * mount the right /var based upon its preset value.
507	 */
508	mtx_lock(&prison0.pr_mtx);
509	strlcpy(prison0.pr_hostname, nd->my_hostnam,
510	    sizeof(prison0.pr_hostname));
511	mtx_unlock(&prison0.pr_mtx);
512	inittodr(ntohl(nd->root_time));
513	return (0);
514}
515
516/*
517 * Internal version of mount system call for diskless setup.
518 */
519static int
520nfs_mountdiskless(char *path,
521    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
522    struct vnode **vpp, struct mount *mp)
523{
524	struct sockaddr *nam;
525	int dirlen, error;
526	char *dirpath;
527
528	/*
529	 * Find the directory path in "path", which also has the server's
530	 * name/ip address in it.
531	 */
532	dirpath = strchr(path, ':');
533	if (dirpath != NULL)
534		dirlen = strlen(++dirpath);
535	else
536		dirlen = 0;
537	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
538	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
539	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
540	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
541		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
542		return (error);
543	}
544	return (0);
545}
546
547static void
548nfs_sec_name(char *sec, int *flagsp)
549{
550	if (!strcmp(sec, "krb5"))
551		*flagsp |= NFSMNT_KERB;
552	else if (!strcmp(sec, "krb5i"))
553		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
554	else if (!strcmp(sec, "krb5p"))
555		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
556}
557
558static void
559nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
560    const char *hostname, struct ucred *cred, struct thread *td)
561{
562	int s;
563	int adjsock;
564	char *p;
565
566	s = splnet();
567
568	/*
569	 * Set read-only flag if requested; otherwise, clear it if this is
570	 * an update.  If this is not an update, then either the read-only
571	 * flag is already clear, or this is a root mount and it was set
572	 * intentionally at some previous point.
573	 */
574	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
575		MNT_ILOCK(mp);
576		mp->mnt_flag |= MNT_RDONLY;
577		MNT_IUNLOCK(mp);
578	} else if (mp->mnt_flag & MNT_UPDATE) {
579		MNT_ILOCK(mp);
580		mp->mnt_flag &= ~MNT_RDONLY;
581		MNT_IUNLOCK(mp);
582	}
583
584	/*
585	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
586	 * no sense in that context.  Also, set up appropriate retransmit
587	 * and soft timeout behavior.
588	 */
589	if (argp->sotype == SOCK_STREAM) {
590		nmp->nm_flag &= ~NFSMNT_NOCONN;
591		nmp->nm_timeo = NFS_MAXTIMEO;
592		if ((argp->flags & NFSMNT_NFSV4) != 0)
593			nmp->nm_retry = INT_MAX;
594		else
595			nmp->nm_retry = NFS_RETRANS_TCP;
596	}
597
598	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
599	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
600		argp->flags &= ~NFSMNT_RDIRPLUS;
601		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
602	}
603
604	/* Re-bind if rsrvd port requested and wasn't on one */
605	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
606		  && (argp->flags & NFSMNT_RESVPORT);
607	/* Also re-bind if we're switching to/from a connected UDP socket */
608	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
609		    (argp->flags & NFSMNT_NOCONN));
610
611	/* Update flags atomically.  Don't change the lock bits. */
612	nmp->nm_flag = argp->flags | nmp->nm_flag;
613	splx(s);
614
615	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
616		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
617		if (nmp->nm_timeo < NFS_MINTIMEO)
618			nmp->nm_timeo = NFS_MINTIMEO;
619		else if (nmp->nm_timeo > NFS_MAXTIMEO)
620			nmp->nm_timeo = NFS_MAXTIMEO;
621	}
622
623	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
624		nmp->nm_retry = argp->retrans;
625		if (nmp->nm_retry > NFS_MAXREXMIT)
626			nmp->nm_retry = NFS_MAXREXMIT;
627	}
628
629	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
630		nmp->nm_wsize = argp->wsize;
631		/*
632		 * Clip at the power of 2 below the size. There is an
633		 * issue (not isolated) that causes intermittent page
634		 * faults if this is not done.
635		 */
636		if (nmp->nm_wsize > NFS_FABLKSIZE)
637			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
638		else
639			nmp->nm_wsize = NFS_FABLKSIZE;
640	}
641
642	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
643		nmp->nm_rsize = argp->rsize;
644		/*
645		 * Clip at the power of 2 below the size. There is an
646		 * issue (not isolated) that causes intermittent page
647		 * faults if this is not done.
648		 */
649		if (nmp->nm_rsize > NFS_FABLKSIZE)
650			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
651		else
652			nmp->nm_rsize = NFS_FABLKSIZE;
653	}
654
655	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
656		nmp->nm_readdirsize = argp->readdirsize;
657	}
658
659	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
660		nmp->nm_acregmin = argp->acregmin;
661	else
662		nmp->nm_acregmin = NFS_MINATTRTIMO;
663	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
664		nmp->nm_acregmax = argp->acregmax;
665	else
666		nmp->nm_acregmax = NFS_MAXATTRTIMO;
667	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
668		nmp->nm_acdirmin = argp->acdirmin;
669	else
670		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
671	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
672		nmp->nm_acdirmax = argp->acdirmax;
673	else
674		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
675	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
676		nmp->nm_acdirmin = nmp->nm_acdirmax;
677	if (nmp->nm_acregmin > nmp->nm_acregmax)
678		nmp->nm_acregmin = nmp->nm_acregmax;
679
680	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
681		if (argp->readahead <= NFS_MAXRAHEAD)
682			nmp->nm_readahead = argp->readahead;
683		else
684			nmp->nm_readahead = NFS_MAXRAHEAD;
685	}
686	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
687		if (argp->wcommitsize < nmp->nm_wsize)
688			nmp->nm_wcommitsize = nmp->nm_wsize;
689		else
690			nmp->nm_wcommitsize = argp->wcommitsize;
691	}
692
693	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
694		    (nmp->nm_soproto != argp->proto));
695
696	if (nmp->nm_client != NULL && adjsock) {
697		int haslock = 0, error = 0;
698
699		if (nmp->nm_sotype == SOCK_STREAM) {
700			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
701			if (!error)
702				haslock = 1;
703		}
704		if (!error) {
705		    newnfs_disconnect(&nmp->nm_sockreq);
706		    if (haslock)
707			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
708		    nmp->nm_sotype = argp->sotype;
709		    nmp->nm_soproto = argp->proto;
710		    if (nmp->nm_sotype == SOCK_DGRAM)
711			while (newnfs_connect(nmp, &nmp->nm_sockreq,
712			    cred, td, 0)) {
713				printf("newnfs_args: retrying connect\n");
714				(void) nfs_catnap(PSOCK, 0, "nfscon");
715			}
716		}
717	} else {
718		nmp->nm_sotype = argp->sotype;
719		nmp->nm_soproto = argp->proto;
720	}
721
722	if (hostname != NULL) {
723		strlcpy(nmp->nm_hostname, hostname,
724		    sizeof(nmp->nm_hostname));
725		p = strchr(nmp->nm_hostname, ':');
726		if (p != NULL)
727			*p = '\0';
728	}
729}
730
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() and fails the mount with EINVAL if that check fails.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize",
    NULL };
742
743/*
744 * Parse the "from" mountarg, passed by the generic mount(8) program
745 * or the mountroot code.  This is used when rerooting into NFS.
746 *
747 * Note that the "hostname" is actually a "hostname:/share/path" string.
748 */
749static int
750nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
751    struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
752{
753	char nam[MNAMELEN + 1];
754	char *delimp, *hostp, *spec;
755	int error, have_bracket = 0, offset, rv, speclen;
756	struct sockaddr_in *sin;
757	size_t len;
758
759	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
760	if (error != 0)
761		return (error);
762
763	/*
764	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
765	 */
766        if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
767            *(delimp + 1) == ':') {
768                hostp = spec + 1;
769                spec = delimp + 2;
770                have_bracket = 1;
771        } else if ((delimp = strrchr(spec, ':')) != NULL) {
772                hostp = spec;
773                spec = delimp + 1;
774        } else if ((delimp = strrchr(spec, '@')) != NULL) {
775                printf("%s: path@server syntax is deprecated, "
776		    "use server:path\n", __func__);
777                hostp = delimp + 1;
778        } else {
779                printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
780                return (EINVAL);
781        }
782        *delimp = '\0';
783
784        /*
785         * If there has been a trailing slash at mounttime it seems
786         * that some mountd implementations fail to remove the mount
787         * entries from their mountlist while unmounting.
788         */
789        for (speclen = strlen(spec);
790                speclen > 1 && spec[speclen - 1] == '/';
791                speclen--)
792                spec[speclen - 1] = '\0';
793        if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
794                printf("%s: %s:%s: name too long", __func__, hostp, spec);
795                return (EINVAL);
796        }
797	/* Make both '@' and ':' notations equal */
798	if (*hostp != '\0') {
799		len = strlen(hostp);
800		offset = 0;
801		if (have_bracket)
802			nam[offset++] = '[';
803		memmove(nam + offset, hostp, len);
804		if (have_bracket)
805			nam[len + offset++] = ']';
806		nam[len + offset++] = ':';
807		memmove(nam + len + offset, spec, speclen);
808		nam[len + speclen + offset] = '\0';
809	} else
810		nam[0] = '\0';
811
812	/*
813	 * XXX: IPv6
814	 */
815	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
816	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
817	if (rv != 1) {
818		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
819		    __func__, hostp, rv);
820		free(sin, M_SONAME);
821		return (EINVAL);
822	}
823
824	sin->sin_len = sizeof(*sin);
825	sin->sin_family = AF_INET;
826	/*
827	 * XXX: hardcoded port number.
828	 */
829	sin->sin_port = htons(2049);
830
831	*hostnamep = strdup(nam, M_NEWNFSMNT);
832	*sinp = sin;
833	strlcpy(dirpath, spec, dirpathsize);
834	*dirlenp = strlen(dirpath);
835
836	return (0);
837}
838
839/*
840 * VFS Operations.
841 *
842 * mount system call
843 * It seems a bit dumb to copyinstr() the host and path here and then
844 * bcopy() them in mountnfs(), but I wanted to detect errors before
845 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
846 * an error after that means that I have to release the mbuf.
847 */
848/* ARGSUSED */
849static int
850nfs_mount(struct mount *mp)
851{
852	struct nfs_args args = {
853	    .version = NFS_ARGSVERSION,
854	    .addr = NULL,
855	    .addrlen = sizeof (struct sockaddr_in),
856	    .sotype = SOCK_STREAM,
857	    .proto = 0,
858	    .fh = NULL,
859	    .fhsize = 0,
860	    .flags = NFSMNT_RESVPORT,
861	    .wsize = NFS_WSIZE,
862	    .rsize = NFS_RSIZE,
863	    .readdirsize = NFS_READDIRSIZE,
864	    .timeo = 10,
865	    .retrans = NFS_RETRANS,
866	    .readahead = NFS_DEFRAHEAD,
867	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
868	    .hostname = NULL,
869	    .acregmin = NFS_MINATTRTIMO,
870	    .acregmax = NFS_MAXATTRTIMO,
871	    .acdirmin = NFS_MINDIRATTRTIMO,
872	    .acdirmax = NFS_MAXDIRATTRTIMO,
873	};
874	int error = 0, ret, len;
875	struct sockaddr *nam = NULL;
876	struct vnode *vp;
877	struct thread *td;
878	char hst[MNAMELEN];
879	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
880	char *cp, *opt, *name, *secname;
881	int nametimeo = NFS_DEFAULT_NAMETIMEO;
882	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
883	int minvers = 0;
884	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
885	    krbnamelen, srvkrbnamelen;
886	size_t hstlen;
887
888	has_nfs_args_opt = 0;
889	has_nfs_from_opt = 0;
890	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
891		error = EINVAL;
892		goto out;
893	}
894
895	td = curthread;
896	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
897	    nfs_diskless_valid != 0) {
898		error = nfs_mountroot(mp);
899		goto out;
900	}
901
902	nfscl_init();
903
904	/*
905	 * The old mount_nfs program passed the struct nfs_args
906	 * from userspace to kernel.  The new mount_nfs program
907	 * passes string options via nmount() from userspace to kernel
908	 * and we populate the struct nfs_args in the kernel.
909	 */
910	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
911		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
912		    sizeof(args));
913		if (error != 0)
914			goto out;
915
916		if (args.version != NFS_ARGSVERSION) {
917			error = EPROGMISMATCH;
918			goto out;
919		}
920		has_nfs_args_opt = 1;
921	}
922
923	/* Handle the new style options. */
924	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
925		args.acdirmin = args.acdirmax =
926		    args.acregmin = args.acregmax = 0;
927		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
928		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
929	}
930	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
931		args.flags |= NFSMNT_NOCONN;
932	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
933		args.flags &= ~NFSMNT_NOCONN;
934	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
935		args.flags |= NFSMNT_NOLOCKD;
936	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
937		args.flags &= ~NFSMNT_NOLOCKD;
938	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
939		args.flags |= NFSMNT_INT;
940	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
941		args.flags |= NFSMNT_RDIRPLUS;
942	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
943		args.flags |= NFSMNT_RESVPORT;
944	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
945		args.flags &= ~NFSMNT_RESVPORT;
946	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
947		args.flags |= NFSMNT_SOFT;
948	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
949		args.flags &= ~NFSMNT_SOFT;
950	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
951		args.sotype = SOCK_DGRAM;
952	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
953		args.sotype = SOCK_DGRAM;
954	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
955		args.sotype = SOCK_STREAM;
956	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
957		args.flags |= NFSMNT_NFSV3;
958	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
959		args.flags |= NFSMNT_NFSV4;
960		args.sotype = SOCK_STREAM;
961	}
962	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
963		args.flags |= NFSMNT_ALLGSSNAME;
964	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
965		args.flags |= NFSMNT_NOCTO;
966	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
967		args.flags |= NFSMNT_NONCONTIGWR;
968	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
969		args.flags |= NFSMNT_PNFS;
970	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
971		if (opt == NULL) {
972			vfs_mount_error(mp, "illegal readdirsize");
973			error = EINVAL;
974			goto out;
975		}
976		ret = sscanf(opt, "%d", &args.readdirsize);
977		if (ret != 1 || args.readdirsize <= 0) {
978			vfs_mount_error(mp, "illegal readdirsize: %s",
979			    opt);
980			error = EINVAL;
981			goto out;
982		}
983		args.flags |= NFSMNT_READDIRSIZE;
984	}
985	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
986		if (opt == NULL) {
987			vfs_mount_error(mp, "illegal readahead");
988			error = EINVAL;
989			goto out;
990		}
991		ret = sscanf(opt, "%d", &args.readahead);
992		if (ret != 1 || args.readahead <= 0) {
993			vfs_mount_error(mp, "illegal readahead: %s",
994			    opt);
995			error = EINVAL;
996			goto out;
997		}
998		args.flags |= NFSMNT_READAHEAD;
999	}
1000	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1001		if (opt == NULL) {
1002			vfs_mount_error(mp, "illegal wsize");
1003			error = EINVAL;
1004			goto out;
1005		}
1006		ret = sscanf(opt, "%d", &args.wsize);
1007		if (ret != 1 || args.wsize <= 0) {
1008			vfs_mount_error(mp, "illegal wsize: %s",
1009			    opt);
1010			error = EINVAL;
1011			goto out;
1012		}
1013		args.flags |= NFSMNT_WSIZE;
1014	}
1015	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1016		if (opt == NULL) {
1017			vfs_mount_error(mp, "illegal rsize");
1018			error = EINVAL;
1019			goto out;
1020		}
1021		ret = sscanf(opt, "%d", &args.rsize);
1022		if (ret != 1 || args.rsize <= 0) {
1023			vfs_mount_error(mp, "illegal wsize: %s",
1024			    opt);
1025			error = EINVAL;
1026			goto out;
1027		}
1028		args.flags |= NFSMNT_RSIZE;
1029	}
1030	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1031		if (opt == NULL) {
1032			vfs_mount_error(mp, "illegal retrans");
1033			error = EINVAL;
1034			goto out;
1035		}
1036		ret = sscanf(opt, "%d", &args.retrans);
1037		if (ret != 1 || args.retrans <= 0) {
1038			vfs_mount_error(mp, "illegal retrans: %s",
1039			    opt);
1040			error = EINVAL;
1041			goto out;
1042		}
1043		args.flags |= NFSMNT_RETRANS;
1044	}
1045	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1046		ret = sscanf(opt, "%d", &args.acregmin);
1047		if (ret != 1 || args.acregmin < 0) {
1048			vfs_mount_error(mp, "illegal actimeo: %s",
1049			    opt);
1050			error = EINVAL;
1051			goto out;
1052		}
1053		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1054		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1055		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1056	}
1057	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1058		ret = sscanf(opt, "%d", &args.acregmin);
1059		if (ret != 1 || args.acregmin < 0) {
1060			vfs_mount_error(mp, "illegal acregmin: %s",
1061			    opt);
1062			error = EINVAL;
1063			goto out;
1064		}
1065		args.flags |= NFSMNT_ACREGMIN;
1066	}
1067	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1068		ret = sscanf(opt, "%d", &args.acregmax);
1069		if (ret != 1 || args.acregmax < 0) {
1070			vfs_mount_error(mp, "illegal acregmax: %s",
1071			    opt);
1072			error = EINVAL;
1073			goto out;
1074		}
1075		args.flags |= NFSMNT_ACREGMAX;
1076	}
1077	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1078		ret = sscanf(opt, "%d", &args.acdirmin);
1079		if (ret != 1 || args.acdirmin < 0) {
1080			vfs_mount_error(mp, "illegal acdirmin: %s",
1081			    opt);
1082			error = EINVAL;
1083			goto out;
1084		}
1085		args.flags |= NFSMNT_ACDIRMIN;
1086	}
1087	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1088		ret = sscanf(opt, "%d", &args.acdirmax);
1089		if (ret != 1 || args.acdirmax < 0) {
1090			vfs_mount_error(mp, "illegal acdirmax: %s",
1091			    opt);
1092			error = EINVAL;
1093			goto out;
1094		}
1095		args.flags |= NFSMNT_ACDIRMAX;
1096	}
1097	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1098		ret = sscanf(opt, "%d", &args.wcommitsize);
1099		if (ret != 1 || args.wcommitsize < 0) {
1100			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1101			error = EINVAL;
1102			goto out;
1103		}
1104		args.flags |= NFSMNT_WCOMMITSIZE;
1105	}
1106	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1107		ret = sscanf(opt, "%d", &args.timeo);
1108		if (ret != 1 || args.timeo <= 0) {
1109			vfs_mount_error(mp, "illegal timeo: %s",
1110			    opt);
1111			error = EINVAL;
1112			goto out;
1113		}
1114		args.flags |= NFSMNT_TIMEO;
1115	}
1116	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1117		ret = sscanf(opt, "%d", &args.timeo);
1118		if (ret != 1 || args.timeo <= 0) {
1119			vfs_mount_error(mp, "illegal timeout: %s",
1120			    opt);
1121			error = EINVAL;
1122			goto out;
1123		}
1124		args.flags |= NFSMNT_TIMEO;
1125	}
1126	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1127		ret = sscanf(opt, "%d", &nametimeo);
1128		if (ret != 1 || nametimeo < 0) {
1129			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1130			error = EINVAL;
1131			goto out;
1132		}
1133	}
1134	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1135	    == 0) {
1136		ret = sscanf(opt, "%d", &negnametimeo);
1137		if (ret != 1 || negnametimeo < 0) {
1138			vfs_mount_error(mp, "illegal negnametimeo: %s",
1139			    opt);
1140			error = EINVAL;
1141			goto out;
1142		}
1143	}
1144	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1145	    0) {
1146		ret = sscanf(opt, "%d", &minvers);
1147		if (ret != 1 || minvers < 0 || minvers > 1 ||
1148		    (args.flags & NFSMNT_NFSV4) == 0) {
1149			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1150			error = EINVAL;
1151			goto out;
1152		}
1153	}
1154	if (vfs_getopt(mp->mnt_optnew, "sec",
1155		(void **) &secname, NULL) == 0)
1156		nfs_sec_name(secname, &args.flags);
1157
1158	if (mp->mnt_flag & MNT_UPDATE) {
1159		struct nfsmount *nmp = VFSTONFS(mp);
1160
1161		if (nmp == NULL) {
1162			error = EIO;
1163			goto out;
1164		}
1165
1166		/*
1167		 * If a change from TCP->UDP is done and there are thread(s)
1168		 * that have I/O RPC(s) in progress with a transfer size
1169		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1170		 * hung, retrying the RPC(s) forever. Usually these threads
1171		 * will be seen doing an uninterruptible sleep on wait channel
1172		 * "nfsreq".
1173		 */
1174		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1175			tprintf(td->td_proc, LOG_WARNING,
1176	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1177
1178		/*
1179		 * When doing an update, we can't change version,
1180		 * security, switch lockd strategies or change cookie
1181		 * translation
1182		 */
1183		args.flags = (args.flags &
1184		    ~(NFSMNT_NFSV3 |
1185		      NFSMNT_NFSV4 |
1186		      NFSMNT_KERB |
1187		      NFSMNT_INTEGRITY |
1188		      NFSMNT_PRIVACY |
1189		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1190		    (nmp->nm_flag &
1191			(NFSMNT_NFSV3 |
1192			 NFSMNT_NFSV4 |
1193			 NFSMNT_KERB |
1194			 NFSMNT_INTEGRITY |
1195			 NFSMNT_PRIVACY |
1196			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1197		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1198		goto out;
1199	}
1200
1201	/*
1202	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1203	 * or no-connection mode for those protocols that support
1204	 * no-connection mode (the flag will be cleared later for protocols
1205	 * that do not support no-connection mode).  This will allow a client
1206	 * to receive replies from a different IP then the request was
1207	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1208	 * not 0.
1209	 */
1210	if (nfs_ip_paranoia == 0)
1211		args.flags |= NFSMNT_NOCONN;
1212
1213	if (has_nfs_args_opt != 0) {
1214		/*
1215		 * In the 'nfs_args' case, the pointers in the args
1216		 * structure are in userland - we copy them in here.
1217		 */
1218		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1219			vfs_mount_error(mp, "Bad file handle");
1220			error = EINVAL;
1221			goto out;
1222		}
1223		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1224		    args.fhsize);
1225		if (error != 0)
1226			goto out;
1227		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1228		if (error != 0)
1229			goto out;
1230		bzero(&hst[hstlen], MNAMELEN - hstlen);
1231		args.hostname = hst;
1232		/* getsockaddr() call must be after above copyin() calls */
1233		error = getsockaddr(&nam, (caddr_t)args.addr,
1234		    args.addrlen);
1235		if (error != 0)
1236			goto out;
1237	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1238	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1239	    sizeof(dirpath), &dirlen) == 0) {
1240		has_nfs_from_opt = 1;
1241		bcopy(args.hostname, hst, MNAMELEN);
1242		hst[MNAMELEN - 1] = '\0';
1243
1244		/*
1245		 * This only works with NFSv4 for now.
1246		 */
1247		args.fhsize = 0;
1248		args.flags |= NFSMNT_NFSV4;
1249		args.sotype = SOCK_STREAM;
1250	} else {
1251		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1252		    &args.fhsize) == 0) {
1253			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1254				vfs_mount_error(mp, "Bad file handle");
1255				error = EINVAL;
1256				goto out;
1257			}
1258			bcopy(args.fh, nfh, args.fhsize);
1259		} else {
1260			args.fhsize = 0;
1261		}
1262		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1263		    (void **)&args.hostname, &len);
1264		if (args.hostname == NULL) {
1265			vfs_mount_error(mp, "Invalid hostname");
1266			error = EINVAL;
1267			goto out;
1268		}
1269		bcopy(args.hostname, hst, MNAMELEN);
1270		hst[MNAMELEN - 1] = '\0';
1271	}
1272
1273	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1274		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1275	else {
1276		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1277		cp = strchr(srvkrbname, ':');
1278		if (cp != NULL)
1279			*cp = '\0';
1280	}
1281	srvkrbnamelen = strlen(srvkrbname);
1282
1283	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1284		strlcpy(krbname, name, sizeof (krbname));
1285	else
1286		krbname[0] = '\0';
1287	krbnamelen = strlen(krbname);
1288
1289	if (has_nfs_from_opt == 0) {
1290		if (vfs_getopt(mp->mnt_optnew,
1291		    "dirpath", (void **)&name, NULL) == 0)
1292			strlcpy(dirpath, name, sizeof (dirpath));
1293		else
1294			dirpath[0] = '\0';
1295		dirlen = strlen(dirpath);
1296	}
1297
1298	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1299		if (vfs_getopt(mp->mnt_optnew, "addr",
1300		    (void **)&args.addr, &args.addrlen) == 0) {
1301			if (args.addrlen > SOCK_MAXADDRLEN) {
1302				error = ENAMETOOLONG;
1303				goto out;
1304			}
1305			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1306			bcopy(args.addr, nam, args.addrlen);
1307			nam->sa_len = args.addrlen;
1308		} else {
1309			vfs_mount_error(mp, "No server address");
1310			error = EINVAL;
1311			goto out;
1312		}
1313	}
1314
1315	args.fh = nfh;
1316	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1317	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1318	    nametimeo, negnametimeo, minvers);
1319out:
1320	if (!error) {
1321		MNT_ILOCK(mp);
1322		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1323		    MNTK_USES_BCACHE;
1324		MNT_IUNLOCK(mp);
1325	}
1326	return (error);
1327}
1328
1329
1330/*
1331 * VFS Operations.
1332 *
1333 * mount system call
1334 * It seems a bit dumb to copyinstr() the host and path here and then
1335 * bcopy() them in mountnfs(), but I wanted to detect errors before
1336 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1337 * an error after that means that I have to release the mbuf.
1338 */
1339/* ARGSUSED */
1340static int
1341nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1342{
1343	int error;
1344	struct nfs_args args;
1345
1346	error = copyin(data, &args, sizeof (struct nfs_args));
1347	if (error)
1348		return error;
1349
1350	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1351
1352	error = kernel_mount(ma, flags);
1353	return (error);
1354}
1355
1356/*
1357 * Common code for mount and mountroot
1358 */
1359static int
1360mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1361    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1362    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1363    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1364    int minvers)
1365{
1366	struct nfsmount *nmp;
1367	struct nfsnode *np;
1368	int error, trycnt, ret;
1369	struct nfsvattr nfsva;
1370	struct nfsclclient *clp;
1371	struct nfsclds *dsp, *tdsp;
1372	uint32_t lease;
1373	static u_int64_t clval = 0;
1374
1375	NFSCL_DEBUG(3, "in mnt\n");
1376	clp = NULL;
1377	if (mp->mnt_flag & MNT_UPDATE) {
1378		nmp = VFSTONFS(mp);
1379		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1380		FREE(nam, M_SONAME);
1381		return (0);
1382	} else {
1383		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1384		    krbnamelen + dirlen + srvkrbnamelen + 2,
1385		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1386		TAILQ_INIT(&nmp->nm_bufq);
1387		if (clval == 0)
1388			clval = (u_int64_t)nfsboottime.tv_sec;
1389		nmp->nm_clval = clval++;
1390		nmp->nm_krbnamelen = krbnamelen;
1391		nmp->nm_dirpathlen = dirlen;
1392		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1393		if (td->td_ucred->cr_uid != (uid_t)0) {
1394			/*
1395			 * nm_uid is used to get KerberosV credentials for
1396			 * the nfsv4 state handling operations if there is
1397			 * no host based principal set. Use the uid of
1398			 * this user if not root, since they are doing the
1399			 * mount. I don't think setting this for root will
1400			 * work, since root normally does not have user
1401			 * credentials in a credentials cache.
1402			 */
1403			nmp->nm_uid = td->td_ucred->cr_uid;
1404		} else {
1405			/*
1406			 * Just set to -1, so it won't be used.
1407			 */
1408			nmp->nm_uid = (uid_t)-1;
1409		}
1410
1411		/* Copy and null terminate all the names */
1412		if (nmp->nm_krbnamelen > 0) {
1413			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1414			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1415		}
1416		if (nmp->nm_dirpathlen > 0) {
1417			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1418			    nmp->nm_dirpathlen);
1419			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1420			    + 1] = '\0';
1421		}
1422		if (nmp->nm_srvkrbnamelen > 0) {
1423			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1424			    nmp->nm_srvkrbnamelen);
1425			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1426			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1427		}
1428		nmp->nm_sockreq.nr_cred = crhold(cred);
1429		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1430		mp->mnt_data = nmp;
1431		nmp->nm_getinfo = nfs_getnlminfo;
1432		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1433	}
1434	vfs_getnewfsid(mp);
1435	nmp->nm_mountp = mp;
1436	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1437
1438	/*
1439	 * Since nfs_decode_args() might optionally set them, these
1440	 * need to be set to defaults before the call, so that the
1441	 * optional settings aren't overwritten.
1442	 */
1443	nmp->nm_nametimeo = nametimeo;
1444	nmp->nm_negnametimeo = negnametimeo;
1445	nmp->nm_timeo = NFS_TIMEO;
1446	nmp->nm_retry = NFS_RETRANS;
1447	nmp->nm_readahead = NFS_DEFRAHEAD;
1448
1449	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1450	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1451	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1452		nmp->nm_wcommitsize *= 2;
1453	nmp->nm_wcommitsize *= 256;
1454
1455	if ((argp->flags & NFSMNT_NFSV4) != 0)
1456		nmp->nm_minorvers = minvers;
1457	else
1458		nmp->nm_minorvers = 0;
1459
1460	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1461
1462	/*
1463	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1464	 * high, depending on whether we end up with negative offsets in
1465	 * the client or server somewhere.  2GB-1 may be safer.
1466	 *
1467	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1468	 * that we can handle until we find out otherwise.
1469	 */
1470	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1471		nmp->nm_maxfilesize = 0xffffffffLL;
1472	else
1473		nmp->nm_maxfilesize = OFF_MAX;
1474
1475	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1476		nmp->nm_wsize = NFS_WSIZE;
1477		nmp->nm_rsize = NFS_RSIZE;
1478		nmp->nm_readdirsize = NFS_READDIRSIZE;
1479	}
1480	nmp->nm_numgrps = NFS_MAXGRPS;
1481	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1482	if (nmp->nm_tprintf_delay < 0)
1483		nmp->nm_tprintf_delay = 0;
1484	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1485	if (nmp->nm_tprintf_initial_delay < 0)
1486		nmp->nm_tprintf_initial_delay = 0;
1487	nmp->nm_fhsize = argp->fhsize;
1488	if (nmp->nm_fhsize > 0)
1489		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1490	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1491	nmp->nm_nam = nam;
1492	/* Set up the sockets and per-host congestion */
1493	nmp->nm_sotype = argp->sotype;
1494	nmp->nm_soproto = argp->proto;
1495	nmp->nm_sockreq.nr_prog = NFS_PROG;
1496	if ((argp->flags & NFSMNT_NFSV4))
1497		nmp->nm_sockreq.nr_vers = NFS_VER4;
1498	else if ((argp->flags & NFSMNT_NFSV3))
1499		nmp->nm_sockreq.nr_vers = NFS_VER3;
1500	else
1501		nmp->nm_sockreq.nr_vers = NFS_VER2;
1502
1503
1504	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1505		goto bad;
1506	/* For NFSv4.1, get the clientid now. */
1507	if (nmp->nm_minorvers > 0) {
1508		NFSCL_DEBUG(3, "at getcl\n");
1509		error = nfscl_getcl(mp, cred, td, 0, &clp);
1510		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1511		if (error != 0)
1512			goto bad;
1513	}
1514
1515	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1516	    nmp->nm_dirpathlen > 0) {
1517		NFSCL_DEBUG(3, "in dirp\n");
1518		/*
1519		 * If the fhsize on the mount point == 0 for V4, the mount
1520		 * path needs to be looked up.
1521		 */
1522		trycnt = 3;
1523		do {
1524			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1525			    cred, td);
1526			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1527			if (error)
1528				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1529		} while (error && --trycnt > 0);
1530		if (error) {
1531			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1532			goto bad;
1533		}
1534	}
1535
1536	/*
1537	 * A reference count is needed on the nfsnode representing the
1538	 * remote root.  If this object is not persistent, then backward
1539	 * traversals of the mount point (i.e. "..") will not work if
1540	 * the nfsnode gets flushed out of the cache. Ufs does not have
1541	 * this problem, because one can identify root inodes by their
1542	 * number == ROOTINO (2).
1543	 */
1544	if (nmp->nm_fhsize > 0) {
1545		/*
1546		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1547		 * non-zero for the root vnode. f_iosize will be set correctly
1548		 * by nfs_statfs() before any I/O occurs.
1549		 */
1550		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1551		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1552		    LK_EXCLUSIVE);
1553		if (error)
1554			goto bad;
1555		*vpp = NFSTOV(np);
1556
1557		/*
1558		 * Get file attributes and transfer parameters for the
1559		 * mountpoint.  This has the side effect of filling in
1560		 * (*vpp)->v_type with the correct value.
1561		 */
1562		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1563		    cred, td, &nfsva, NULL, &lease);
1564		if (ret) {
1565			/*
1566			 * Just set default values to get things going.
1567			 */
1568			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1569			nfsva.na_vattr.va_type = VDIR;
1570			nfsva.na_vattr.va_mode = 0777;
1571			nfsva.na_vattr.va_nlink = 100;
1572			nfsva.na_vattr.va_uid = (uid_t)0;
1573			nfsva.na_vattr.va_gid = (gid_t)0;
1574			nfsva.na_vattr.va_fileid = 2;
1575			nfsva.na_vattr.va_gen = 1;
1576			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1577			nfsva.na_vattr.va_size = 512 * 1024;
1578			lease = 60;
1579		}
1580		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1581		if (nmp->nm_minorvers > 0) {
1582			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1583			NFSLOCKCLSTATE();
1584			clp->nfsc_renew = NFSCL_RENEW(lease);
1585			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1586			clp->nfsc_clientidrev++;
1587			if (clp->nfsc_clientidrev == 0)
1588				clp->nfsc_clientidrev++;
1589			NFSUNLOCKCLSTATE();
1590			/*
1591			 * Mount will succeed, so the renew thread can be
1592			 * started now.
1593			 */
1594			nfscl_start_renewthread(clp);
1595			nfscl_clientrelease(clp);
1596		}
1597		if (argp->flags & NFSMNT_NFSV3)
1598			ncl_fsinfo(nmp, *vpp, cred, td);
1599
1600		/* Mark if the mount point supports NFSv4 ACLs. */
1601		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1602		    ret == 0 &&
1603		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1604			MNT_ILOCK(mp);
1605			mp->mnt_flag |= MNT_NFS4ACLS;
1606			MNT_IUNLOCK(mp);
1607		}
1608
1609		/*
1610		 * Lose the lock but keep the ref.
1611		 */
1612		NFSVOPUNLOCK(*vpp, 0);
1613		return (0);
1614	}
1615	error = EIO;
1616
1617bad:
1618	if (clp != NULL)
1619		nfscl_clientrelease(clp);
1620	newnfs_disconnect(&nmp->nm_sockreq);
1621	crfree(nmp->nm_sockreq.nr_cred);
1622	if (nmp->nm_sockreq.nr_auth != NULL)
1623		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1624	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1625	mtx_destroy(&nmp->nm_mtx);
1626	if (nmp->nm_clp != NULL) {
1627		NFSLOCKCLSTATE();
1628		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1629		NFSUNLOCKCLSTATE();
1630		free(nmp->nm_clp, M_NFSCLCLIENT);
1631	}
1632	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1633		nfscl_freenfsclds(dsp);
1634	FREE(nmp, M_NEWNFSMNT);
1635	FREE(nam, M_SONAME);
1636	return (error);
1637}
1638
1639/*
1640 * unmount system call
1641 */
1642static int
1643nfs_unmount(struct mount *mp, int mntflags)
1644{
1645	struct thread *td;
1646	struct nfsmount *nmp;
1647	int error, flags = 0, i, trycnt = 0;
1648	struct nfsclds *dsp, *tdsp;
1649
1650	td = curthread;
1651
1652	if (mntflags & MNT_FORCE)
1653		flags |= FORCECLOSE;
1654	nmp = VFSTONFS(mp);
1655	/*
1656	 * Goes something like this..
1657	 * - Call vflush() to clear out vnodes for this filesystem
1658	 * - Close the socket
1659	 * - Free up the data structures
1660	 */
1661	/* In the forced case, cancel any outstanding requests. */
1662	if (mntflags & MNT_FORCE) {
1663		error = newnfs_nmcancelreqs(nmp);
1664		if (error)
1665			goto out;
1666		/* For a forced close, get rid of the renew thread now */
1667		nfscl_umount(nmp, td);
1668	}
1669	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1670	do {
1671		error = vflush(mp, 1, flags, td);
1672		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1673			(void) nfs_catnap(PSOCK, error, "newndm");
1674	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1675	if (error)
1676		goto out;
1677
1678	/*
1679	 * We are now committed to the unmount.
1680	 */
1681	if ((mntflags & MNT_FORCE) == 0)
1682		nfscl_umount(nmp, td);
1683	/* Make sure no nfsiods are assigned to this mount. */
1684	mtx_lock(&ncl_iod_mutex);
1685	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1686		if (ncl_iodmount[i] == nmp) {
1687			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1688			ncl_iodmount[i] = NULL;
1689		}
1690	mtx_unlock(&ncl_iod_mutex);
1691	newnfs_disconnect(&nmp->nm_sockreq);
1692	crfree(nmp->nm_sockreq.nr_cred);
1693	FREE(nmp->nm_nam, M_SONAME);
1694	if (nmp->nm_sockreq.nr_auth != NULL)
1695		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1696	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1697	mtx_destroy(&nmp->nm_mtx);
1698	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1699		nfscl_freenfsclds(dsp);
1700	FREE(nmp, M_NEWNFSMNT);
1701out:
1702	return (error);
1703}
1704
1705/*
1706 * Return root of a filesystem
1707 */
1708static int
1709nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1710{
1711	struct vnode *vp;
1712	struct nfsmount *nmp;
1713	struct nfsnode *np;
1714	int error;
1715
1716	nmp = VFSTONFS(mp);
1717	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1718	if (error)
1719		return error;
1720	vp = NFSTOV(np);
1721	/*
1722	 * Get transfer parameters and attributes for root vnode once.
1723	 */
1724	mtx_lock(&nmp->nm_mtx);
1725	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1726		mtx_unlock(&nmp->nm_mtx);
1727		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1728	} else
1729		mtx_unlock(&nmp->nm_mtx);
1730	if (vp->v_type == VNON)
1731	    vp->v_type = VDIR;
1732	vp->v_vflag |= VV_ROOT;
1733	*vpp = vp;
1734	return (0);
1735}
1736
1737/*
1738 * Flush out the buffer cache
1739 */
1740/* ARGSUSED */
1741static int
1742nfs_sync(struct mount *mp, int waitfor)
1743{
1744	struct vnode *vp, *mvp;
1745	struct thread *td;
1746	int error, allerror = 0;
1747
1748	td = curthread;
1749
1750	MNT_ILOCK(mp);
1751	/*
1752	 * If a forced dismount is in progress, return from here so that
1753	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1754	 * calling VFS_UNMOUNT().
1755	 */
1756	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1757		MNT_IUNLOCK(mp);
1758		return (EBADF);
1759	}
1760	MNT_IUNLOCK(mp);
1761
1762	/*
1763	 * Force stale buffer cache information to be flushed.
1764	 */
1765loop:
1766	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1767		/* XXX Racy bv_cnt check. */
1768		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1769		    waitfor == MNT_LAZY) {
1770			VI_UNLOCK(vp);
1771			continue;
1772		}
1773		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1774			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1775			goto loop;
1776		}
1777		error = VOP_FSYNC(vp, waitfor, td);
1778		if (error)
1779			allerror = error;
1780		NFSVOPUNLOCK(vp, 0);
1781		vrele(vp);
1782	}
1783	return (allerror);
1784}
1785
1786static int
1787nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1788{
1789	struct nfsmount *nmp = VFSTONFS(mp);
1790	struct vfsquery vq;
1791	int error;
1792
1793	bzero(&vq, sizeof(vq));
1794	switch (op) {
1795#if 0
1796	case VFS_CTL_NOLOCKS:
1797		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1798 		if (req->oldptr != NULL) {
1799 			error = SYSCTL_OUT(req, &val, sizeof(val));
1800 			if (error)
1801 				return (error);
1802 		}
1803 		if (req->newptr != NULL) {
1804 			error = SYSCTL_IN(req, &val, sizeof(val));
1805 			if (error)
1806 				return (error);
1807			if (val)
1808				nmp->nm_flag |= NFSMNT_NOLOCKS;
1809			else
1810				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1811 		}
1812		break;
1813#endif
1814	case VFS_CTL_QUERY:
1815		mtx_lock(&nmp->nm_mtx);
1816		if (nmp->nm_state & NFSSTA_TIMEO)
1817			vq.vq_flags |= VQ_NOTRESP;
1818		mtx_unlock(&nmp->nm_mtx);
1819#if 0
1820		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1821		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1822			vq.vq_flags |= VQ_NOTRESPLOCK;
1823#endif
1824		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1825		break;
1826 	case VFS_CTL_TIMEO:
1827 		if (req->oldptr != NULL) {
1828 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1829 			    sizeof(nmp->nm_tprintf_initial_delay));
1830 			if (error)
1831 				return (error);
1832 		}
1833 		if (req->newptr != NULL) {
1834			error = vfs_suser(mp, req->td);
1835			if (error)
1836				return (error);
1837 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1838 			    sizeof(nmp->nm_tprintf_initial_delay));
1839 			if (error)
1840 				return (error);
1841 			if (nmp->nm_tprintf_initial_delay < 0)
1842 				nmp->nm_tprintf_initial_delay = 0;
1843 		}
1844		break;
1845	default:
1846		return (ENOTSUP);
1847	}
1848	return (0);
1849}
1850
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not used.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1863
1864/*
1865 * Extract the information needed by the nlm from the nfs vnode.
1866 */
1867static void
1868nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1869    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1870    struct timeval *timeop)
1871{
1872	struct nfsmount *nmp;
1873	struct nfsnode *np = VTONFS(vp);
1874
1875	nmp = VFSTONFS(vp->v_mount);
1876	if (fhlenp != NULL)
1877		*fhlenp = (size_t)np->n_fhp->nfh_len;
1878	if (fhp != NULL)
1879		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1880	if (sp != NULL)
1881		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1882	if (is_v3p != NULL)
1883		*is_v3p = NFS_ISV3(vp);
1884	if (sizep != NULL)
1885		*sizep = np->n_size;
1886	if (timeop != NULL) {
1887		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1888		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1889	}
1890}
1891
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the space left.
 *
 * *buf is the current write position and *blen the space remaining; both
 * are advanced past the text written (the NUL is not counted, so the next
 * write overwrites it).  "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the space check above; complain if it does. */
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1909
/*
 * Append "opt=optval" (optval printed as a decimal integer) to the output
 * buffer.
 *
 * *buf is the current write position and *blen the space remaining.  When
 * the complete text and its terminating NUL fit, both are advanced past
 * the text written.  When it does not fit, nothing is appended: any
 * partial output is erased and *buf and *blen are left unchanged, so that
 * a cut-off number is never reported as if it were the option's value.
 * "nmp" is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	/* Cheap pre-check: room for the name, '=' and at least the NUL. */
	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: discard the partial "opt=value" text. */
			**buf = '\0';
		}
	}
}
1927
1928/*
1929 * Load the option flags and values into the buffer.
1930 */
1931void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1932{
1933	char *buf;
1934	size_t blen;
1935
1936	buf = buffer;
1937	blen = buflen;
1938	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1939	    &blen);
1940	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1941		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1942		    &blen);
1943		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1944		    &buf, &blen);
1945	}
1946	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1947	    &blen);
1948	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1949	    "nfsv2", &buf, &blen);
1950	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1951	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1952	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1953	    &buf, &blen);
1954	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1955	    &buf, &blen);
1956	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1957	    &blen);
1958	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1959	    &blen);
1960	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1961	    &blen);
1962	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1963	    &blen);
1964	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1965	    &blen);
1966	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1967	    ",noncontigwr", &buf, &blen);
1968	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1969	    0, ",lockd", &buf, &blen);
1970	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1971	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1972	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1973	    &buf, &blen);
1974	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1975	    &buf, &blen);
1976	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1977	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1978	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1979	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1980	    &buf, &blen);
1981	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1982	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1983	    &buf, &blen);
1984	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1985	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1986	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1987	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1988	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1989	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1990	    &blen);
1991	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1992	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1993	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1994	    &blen);
1995	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1996	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1997	    &blen);
1998	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1999	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2000}
2001
2002