nfs_clvfsops.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: stable/11/sys/fs/nfsclient/nfs_clvfsops.c 330897 2018-03-14 03:19:51Z eadler $");
39
40
41#include "opt_bootp.h"
42#include "opt_nfsroot.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/clock.h>
50#include <sys/jail.h>
51#include <sys/limits.h>
52#include <sys/lock.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/mount.h>
57#include <sys/proc.h>
58#include <sys/socket.h>
59#include <sys/socketvar.h>
60#include <sys/sockio.h>
61#include <sys/sysctl.h>
62#include <sys/vnode.h>
63#include <sys/signalvar.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67#include <vm/uma.h>
68
69#include <net/if.h>
70#include <net/route.h>
71#include <netinet/in.h>
72
73#include <fs/nfs/nfsport.h>
74#include <fs/nfsclient/nfsnode.h>
75#include <fs/nfsclient/nfsmount.h>
76#include <fs/nfsclient/nfs.h>
77#include <nfs/nfsdiskless.h>
78
/* Register a kernel feature named "nfscl", described as the NFSv4 client. */
79FEATURE(nfscl, "NFSv4 client");
80
/* Globals that are defined in other source files and only referenced here. */
81extern int nfscl_ticks;
82extern struct timeval nfsboottime;
83extern int nfsrv_useacl;
84extern int nfscl_debuglevel;
85extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87extern struct mtx ncl_iod_mutex;
88NFSCLSTATEMUTEX;
89
/* malloc(9) types used for NFS request headers and per-mount structures. */
90MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
91MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
92
/*
 * Read/write tunables published under the vfs.nfs sysctl node.  The first
 * three are registered with an empty description string.
 */
93SYSCTL_DECL(_vfs_nfs);
/* NOTE(review): not referenced in the visible part of this file. */
94static int nfs_ip_paranoia = 1;
95SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
96    &nfs_ip_paranoia, 0, "");
/*
 * Initial "server not responding" message delay.  Note that the sysctl is
 * registered with NFS_TPRINTF_INITIAL_DELAY as its OID number rather than
 * OID_AUTO.
 */
97static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
98SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
99        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
100/*
 * How long to wait between console messages of the form
 * "nfs server foo not responding".  As above, the OID number is the
 * NFS_TPRINTF_DELAY constant itself.
 */
101static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
102SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
103        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
/* The debug toggle only exists in kernels built with NFS_DEBUG. */
104#ifdef NFS_DEBUG
105int nfs_debug;
106SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
107    "Toggle debug flag");
108#endif
109
/*
 * Forward declarations for the file-local helpers, followed by the VFS
 * operation entry points that are wired into nfs_vfsops below.
 */
110static int	nfs_mountroot(struct mount *);
111static void	nfs_sec_name(char *, int *);
112static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
113		    struct nfs_args *argp, const char *, struct ucred *,
114		    struct thread *);
115static int	mountnfs(struct nfs_args *, struct mount *,
116		    struct sockaddr *, char *, u_char *, int, u_char *, int,
117		    u_char *, int, struct vnode **, struct ucred *,
118		    struct thread *, int, int, int);
119static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
120		    struct sockaddr_storage *, int *, off_t *,
121		    struct timeval *);
/* VFS operation handlers, declared through their vfs_*_t function types. */
122static vfs_mount_t nfs_mount;
123static vfs_cmount_t nfs_cmount;
124static vfs_unmount_t nfs_unmount;
125static vfs_root_t nfs_root;
126static vfs_statfs_t nfs_statfs;
127static vfs_sync_t nfs_sync;
128static vfs_sysctl_t nfs_sysctl;
129static vfs_purge_t nfs_purge;
130
131/*
 * NFS client VFS operations vector.  Initialisation and teardown are
 * delegated to ncl_init()/ncl_uninit(); every other slot points at a
 * handler declared in this file.
 */
134static struct vfsops nfs_vfsops = {
135	.vfs_init =		ncl_init,
136	.vfs_mount =		nfs_mount,
137	.vfs_cmount =		nfs_cmount,
138	.vfs_root =		nfs_root,
139	.vfs_statfs =		nfs_statfs,
140	.vfs_sync =		nfs_sync,
141	.vfs_uninit =		ncl_uninit,
142	.vfs_unmount =		nfs_unmount,
143	.vfs_sysctl =		nfs_sysctl,
144	.vfs_purge =		nfs_purge,
145};
/* Register the file system type "nfs" with the network and SBDRY flags. */
146VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
147
148/*
 * Module version and dependencies, so that loader and kldload(2) can find
 * us wherever we are.
 */
149MODULE_VERSION(nfs, 1);
150MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
151MODULE_DEPEND(nfs, krpc, 1, 1, 1);
152MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
153MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
154
155/*
 * These structures are now defined in sys/nfs/nfs_diskless.c so that they
 * can be shared by both NFS clients.  They are defined here as well, zero
 * filled, for kernels built without NFS_ROOT, although they are not used
 * in that case.
 */
161#if !defined(NFS_ROOT)
162struct nfs_diskless	nfs_diskless = { { { 0 } } };
163struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
164int			nfs_diskless_valid = 0;
165#endif
166
/* Read-only sysctls exposing the diskless boot state under vfs.nfs. */
167SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
168    &nfs_diskless_valid, 0,
169    "Has the diskless struct been filled correctly");
170
171SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
172    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
173
174SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
175    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
176    "%Ssockaddr_in", "Diskless root nfs address");
177
178
/* Prototypes for the diskless-boot helpers defined further down. */
179void		newnfsargs_ntoh(struct nfs_args *);
180static int	nfs_mountdiskless(char *,
181		    struct sockaddr_in *, struct nfs_args *,
182		    struct thread *, struct vnode **, struct mount *);
183static void	nfs_convert_diskless(void);
184static void	nfs_convert_oargs(struct nfs_args *args,
185		    struct onfs_args *oargs);
186
187int
188newnfs_iosize(struct nfsmount *nmp)
189{
190	int iosize, maxio;
191
192	/* First, set the upper limit for iosize */
193	if (nmp->nm_flag & NFSMNT_NFSV4) {
194		maxio = NFS_MAXBSIZE;
195	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
196		if (nmp->nm_sotype == SOCK_DGRAM)
197			maxio = NFS_MAXDGRAMDATA;
198		else
199			maxio = NFS_MAXBSIZE;
200	} else {
201		maxio = NFS_V2MAXDATA;
202	}
203	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
204		nmp->nm_rsize = maxio;
205	if (nmp->nm_rsize > NFS_MAXBSIZE)
206		nmp->nm_rsize = NFS_MAXBSIZE;
207	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
208		nmp->nm_readdirsize = maxio;
209	if (nmp->nm_readdirsize > nmp->nm_rsize)
210		nmp->nm_readdirsize = nmp->nm_rsize;
211	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
212		nmp->nm_wsize = maxio;
213	if (nmp->nm_wsize > NFS_MAXBSIZE)
214		nmp->nm_wsize = NFS_MAXBSIZE;
215
216	/*
217	 * Calculate the size used for io buffers.  Use the larger
218	 * of the two sizes to minimise nfs requests but make sure
219	 * that it is at least one VM page to avoid wasting buffer
220	 * space.  It must also be at least NFS_DIRBLKSIZ, since
221	 * that is the buffer size used for directories.
222	 */
223	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
224	iosize = imax(iosize, PAGE_SIZE);
225	iosize = imax(iosize, NFS_DIRBLKSIZ);
226	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
227	return (iosize);
228}
229
230static void
231nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
232{
233
234	args->version = NFS_ARGSVERSION;
235	args->addr = oargs->addr;
236	args->addrlen = oargs->addrlen;
237	args->sotype = oargs->sotype;
238	args->proto = oargs->proto;
239	args->fh = oargs->fh;
240	args->fhsize = oargs->fhsize;
241	args->flags = oargs->flags;
242	args->wsize = oargs->wsize;
243	args->rsize = oargs->rsize;
244	args->readdirsize = oargs->readdirsize;
245	args->timeo = oargs->timeo;
246	args->retrans = oargs->retrans;
247	args->readahead = oargs->readahead;
248	args->hostname = oargs->hostname;
249}
250
251static void
252nfs_convert_diskless(void)
253{
254
255	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
256		sizeof(struct ifaliasreq));
257	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
258		sizeof(struct sockaddr_in));
259	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
260	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
261		nfsv3_diskless.root_fhsize = NFSX_MYFH;
262		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
263	} else {
264		nfsv3_diskless.root_fhsize = NFSX_V2FH;
265		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
266	}
267	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
268		sizeof(struct sockaddr_in));
269	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
270	nfsv3_diskless.root_time = nfs_diskless.root_time;
271	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
272		MAXHOSTNAMELEN);
273	nfs_diskless_valid = 3;
274}
275
276/*
277 * nfs statfs call
278 */
279static int
280nfs_statfs(struct mount *mp, struct statfs *sbp)
281{
282	struct vnode *vp;
283	struct thread *td;
284	struct nfsmount *nmp = VFSTONFS(mp);
285	struct nfsvattr nfsva;
286	struct nfsfsinfo fs;
287	struct nfsstatfs sb;
288	int error = 0, attrflag, gotfsinfo = 0, ret;
289	struct nfsnode *np;
290
291	td = curthread;
292
293	error = vfs_busy(mp, MBF_NOWAIT);
294	if (error)
295		return (error);
296	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
297	if (error) {
298		vfs_unbusy(mp);
299		return (error);
300	}
301	vp = NFSTOV(np);
302	mtx_lock(&nmp->nm_mtx);
303	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
304		mtx_unlock(&nmp->nm_mtx);
305		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
306		    &attrflag, NULL);
307		if (!error)
308			gotfsinfo = 1;
309	} else
310		mtx_unlock(&nmp->nm_mtx);
311	if (!error)
312		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
313		    &attrflag, NULL);
314	if (error != 0)
315		NFSCL_DEBUG(2, "statfs=%d\n", error);
316	if (attrflag == 0) {
317		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
318		    td->td_ucred, td, &nfsva, NULL, NULL);
319		if (ret) {
320			/*
321			 * Just set default values to get things going.
322			 */
323			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
324			nfsva.na_vattr.va_type = VDIR;
325			nfsva.na_vattr.va_mode = 0777;
326			nfsva.na_vattr.va_nlink = 100;
327			nfsva.na_vattr.va_uid = (uid_t)0;
328			nfsva.na_vattr.va_gid = (gid_t)0;
329			nfsva.na_vattr.va_fileid = 2;
330			nfsva.na_vattr.va_gen = 1;
331			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
332			nfsva.na_vattr.va_size = 512 * 1024;
333		}
334	}
335	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
336	if (!error) {
337	    mtx_lock(&nmp->nm_mtx);
338	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
339		nfscl_loadfsinfo(nmp, &fs);
340	    nfscl_loadsbinfo(nmp, &sb, sbp);
341	    sbp->f_iosize = newnfs_iosize(nmp);
342	    mtx_unlock(&nmp->nm_mtx);
343	    if (sbp != &mp->mnt_stat) {
344		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
345		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
346	    }
347	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
348	} else if (NFS_ISV4(vp)) {
349		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
350	}
351	vput(vp);
352	vfs_unbusy(mp);
353	return (error);
354}
355
356/*
357 * nfs version 3 fsinfo rpc call
358 */
359int
360ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
361    struct thread *td)
362{
363	struct nfsfsinfo fs;
364	struct nfsvattr nfsva;
365	int error, attrflag;
366
367	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
368	if (!error) {
369		if (attrflag)
370			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
371			    1);
372		mtx_lock(&nmp->nm_mtx);
373		nfscl_loadfsinfo(nmp, &fs);
374		mtx_unlock(&nmp->nm_mtx);
375	}
376	return (error);
377}
378
379/*
380 * Mount a remote root fs via. nfs. This depends on the info in the
381 * nfs_diskless structure that has been filled in properly by some primary
382 * bootstrap.
383 * It goes something like this:
384 * - do enough of "ifconfig" by calling ifioctl() so that the system
385 *   can talk to the server
386 * - If nfs_diskless.mygateway is filled in, use that address as
387 *   a default gateway.
388 * - build the rootfs mount point and call mountnfs() to do the rest.
389 *
390 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
391 * structure, as well as other global NFS client variables here, as
392 * nfs_mountroot() will be called once in the boot before any other NFS
393 * client activity occurs.
394 */
395static int
396nfs_mountroot(struct mount *mp)
397{
398	struct thread *td = curthread;
399	struct nfsv3_diskless *nd = &nfsv3_diskless;
400	struct socket *so;
401	struct vnode *vp;
402	struct ifreq ir;
403	int error;
404	u_long l;
405	char buf[128];
406	char *cp;
407
408#if defined(BOOTP_NFSROOT) && defined(BOOTP)
409	bootpc_init();		/* use bootp to get nfs_diskless filled in */
410#elif defined(NFS_ROOT)
411	nfs_setup_diskless();
412#endif
413
414	if (nfs_diskless_valid == 0)
415		return (-1);
416	if (nfs_diskless_valid == 1)
417		nfs_convert_diskless();
418
419	/*
420	 * Do enough of ifconfig(8) so that the critical net interface can
421	 * talk to the server.
422	 */
423	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
424	    td->td_ucred, td);
425	if (error)
426		panic("nfs_mountroot: socreate(%04x): %d",
427			nd->myif.ifra_addr.sa_family, error);
428
429#if 0 /* XXX Bad idea */
430	/*
431	 * We might not have been told the right interface, so we pass
432	 * over the first ten interfaces of the same kind, until we get
433	 * one of them configured.
434	 */
435
436	for (i = strlen(nd->myif.ifra_name) - 1;
437		nd->myif.ifra_name[i] >= '0' &&
438		nd->myif.ifra_name[i] <= '9';
439		nd->myif.ifra_name[i] ++) {
440		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
441		if(!error)
442			break;
443	}
444#endif
445	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
446	if (error)
447		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
448	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
449		ir.ifr_mtu = strtol(cp, NULL, 10);
450		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
451		freeenv(cp);
452		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
453		if (error)
454			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
455	}
456	soclose(so);
457
458	/*
459	 * If the gateway field is filled in, set it as the default route.
460	 * Note that pxeboot will set a default route of 0 if the route
461	 * is not set by the DHCP server.  Check also for a value of 0
462	 * to avoid panicking inappropriately in that situation.
463	 */
464	if (nd->mygateway.sin_len != 0 &&
465	    nd->mygateway.sin_addr.s_addr != 0) {
466		struct sockaddr_in mask, sin;
467
468		bzero((caddr_t)&mask, sizeof(mask));
469		sin = mask;
470		sin.sin_family = AF_INET;
471		sin.sin_len = sizeof(sin);
472                /* XXX MRT use table 0 for this sort of thing */
473		CURVNET_SET(TD_TO_VNET(td));
474		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
475		    (struct sockaddr *)&nd->mygateway,
476		    (struct sockaddr *)&mask,
477		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
478		CURVNET_RESTORE();
479		if (error)
480			panic("nfs_mountroot: RTM_ADD: %d", error);
481	}
482
483	/*
484	 * Create the rootfs mount point.
485	 */
486	nd->root_args.fh = nd->root_fh;
487	nd->root_args.fhsize = nd->root_fhsize;
488	l = ntohl(nd->root_saddr.sin_addr.s_addr);
489	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
490		(l >> 24) & 0xff, (l >> 16) & 0xff,
491		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
492	printf("NFS ROOT: %s\n", buf);
493	nd->root_args.hostname = buf;
494	if ((error = nfs_mountdiskless(buf,
495	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
496		return (error);
497	}
498
499	/*
500	 * This is not really an nfs issue, but it is much easier to
501	 * set hostname here and then let the "/etc/rc.xxx" files
502	 * mount the right /var based upon its preset value.
503	 */
504	mtx_lock(&prison0.pr_mtx);
505	strlcpy(prison0.pr_hostname, nd->my_hostnam,
506	    sizeof(prison0.pr_hostname));
507	mtx_unlock(&prison0.pr_mtx);
508	inittodr(ntohl(nd->root_time));
509	return (0);
510}
511
512/*
513 * Internal version of mount system call for diskless setup.
514 */
515static int
516nfs_mountdiskless(char *path,
517    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
518    struct vnode **vpp, struct mount *mp)
519{
520	struct sockaddr *nam;
521	int dirlen, error;
522	char *dirpath;
523
524	/*
525	 * Find the directory path in "path", which also has the server's
526	 * name/ip address in it.
527	 */
528	dirpath = strchr(path, ':');
529	if (dirpath != NULL)
530		dirlen = strlen(++dirpath);
531	else
532		dirlen = 0;
533	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
534	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
535	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
536	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
537		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
538		return (error);
539	}
540	return (0);
541}
542
543static void
544nfs_sec_name(char *sec, int *flagsp)
545{
546	if (!strcmp(sec, "krb5"))
547		*flagsp |= NFSMNT_KERB;
548	else if (!strcmp(sec, "krb5i"))
549		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
550	else if (!strcmp(sec, "krb5p"))
551		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
552}
553
554static void
555nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
556    const char *hostname, struct ucred *cred, struct thread *td)
557{
558	int adjsock;
559	char *p;
560
561	/*
562	 * Set read-only flag if requested; otherwise, clear it if this is
563	 * an update.  If this is not an update, then either the read-only
564	 * flag is already clear, or this is a root mount and it was set
565	 * intentionally at some previous point.
566	 */
567	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568		MNT_ILOCK(mp);
569		mp->mnt_flag |= MNT_RDONLY;
570		MNT_IUNLOCK(mp);
571	} else if (mp->mnt_flag & MNT_UPDATE) {
572		MNT_ILOCK(mp);
573		mp->mnt_flag &= ~MNT_RDONLY;
574		MNT_IUNLOCK(mp);
575	}
576
577	/*
578	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579	 * no sense in that context.  Also, set up appropriate retransmit
580	 * and soft timeout behavior.
581	 */
582	if (argp->sotype == SOCK_STREAM) {
583		nmp->nm_flag &= ~NFSMNT_NOCONN;
584		nmp->nm_timeo = NFS_MAXTIMEO;
585		if ((argp->flags & NFSMNT_NFSV4) != 0)
586			nmp->nm_retry = INT_MAX;
587		else
588			nmp->nm_retry = NFS_RETRANS_TCP;
589	}
590
591	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593		argp->flags &= ~NFSMNT_RDIRPLUS;
594		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595	}
596
597	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
598	if (nmp->nm_minorvers == 0) {
599		argp->flags &= ~NFSMNT_ONEOPENOWN;
600		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
601	}
602
603	/* Re-bind if rsrvd port requested and wasn't on one */
604	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
605		  && (argp->flags & NFSMNT_RESVPORT);
606	/* Also re-bind if we're switching to/from a connected UDP socket */
607	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
608		    (argp->flags & NFSMNT_NOCONN));
609
610	/* Update flags atomically.  Don't change the lock bits. */
611	nmp->nm_flag = argp->flags | nmp->nm_flag;
612
613	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
614		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
615		if (nmp->nm_timeo < NFS_MINTIMEO)
616			nmp->nm_timeo = NFS_MINTIMEO;
617		else if (nmp->nm_timeo > NFS_MAXTIMEO)
618			nmp->nm_timeo = NFS_MAXTIMEO;
619	}
620
621	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
622		nmp->nm_retry = argp->retrans;
623		if (nmp->nm_retry > NFS_MAXREXMIT)
624			nmp->nm_retry = NFS_MAXREXMIT;
625	}
626
627	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
628		nmp->nm_wsize = argp->wsize;
629		/*
630		 * Clip at the power of 2 below the size. There is an
631		 * issue (not isolated) that causes intermittent page
632		 * faults if this is not done.
633		 */
634		if (nmp->nm_wsize > NFS_FABLKSIZE)
635			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
636		else
637			nmp->nm_wsize = NFS_FABLKSIZE;
638	}
639
640	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
641		nmp->nm_rsize = argp->rsize;
642		/*
643		 * Clip at the power of 2 below the size. There is an
644		 * issue (not isolated) that causes intermittent page
645		 * faults if this is not done.
646		 */
647		if (nmp->nm_rsize > NFS_FABLKSIZE)
648			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
649		else
650			nmp->nm_rsize = NFS_FABLKSIZE;
651	}
652
653	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
654		nmp->nm_readdirsize = argp->readdirsize;
655	}
656
657	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
658		nmp->nm_acregmin = argp->acregmin;
659	else
660		nmp->nm_acregmin = NFS_MINATTRTIMO;
661	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
662		nmp->nm_acregmax = argp->acregmax;
663	else
664		nmp->nm_acregmax = NFS_MAXATTRTIMO;
665	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
666		nmp->nm_acdirmin = argp->acdirmin;
667	else
668		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
669	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
670		nmp->nm_acdirmax = argp->acdirmax;
671	else
672		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
673	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
674		nmp->nm_acdirmin = nmp->nm_acdirmax;
675	if (nmp->nm_acregmin > nmp->nm_acregmax)
676		nmp->nm_acregmin = nmp->nm_acregmax;
677
678	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
679		if (argp->readahead <= NFS_MAXRAHEAD)
680			nmp->nm_readahead = argp->readahead;
681		else
682			nmp->nm_readahead = NFS_MAXRAHEAD;
683	}
684	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
685		if (argp->wcommitsize < nmp->nm_wsize)
686			nmp->nm_wcommitsize = nmp->nm_wsize;
687		else
688			nmp->nm_wcommitsize = argp->wcommitsize;
689	}
690
691	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
692		    (nmp->nm_soproto != argp->proto));
693
694	if (nmp->nm_client != NULL && adjsock) {
695		int haslock = 0, error = 0;
696
697		if (nmp->nm_sotype == SOCK_STREAM) {
698			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
699			if (!error)
700				haslock = 1;
701		}
702		if (!error) {
703		    newnfs_disconnect(&nmp->nm_sockreq);
704		    if (haslock)
705			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
706		    nmp->nm_sotype = argp->sotype;
707		    nmp->nm_soproto = argp->proto;
708		    if (nmp->nm_sotype == SOCK_DGRAM)
709			while (newnfs_connect(nmp, &nmp->nm_sockreq,
710			    cred, td, 0)) {
711				printf("newnfs_args: retrying connect\n");
712				(void) nfs_catnap(PSOCK, 0, "nfscon");
713			}
714		}
715	} else {
716		nmp->nm_sotype = argp->sotype;
717		nmp->nm_soproto = argp->proto;
718	}
719
720	if (hostname != NULL) {
721		strlcpy(nmp->nm_hostname, hostname,
722		    sizeof(nmp->nm_hostname));
723		p = strchr(nmp->nm_hostname, ':');
724		if (p != NULL)
725			*p = '\0';
726	}
727}
728
/*
 * NULL-terminated list of option names that nfs_mount() passes to
 * vfs_filteropt() when checking the options of a mount request.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh", "nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize", "oneopenown",
	NULL
};
740
741/*
742 * Parse the "from" mountarg, passed by the generic mount(8) program
743 * or the mountroot code.  This is used when rerooting into NFS.
744 *
745 * Note that the "hostname" is actually a "hostname:/share/path" string.
746 */
747static int
748nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
749    struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
750{
751	char *nam, *delimp, *hostp, *spec;
752	int error, have_bracket = 0, offset, rv, speclen;
753	struct sockaddr_in *sin;
754	size_t len;
755
756	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
757	if (error != 0)
758		return (error);
759	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
760
761	/*
762	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
763	 */
764	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
765	    *(delimp + 1) == ':') {
766		hostp = spec + 1;
767		spec = delimp + 2;
768		have_bracket = 1;
769	} else if ((delimp = strrchr(spec, ':')) != NULL) {
770		hostp = spec;
771		spec = delimp + 1;
772	} else if ((delimp = strrchr(spec, '@')) != NULL) {
773		printf("%s: path@server syntax is deprecated, "
774		    "use server:path\n", __func__);
775		hostp = delimp + 1;
776	} else {
777		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
778		free(nam, M_TEMP);
779		return (EINVAL);
780	}
781	*delimp = '\0';
782
783	/*
784	 * If there has been a trailing slash at mounttime it seems
785	 * that some mountd implementations fail to remove the mount
786	 * entries from their mountlist while unmounting.
787	 */
788	for (speclen = strlen(spec);
789	    speclen > 1 && spec[speclen - 1] == '/';
790	    speclen--)
791		spec[speclen - 1] = '\0';
792	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
793		printf("%s: %s:%s: name too long", __func__, hostp, spec);
794		free(nam, M_TEMP);
795		return (EINVAL);
796	}
797	/* Make both '@' and ':' notations equal */
798	if (*hostp != '\0') {
799		len = strlen(hostp);
800		offset = 0;
801		if (have_bracket)
802			nam[offset++] = '[';
803		memmove(nam + offset, hostp, len);
804		if (have_bracket)
805			nam[len + offset++] = ']';
806		nam[len + offset++] = ':';
807		memmove(nam + len + offset, spec, speclen);
808		nam[len + speclen + offset] = '\0';
809	} else
810		nam[0] = '\0';
811
812	/*
813	 * XXX: IPv6
814	 */
815	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
816	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
817	if (rv != 1) {
818		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
819		    __func__, hostp, rv);
820		free(nam, M_TEMP);
821		free(sin, M_SONAME);
822		return (EINVAL);
823	}
824
825	sin->sin_len = sizeof(*sin);
826	sin->sin_family = AF_INET;
827	/*
828	 * XXX: hardcoded port number.
829	 */
830	sin->sin_port = htons(2049);
831
832	*hostnamep = strdup(nam, M_NEWNFSMNT);
833	*sinp = sin;
834	strlcpy(dirpath, spec, dirpathsize);
835	*dirlenp = strlen(dirpath);
836
837	free(nam, M_TEMP);
838	return (0);
839}
840
841/*
842 * VFS Operations.
843 *
844 * mount system call
845 * It seems a bit dumb to copyinstr() the host and path here and then
846 * bcopy() them in mountnfs(), but I wanted to detect errors before
847 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
848 * an error after that means that I have to release the mbuf.
849 */
850/* ARGSUSED */
851static int
852nfs_mount(struct mount *mp)
853{
854	struct nfs_args args = {
855	    .version = NFS_ARGSVERSION,
856	    .addr = NULL,
857	    .addrlen = sizeof (struct sockaddr_in),
858	    .sotype = SOCK_STREAM,
859	    .proto = 0,
860	    .fh = NULL,
861	    .fhsize = 0,
862	    .flags = NFSMNT_RESVPORT,
863	    .wsize = NFS_WSIZE,
864	    .rsize = NFS_RSIZE,
865	    .readdirsize = NFS_READDIRSIZE,
866	    .timeo = 10,
867	    .retrans = NFS_RETRANS,
868	    .readahead = NFS_DEFRAHEAD,
869	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
870	    .hostname = NULL,
871	    .acregmin = NFS_MINATTRTIMO,
872	    .acregmax = NFS_MAXATTRTIMO,
873	    .acdirmin = NFS_MINDIRATTRTIMO,
874	    .acdirmax = NFS_MAXDIRATTRTIMO,
875	};
876	int error = 0, ret, len;
877	struct sockaddr *nam = NULL;
878	struct vnode *vp;
879	struct thread *td;
880	char *hst;
881	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
882	char *cp, *opt, *name, *secname;
883	int nametimeo = NFS_DEFAULT_NAMETIMEO;
884	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
885	int minvers = 0;
886	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
887	    krbnamelen, srvkrbnamelen;
888	size_t hstlen;
889
890	has_nfs_args_opt = 0;
891	has_nfs_from_opt = 0;
892	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
893	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
894		error = EINVAL;
895		goto out;
896	}
897
898	td = curthread;
899	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
900	    nfs_diskless_valid != 0) {
901		error = nfs_mountroot(mp);
902		goto out;
903	}
904
905	nfscl_init();
906
907	/*
908	 * The old mount_nfs program passed the struct nfs_args
909	 * from userspace to kernel.  The new mount_nfs program
910	 * passes string options via nmount() from userspace to kernel
911	 * and we populate the struct nfs_args in the kernel.
912	 */
913	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
914		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
915		    sizeof(args));
916		if (error != 0)
917			goto out;
918
919		if (args.version != NFS_ARGSVERSION) {
920			error = EPROGMISMATCH;
921			goto out;
922		}
923		has_nfs_args_opt = 1;
924	}
925
926	/* Handle the new style options. */
927	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
928		args.acdirmin = args.acdirmax =
929		    args.acregmin = args.acregmax = 0;
930		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
931		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
932	}
933	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
934		args.flags |= NFSMNT_NOCONN;
935	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
936		args.flags &= ~NFSMNT_NOCONN;
937	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
938		args.flags |= NFSMNT_NOLOCKD;
939	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
940		args.flags &= ~NFSMNT_NOLOCKD;
941	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
942		args.flags |= NFSMNT_INT;
943	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
944		args.flags |= NFSMNT_RDIRPLUS;
945	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
946		args.flags |= NFSMNT_RESVPORT;
947	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
948		args.flags &= ~NFSMNT_RESVPORT;
949	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
950		args.flags |= NFSMNT_SOFT;
951	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
952		args.flags &= ~NFSMNT_SOFT;
953	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
954		args.sotype = SOCK_DGRAM;
955	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
956		args.sotype = SOCK_DGRAM;
957	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
958		args.sotype = SOCK_STREAM;
959	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
960		args.flags |= NFSMNT_NFSV3;
961	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
962		args.flags |= NFSMNT_NFSV4;
963		args.sotype = SOCK_STREAM;
964	}
965	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
966		args.flags |= NFSMNT_ALLGSSNAME;
967	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
968		args.flags |= NFSMNT_NOCTO;
969	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
970		args.flags |= NFSMNT_NONCONTIGWR;
971	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
972		args.flags |= NFSMNT_PNFS;
973	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
974		args.flags |= NFSMNT_ONEOPENOWN;
975	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
976		if (opt == NULL) {
977			vfs_mount_error(mp, "illegal readdirsize");
978			error = EINVAL;
979			goto out;
980		}
981		ret = sscanf(opt, "%d", &args.readdirsize);
982		if (ret != 1 || args.readdirsize <= 0) {
983			vfs_mount_error(mp, "illegal readdirsize: %s",
984			    opt);
985			error = EINVAL;
986			goto out;
987		}
988		args.flags |= NFSMNT_READDIRSIZE;
989	}
990	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
991		if (opt == NULL) {
992			vfs_mount_error(mp, "illegal readahead");
993			error = EINVAL;
994			goto out;
995		}
996		ret = sscanf(opt, "%d", &args.readahead);
997		if (ret != 1 || args.readahead <= 0) {
998			vfs_mount_error(mp, "illegal readahead: %s",
999			    opt);
1000			error = EINVAL;
1001			goto out;
1002		}
1003		args.flags |= NFSMNT_READAHEAD;
1004	}
1005	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1006		if (opt == NULL) {
1007			vfs_mount_error(mp, "illegal wsize");
1008			error = EINVAL;
1009			goto out;
1010		}
1011		ret = sscanf(opt, "%d", &args.wsize);
1012		if (ret != 1 || args.wsize <= 0) {
1013			vfs_mount_error(mp, "illegal wsize: %s",
1014			    opt);
1015			error = EINVAL;
1016			goto out;
1017		}
1018		args.flags |= NFSMNT_WSIZE;
1019	}
1020	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1021		if (opt == NULL) {
1022			vfs_mount_error(mp, "illegal rsize");
1023			error = EINVAL;
1024			goto out;
1025		}
1026		ret = sscanf(opt, "%d", &args.rsize);
1027		if (ret != 1 || args.rsize <= 0) {
1028			vfs_mount_error(mp, "illegal wsize: %s",
1029			    opt);
1030			error = EINVAL;
1031			goto out;
1032		}
1033		args.flags |= NFSMNT_RSIZE;
1034	}
1035	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1036		if (opt == NULL) {
1037			vfs_mount_error(mp, "illegal retrans");
1038			error = EINVAL;
1039			goto out;
1040		}
1041		ret = sscanf(opt, "%d", &args.retrans);
1042		if (ret != 1 || args.retrans <= 0) {
1043			vfs_mount_error(mp, "illegal retrans: %s",
1044			    opt);
1045			error = EINVAL;
1046			goto out;
1047		}
1048		args.flags |= NFSMNT_RETRANS;
1049	}
1050	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1051		ret = sscanf(opt, "%d", &args.acregmin);
1052		if (ret != 1 || args.acregmin < 0) {
1053			vfs_mount_error(mp, "illegal actimeo: %s",
1054			    opt);
1055			error = EINVAL;
1056			goto out;
1057		}
1058		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1059		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1060		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1061	}
1062	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1063		ret = sscanf(opt, "%d", &args.acregmin);
1064		if (ret != 1 || args.acregmin < 0) {
1065			vfs_mount_error(mp, "illegal acregmin: %s",
1066			    opt);
1067			error = EINVAL;
1068			goto out;
1069		}
1070		args.flags |= NFSMNT_ACREGMIN;
1071	}
1072	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1073		ret = sscanf(opt, "%d", &args.acregmax);
1074		if (ret != 1 || args.acregmax < 0) {
1075			vfs_mount_error(mp, "illegal acregmax: %s",
1076			    opt);
1077			error = EINVAL;
1078			goto out;
1079		}
1080		args.flags |= NFSMNT_ACREGMAX;
1081	}
1082	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1083		ret = sscanf(opt, "%d", &args.acdirmin);
1084		if (ret != 1 || args.acdirmin < 0) {
1085			vfs_mount_error(mp, "illegal acdirmin: %s",
1086			    opt);
1087			error = EINVAL;
1088			goto out;
1089		}
1090		args.flags |= NFSMNT_ACDIRMIN;
1091	}
1092	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1093		ret = sscanf(opt, "%d", &args.acdirmax);
1094		if (ret != 1 || args.acdirmax < 0) {
1095			vfs_mount_error(mp, "illegal acdirmax: %s",
1096			    opt);
1097			error = EINVAL;
1098			goto out;
1099		}
1100		args.flags |= NFSMNT_ACDIRMAX;
1101	}
1102	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1103		ret = sscanf(opt, "%d", &args.wcommitsize);
1104		if (ret != 1 || args.wcommitsize < 0) {
1105			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1106			error = EINVAL;
1107			goto out;
1108		}
1109		args.flags |= NFSMNT_WCOMMITSIZE;
1110	}
1111	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1112		ret = sscanf(opt, "%d", &args.timeo);
1113		if (ret != 1 || args.timeo <= 0) {
1114			vfs_mount_error(mp, "illegal timeo: %s",
1115			    opt);
1116			error = EINVAL;
1117			goto out;
1118		}
1119		args.flags |= NFSMNT_TIMEO;
1120	}
1121	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1122		ret = sscanf(opt, "%d", &args.timeo);
1123		if (ret != 1 || args.timeo <= 0) {
1124			vfs_mount_error(mp, "illegal timeout: %s",
1125			    opt);
1126			error = EINVAL;
1127			goto out;
1128		}
1129		args.flags |= NFSMNT_TIMEO;
1130	}
1131	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1132		ret = sscanf(opt, "%d", &nametimeo);
1133		if (ret != 1 || nametimeo < 0) {
1134			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1135			error = EINVAL;
1136			goto out;
1137		}
1138	}
1139	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1140	    == 0) {
1141		ret = sscanf(opt, "%d", &negnametimeo);
1142		if (ret != 1 || negnametimeo < 0) {
1143			vfs_mount_error(mp, "illegal negnametimeo: %s",
1144			    opt);
1145			error = EINVAL;
1146			goto out;
1147		}
1148	}
1149	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1150	    0) {
1151		ret = sscanf(opt, "%d", &minvers);
1152		if (ret != 1 || minvers < 0 || minvers > 1 ||
1153		    (args.flags & NFSMNT_NFSV4) == 0) {
1154			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1155			error = EINVAL;
1156			goto out;
1157		}
1158	}
1159	if (vfs_getopt(mp->mnt_optnew, "sec",
1160		(void **) &secname, NULL) == 0)
1161		nfs_sec_name(secname, &args.flags);
1162
1163	if (mp->mnt_flag & MNT_UPDATE) {
1164		struct nfsmount *nmp = VFSTONFS(mp);
1165
1166		if (nmp == NULL) {
1167			error = EIO;
1168			goto out;
1169		}
1170
1171		/*
1172		 * If a change from TCP->UDP is done and there are thread(s)
1173		 * that have I/O RPC(s) in progress with a transfer size
1174		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1175		 * hung, retrying the RPC(s) forever. Usually these threads
1176		 * will be seen doing an uninterruptible sleep on wait channel
1177		 * "nfsreq".
1178		 */
1179		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1180			tprintf(td->td_proc, LOG_WARNING,
1181	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1182
1183		/*
1184		 * When doing an update, we can't change version,
1185		 * security, switch lockd strategies, change cookie
1186		 * translation or switch oneopenown.
1187		 */
1188		args.flags = (args.flags &
1189		    ~(NFSMNT_NFSV3 |
1190		      NFSMNT_NFSV4 |
1191		      NFSMNT_KERB |
1192		      NFSMNT_INTEGRITY |
1193		      NFSMNT_PRIVACY |
1194		      NFSMNT_ONEOPENOWN |
1195		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1196		    (nmp->nm_flag &
1197			(NFSMNT_NFSV3 |
1198			 NFSMNT_NFSV4 |
1199			 NFSMNT_KERB |
1200			 NFSMNT_INTEGRITY |
1201			 NFSMNT_PRIVACY |
1202			 NFSMNT_ONEOPENOWN |
1203			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1204		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1205		goto out;
1206	}
1207
1208	/*
1209	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1210	 * or no-connection mode for those protocols that support
1211	 * no-connection mode (the flag will be cleared later for protocols
1212	 * that do not support no-connection mode).  This will allow a client
1213	 * to receive replies from a different IP then the request was
1214	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1215	 * not 0.
1216	 */
1217	if (nfs_ip_paranoia == 0)
1218		args.flags |= NFSMNT_NOCONN;
1219
1220	if (has_nfs_args_opt != 0) {
1221		/*
1222		 * In the 'nfs_args' case, the pointers in the args
1223		 * structure are in userland - we copy them in here.
1224		 */
1225		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1226			vfs_mount_error(mp, "Bad file handle");
1227			error = EINVAL;
1228			goto out;
1229		}
1230		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1231		    args.fhsize);
1232		if (error != 0)
1233			goto out;
1234		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1235		if (error != 0)
1236			goto out;
1237		bzero(&hst[hstlen], MNAMELEN - hstlen);
1238		args.hostname = hst;
1239		/* getsockaddr() call must be after above copyin() calls */
1240		error = getsockaddr(&nam, (caddr_t)args.addr,
1241		    args.addrlen);
1242		if (error != 0)
1243			goto out;
1244	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1245	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1246	    sizeof(dirpath), &dirlen) == 0) {
1247		has_nfs_from_opt = 1;
1248		bcopy(args.hostname, hst, MNAMELEN);
1249		hst[MNAMELEN - 1] = '\0';
1250
1251		/*
1252		 * This only works with NFSv4 for now.
1253		 */
1254		args.fhsize = 0;
1255		args.flags |= NFSMNT_NFSV4;
1256		args.sotype = SOCK_STREAM;
1257	} else {
1258		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1259		    &args.fhsize) == 0) {
1260			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1261				vfs_mount_error(mp, "Bad file handle");
1262				error = EINVAL;
1263				goto out;
1264			}
1265			bcopy(args.fh, nfh, args.fhsize);
1266		} else {
1267			args.fhsize = 0;
1268		}
1269		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1270		    (void **)&args.hostname, &len);
1271		if (args.hostname == NULL) {
1272			vfs_mount_error(mp, "Invalid hostname");
1273			error = EINVAL;
1274			goto out;
1275		}
1276		if (len >= MNAMELEN) {
1277			vfs_mount_error(mp, "Hostname too long");
1278			error = EINVAL;
1279			goto out;
1280		}
1281		bcopy(args.hostname, hst, len);
1282		hst[len] = '\0';
1283	}
1284
1285	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1286		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1287	else {
1288		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1289		cp = strchr(srvkrbname, ':');
1290		if (cp != NULL)
1291			*cp = '\0';
1292	}
1293	srvkrbnamelen = strlen(srvkrbname);
1294
1295	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1296		strlcpy(krbname, name, sizeof (krbname));
1297	else
1298		krbname[0] = '\0';
1299	krbnamelen = strlen(krbname);
1300
1301	if (has_nfs_from_opt == 0) {
1302		if (vfs_getopt(mp->mnt_optnew,
1303		    "dirpath", (void **)&name, NULL) == 0)
1304			strlcpy(dirpath, name, sizeof (dirpath));
1305		else
1306			dirpath[0] = '\0';
1307		dirlen = strlen(dirpath);
1308	}
1309
1310	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1311		if (vfs_getopt(mp->mnt_optnew, "addr",
1312		    (void **)&args.addr, &args.addrlen) == 0) {
1313			if (args.addrlen > SOCK_MAXADDRLEN) {
1314				error = ENAMETOOLONG;
1315				goto out;
1316			}
1317			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1318			bcopy(args.addr, nam, args.addrlen);
1319			nam->sa_len = args.addrlen;
1320		} else {
1321			vfs_mount_error(mp, "No server address");
1322			error = EINVAL;
1323			goto out;
1324		}
1325	}
1326
1327	args.fh = nfh;
1328	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1329	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1330	    nametimeo, negnametimeo, minvers);
1331out:
1332	if (!error) {
1333		MNT_ILOCK(mp);
1334		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1335		    MNTK_USES_BCACHE;
1336		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1337			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1338		MNT_IUNLOCK(mp);
1339	}
1340	free(hst, M_TEMP);
1341	return (error);
1342}
1343
1344
1345/*
1346 * VFS Operations.
1347 *
1348 * mount system call
1349 * It seems a bit dumb to copyinstr() the host and path here and then
1350 * bcopy() them in mountnfs(), but I wanted to detect errors before
1351 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1352 * an error after that means that I have to release the mbuf.
1353 */
1354/* ARGSUSED */
1355static int
1356nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1357{
1358	int error;
1359	struct nfs_args args;
1360
1361	error = copyin(data, &args, sizeof (struct nfs_args));
1362	if (error)
1363		return error;
1364
1365	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1366
1367	error = kernel_mount(ma, flags);
1368	return (error);
1369}
1370
1371/*
1372 * Common code for mount and mountroot
 *
 * Allocates and initializes a struct nfsmount for mp, connects to the
 * server and obtains the vnode for the remote root.
 *
 * argp		mount arguments; flags, fhsize, fh, sotype and proto are
 *		read here and the structure is also passed on to
 *		nfs_decode_args()
 * mp		mount point being set up; mnt_data is pointed at the new
 *		nfsmount
 * nam		server address; stored in nm_nam, and released with
 *		FREE(nam, M_SONAME) on the MNT_UPDATE and failure returns
 * hst		passed to nfs_decode_args(); MNAMELEN bytes of it are
 *		copied to mnt_stat.f_mntfromname
 * krbname, dirpath, srvkrbname (with krbnamelen, dirlen, srvkrbnamelen)
 *		copied into the nfsmount at nm_krbname, NFSMNT_DIRPATH()
 *		and NFSMNT_SRVKRBNAME()
 * vpp		receives the root vnode, returned unlocked but with its
 *		reference kept
 * cred, td	credentials and thread used for the connection and RPCs
 * nametimeo, negnametimeo
 *		stored in nm_nametimeo and nm_negnametimeo
 * minvers	stored in nm_minorvers when NFSMNT_NFSV4 is set in
 *		argp->flags; otherwise 0 is stored
 *
 * Returns 0 on success.  On failure an error number is returned after the
 * nfsmount, the items attached to it and nam have been released.
 * When MNT_UPDATE is set in mp->mnt_flag only a message is printed, nam
 * is freed and 0 is returned.
1373 */
1374static int
1375mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1376    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1377    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1378    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1379    int minvers)
1380{
1381	struct nfsmount *nmp;
1382	struct nfsnode *np;
1383	int error, trycnt, ret;
1384	struct nfsvattr nfsva;
1385	struct nfsclclient *clp;
1386	struct nfsclds *dsp, *tdsp;
1387	uint32_t lease;
1388	static u_int64_t clval = 0;
1389
1390	NFSCL_DEBUG(3, "in mnt\n");
1391	clp = NULL;
1392	if (mp->mnt_flag & MNT_UPDATE) {
1393		nmp = VFSTONFS(mp);
1394		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1395		FREE(nam, M_SONAME);
1396		return (0);
1397	} else {
		/*
		 * The three names live in the same allocation as the
		 * nfsmount, hence the extra length requested here.
		 * NOTE(review): up to three terminating bytes are written
		 * below while only 2 extra bytes are requested; presumably
		 * nm_name itself provides the remaining byte - confirm.
		 */
1398		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1399		    krbnamelen + dirlen + srvkrbnamelen + 2,
1400		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1401		TAILQ_INIT(&nmp->nm_bufq);
1402		TAILQ_INIT(&nmp->nm_sess);
		/*
		 * clval is static: it is seeded from nfsboottime on first
		 * use and incremented for every nfsmount created here.
		 */
1403		if (clval == 0)
1404			clval = (u_int64_t)nfsboottime.tv_sec;
1405		nmp->nm_clval = clval++;
1406		nmp->nm_krbnamelen = krbnamelen;
1407		nmp->nm_dirpathlen = dirlen;
1408		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1409		if (td->td_ucred->cr_uid != (uid_t)0) {
1410			/*
1411			 * nm_uid is used to get KerberosV credentials for
1412			 * the nfsv4 state handling operations if there is
1413			 * no host based principal set. Use the uid of
1414			 * this user if not root, since they are doing the
1415			 * mount. I don't think setting this for root will
1416			 * work, since root normally does not have user
1417			 * credentials in a credentials cache.
1418			 */
1419			nmp->nm_uid = td->td_ucred->cr_uid;
1420		} else {
1421			/*
1422			 * Just set to -1, so it won't be used.
1423			 */
1424			nmp->nm_uid = (uid_t)-1;
1425		}
1426
1427		/* Copy and null terminate all the names */
1428		if (nmp->nm_krbnamelen > 0) {
1429			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1430			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1431		}
1432		if (nmp->nm_dirpathlen > 0) {
1433			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1434			    nmp->nm_dirpathlen);
1435			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1436			    + 1] = '\0';
1437		}
1438		if (nmp->nm_srvkrbnamelen > 0) {
1439			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1440			    nmp->nm_srvkrbnamelen);
1441			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1442			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1443		}
		/* The mount keeps its own reference on cred (crhold). */
1444		nmp->nm_sockreq.nr_cred = crhold(cred);
1445		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1446		mp->mnt_data = nmp;
1447		nmp->nm_getinfo = nfs_getnlminfo;
1448		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1449	}
1450	vfs_getnewfsid(mp);
1451	nmp->nm_mountp = mp;
1452	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1453
1454	/*
1455	 * Since nfs_decode_args() might optionally set them, these
1456	 * need to be set to defaults before the call, so that the
1457	 * optional settings aren't overwritten.
1458	 */
1459	nmp->nm_nametimeo = nametimeo;
1460	nmp->nm_negnametimeo = negnametimeo;
1461	nmp->nm_timeo = NFS_TIMEO;
1462	nmp->nm_retry = NFS_RETRANS;
1463	nmp->nm_readahead = NFS_DEFRAHEAD;
1464
1465	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1466	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1467	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1468		nmp->nm_wcommitsize *= 2;
1469	nmp->nm_wcommitsize *= 256;
1470
1471	if ((argp->flags & NFSMNT_NFSV4) != 0)
1472		nmp->nm_minorvers = minvers;
1473	else
1474		nmp->nm_minorvers = 0;
1475
1476	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1477
1478	/*
1479	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1480	 * high, depending on whether we end up with negative offsets in
1481	 * the client or server somewhere.  2GB-1 may be safer.
1482	 *
1483	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1484	 * that we can handle until we find out otherwise.
1485	 */
1486	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1487		nmp->nm_maxfilesize = 0xffffffffLL;
1488	else
1489		nmp->nm_maxfilesize = OFF_MAX;
1490
	/*
	 * With neither NFSMNT_NFSV3 nor NFSMNT_NFSV4 set, the transfer
	 * sizes are set to the NFS_WSIZE/NFS_RSIZE/NFS_READDIRSIZE values
	 * here, after nfs_decode_args() has run.
	 */
1491	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1492		nmp->nm_wsize = NFS_WSIZE;
1493		nmp->nm_rsize = NFS_RSIZE;
1494		nmp->nm_readdirsize = NFS_READDIRSIZE;
1495	}
1496	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are stored as 0. */
1497	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1498	if (nmp->nm_tprintf_delay < 0)
1499		nmp->nm_tprintf_delay = 0;
1500	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1501	if (nmp->nm_tprintf_initial_delay < 0)
1502		nmp->nm_tprintf_initial_delay = 0;
1503	nmp->nm_fhsize = argp->fhsize;
1504	if (nmp->nm_fhsize > 0)
1505		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1506	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1507	nmp->nm_nam = nam;
1508	/* Set up the sockets and per-host congestion */
1509	nmp->nm_sotype = argp->sotype;
1510	nmp->nm_soproto = argp->proto;
1511	nmp->nm_sockreq.nr_prog = NFS_PROG;
1512	if ((argp->flags & NFSMNT_NFSV4))
1513		nmp->nm_sockreq.nr_vers = NFS_VER4;
1514	else if ((argp->flags & NFSMNT_NFSV3))
1515		nmp->nm_sockreq.nr_vers = NFS_VER3;
1516	else
1517		nmp->nm_sockreq.nr_vers = NFS_VER2;
1518
1519
1520	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1521		goto bad;
1522	/* For NFSv4.1, get the clientid now. */
1523	if (nmp->nm_minorvers > 0) {
1524		NFSCL_DEBUG(3, "at getcl\n");
1525		error = nfscl_getcl(mp, cred, td, 0, &clp);
1526		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1527		if (error != 0)
1528			goto bad;
1529	}
1530
1531	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1532	    nmp->nm_dirpathlen > 0) {
1533		NFSCL_DEBUG(3, "in dirp\n");
1534		/*
1535		 * If the fhsize on the mount point == 0 for V4, the mount
1536		 * path needs to be looked up.
1537		 */
		/* Up to 3 attempts, with nfs_catnap() after each failure. */
1538		trycnt = 3;
1539		do {
1540			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1541			    cred, td);
1542			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1543			if (error)
1544				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1545		} while (error && --trycnt > 0);
1546		if (error) {
1547			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1548			goto bad;
1549		}
1550	}
1551
1552	/*
1553	 * A reference count is needed on the nfsnode representing the
1554	 * remote root.  If this object is not persistent, then backward
1555	 * traversals of the mount point (i.e. "..") will not work if
1556	 * the nfsnode gets flushed out of the cache. Ufs does not have
1557	 * this problem, because one can identify root inodes by their
1558	 * number == ROOTINO (2).
1559	 */
1560	if (nmp->nm_fhsize > 0) {
1561		/*
1562		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1563		 * non-zero for the root vnode. f_iosize will be set correctly
1564		 * by nfs_statfs() before any I/O occurs.
1565		 */
1566		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1567		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1568		    LK_EXCLUSIVE);
1569		if (error)
1570			goto bad;
1571		*vpp = NFSTOV(np);
1572
1573		/*
1574		 * Get file attributes and transfer parameters for the
1575		 * mountpoint.  This has the side effect of filling in
1576		 * (*vpp)->v_type with the correct value.
1577		 */
		/*
		 * A failure here (ret != 0) does not fail the mount; made up
		 * attributes and a lease of 60 are used instead.
		 */
1578		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1579		    cred, td, &nfsva, NULL, &lease);
1580		if (ret) {
1581			/*
1582			 * Just set default values to get things going.
1583			 */
1584			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1585			nfsva.na_vattr.va_type = VDIR;
1586			nfsva.na_vattr.va_mode = 0777;
1587			nfsva.na_vattr.va_nlink = 100;
1588			nfsva.na_vattr.va_uid = (uid_t)0;
1589			nfsva.na_vattr.va_gid = (gid_t)0;
1590			nfsva.na_vattr.va_fileid = 2;
1591			nfsva.na_vattr.va_gen = 1;
1592			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1593			nfsva.na_vattr.va_size = 512 * 1024;
1594			lease = 60;
1595		}
1596		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1597		if (nmp->nm_minorvers > 0) {
1598			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1599			NFSLOCKCLSTATE();
1600			clp->nfsc_renew = NFSCL_RENEW(lease);
1601			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* nfsc_clientidrev skips the value 0 when it wraps. */
1602			clp->nfsc_clientidrev++;
1603			if (clp->nfsc_clientidrev == 0)
1604				clp->nfsc_clientidrev++;
1605			NFSUNLOCKCLSTATE();
1606			/*
1607			 * Mount will succeed, so the renew thread can be
1608			 * started now.
1609			 */
1610			nfscl_start_renewthread(clp);
1611			nfscl_clientrelease(clp);
1612		}
1613		if (argp->flags & NFSMNT_NFSV3)
1614			ncl_fsinfo(nmp, *vpp, cred, td);
1615
1616		/* Mark if the mount point supports NFSv4 ACLs. */
1617		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1618		    ret == 0 &&
1619		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1620			MNT_ILOCK(mp);
1621			mp->mnt_flag |= MNT_NFS4ACLS;
1622			MNT_IUNLOCK(mp);
1623		}
1624
1625		/*
1626		 * Lose the lock but keep the ref.
1627		 */
1628		NFSVOPUNLOCK(*vpp, 0);
1629		return (0);
1630	}
	/* nm_fhsize is not positive: there is no root file handle to use. */
1631	error = EIO;
1632
	/*
	 * Failure exit: release what was set up above, then the nfsmount
	 * and nam.
	 * NOTE(review): mp->mnt_data was pointed at nmp earlier and is not
	 * reset here before nmp is freed - confirm that callers do not use
	 * it after a failed mount.
	 */
1633bad:
1634	if (clp != NULL)
1635		nfscl_clientrelease(clp);
1636	newnfs_disconnect(&nmp->nm_sockreq);
1637	crfree(nmp->nm_sockreq.nr_cred);
1638	if (nmp->nm_sockreq.nr_auth != NULL)
1639		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1640	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1641	mtx_destroy(&nmp->nm_mtx);
1642	if (nmp->nm_clp != NULL) {
1643		NFSLOCKCLSTATE();
1644		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1645		NFSUNLOCKCLSTATE();
1646		free(nmp->nm_clp, M_NFSCLCLIENT);
1647	}
	/*
	 * Every nm_sess entry is freed; only entries other than the first
	 * are disconnected here.
	 * NOTE(review): presumably the first entry uses nm_sockreq, which
	 * was disconnected above - confirm.
	 */
1648	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1649		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1650		    dsp->nfsclds_sockp != NULL)
1651			newnfs_disconnect(dsp->nfsclds_sockp);
1652		nfscl_freenfsclds(dsp);
1653	}
1654	FREE(nmp, M_NEWNFSMNT);
1655	FREE(nam, M_SONAME);
1656	return (error);
1657}
1658
1659/*
1660 * unmount system call
 *
 * mp		mount point to unmount
 * mntflags	when MNT_FORCE is set, FORCECLOSE is passed to vflush(),
 *		outstanding requests are cancelled first, nfscl_umount()
 *		is called before the vnodes are flushed and a failing
 *		vflush() is attempted up to 30 times
 *
 * Returns 0 once the mount has been torn down and the nfsmount freed.
 * Otherwise the error from newnfs_nmcancelreqs() or vflush() is returned
 * and the nfsmount is left in place.
1661 */
1662static int
1663nfs_unmount(struct mount *mp, int mntflags)
1664{
1665	struct thread *td;
1666	struct nfsmount *nmp;
1667	int error, flags = 0, i, trycnt = 0;
1668	struct nfsclds *dsp, *tdsp;
1669
1670	td = curthread;
1671
1672	if (mntflags & MNT_FORCE)
1673		flags |= FORCECLOSE;
1674	nmp = VFSTONFS(mp);
1675	/*
1676	 * Goes something like this..
1677	 * - Call vflush() to clear out vnodes for this filesystem
1678	 * - Close the socket
1679	 * - Free up the data structures
1680	 */
1681	/* In the forced case, cancel any outstanding requests. */
1682	if (mntflags & MNT_FORCE) {
1683		error = newnfs_nmcancelreqs(nmp);
1684		if (error)
1685			goto out;
1686		/* For a forced close, get rid of the renew thread now */
1687		nfscl_umount(nmp, td);
1688	}
1689	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
	/*
	 * Without MNT_FORCE vflush() is called exactly once.  With
	 * MNT_FORCE a failing vflush() is retried, with nfs_catnap()
	 * between attempts, until trycnt reaches 30.
	 */
1690	do {
1691		error = vflush(mp, 1, flags, td);
1692		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1693			(void) nfs_catnap(PSOCK, error, "newndm");
1694	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1695	if (error)
1696		goto out;
1697
1698	/*
1699	 * We are now committed to the unmount.
1700	 */
	/*
	 * nfscl_umount() was already called above in the forced case, so
	 * only NFSMNTP_FORCEDISM is set here for that case.
	 */
1701	if ((mntflags & MNT_FORCE) == 0)
1702		nfscl_umount(nmp, td);
1703	else {
1704		mtx_lock(&nmp->nm_mtx);
1705		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1706		mtx_unlock(&nmp->nm_mtx);
1707	}
1708	/* Make sure no nfsiods are assigned to this mount. */
1709	mtx_lock(&ncl_iod_mutex);
1710	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1711		if (ncl_iodmount[i] == nmp) {
1712			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1713			ncl_iodmount[i] = NULL;
1714		}
1715	mtx_unlock(&ncl_iod_mutex);
1716
1717	/*
1718	 * We can now set mnt_data to NULL and wait for
1719	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1720	 */
	/*
	 * mnt_data is cleared with both mountlist_mtx and nm_mtx held;
	 * nm_mtx stays held across the NFSMNTP_CANCELRPCS check, and
	 * msleep() is passed nm_mtx while waiting for the flag to clear.
	 */
1721	mtx_lock(&mountlist_mtx);
1722	mtx_lock(&nmp->nm_mtx);
1723	mp->mnt_data = NULL;
1724	mtx_unlock(&mountlist_mtx);
1725	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1726		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1727	mtx_unlock(&nmp->nm_mtx);
1728
	/* Release the connection, credential, address, auth handle and locks. */
1729	newnfs_disconnect(&nmp->nm_sockreq);
1730	crfree(nmp->nm_sockreq.nr_cred);
1731	FREE(nmp->nm_nam, M_SONAME);
1732	if (nmp->nm_sockreq.nr_auth != NULL)
1733		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1734	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1735	mtx_destroy(&nmp->nm_mtx);
	/*
	 * Every nm_sess entry is freed; only entries other than the first
	 * are disconnected here.
	 * NOTE(review): presumably the first entry uses nm_sockreq, which
	 * was disconnected above - confirm.
	 */
1736	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1737		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1738		    dsp->nfsclds_sockp != NULL)
1739			newnfs_disconnect(dsp->nfsclds_sockp);
1740		nfscl_freenfsclds(dsp);
1741	}
1742	FREE(nmp, M_NEWNFSMNT);
1743out:
1744	return (error);
1745}
1746
1747/*
1748 * Return root of a filesystem
1749 */
1750static int
1751nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1752{
1753	struct vnode *vp;
1754	struct nfsmount *nmp;
1755	struct nfsnode *np;
1756	int error;
1757
1758	nmp = VFSTONFS(mp);
1759	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1760	if (error)
1761		return error;
1762	vp = NFSTOV(np);
1763	/*
1764	 * Get transfer parameters and attributes for root vnode once.
1765	 */
1766	mtx_lock(&nmp->nm_mtx);
1767	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1768		mtx_unlock(&nmp->nm_mtx);
1769		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1770	} else
1771		mtx_unlock(&nmp->nm_mtx);
1772	if (vp->v_type == VNON)
1773	    vp->v_type = VDIR;
1774	vp->v_vflag |= VV_ROOT;
1775	*vpp = vp;
1776	return (0);
1777}
1778
1779/*
1780 * Flush out the buffer cache
1781 */
1782/* ARGSUSED */
1783static int
1784nfs_sync(struct mount *mp, int waitfor)
1785{
1786	struct vnode *vp, *mvp;
1787	struct thread *td;
1788	int error, allerror = 0;
1789
1790	td = curthread;
1791
1792	MNT_ILOCK(mp);
1793	/*
1794	 * If a forced dismount is in progress, return from here so that
1795	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1796	 * calling VFS_UNMOUNT().
1797	 */
1798	if (NFSCL_FORCEDISM(mp)) {
1799		MNT_IUNLOCK(mp);
1800		return (EBADF);
1801	}
1802	MNT_IUNLOCK(mp);
1803
1804	/*
1805	 * Force stale buffer cache information to be flushed.
1806	 */
1807loop:
1808	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1809		/* XXX Racy bv_cnt check. */
1810		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1811		    waitfor == MNT_LAZY) {
1812			VI_UNLOCK(vp);
1813			continue;
1814		}
1815		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1816			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1817			goto loop;
1818		}
1819		error = VOP_FSYNC(vp, waitfor, td);
1820		if (error)
1821			allerror = error;
1822		NFSVOPUNLOCK(vp, 0);
1823		vrele(vp);
1824	}
1825	return (allerror);
1826}
1827
1828static int
1829nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1830{
1831	struct nfsmount *nmp = VFSTONFS(mp);
1832	struct vfsquery vq;
1833	int error;
1834
1835	bzero(&vq, sizeof(vq));
1836	switch (op) {
1837#if 0
1838	case VFS_CTL_NOLOCKS:
1839		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1840 		if (req->oldptr != NULL) {
1841 			error = SYSCTL_OUT(req, &val, sizeof(val));
1842 			if (error)
1843 				return (error);
1844 		}
1845 		if (req->newptr != NULL) {
1846 			error = SYSCTL_IN(req, &val, sizeof(val));
1847 			if (error)
1848 				return (error);
1849			if (val)
1850				nmp->nm_flag |= NFSMNT_NOLOCKS;
1851			else
1852				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1853 		}
1854		break;
1855#endif
1856	case VFS_CTL_QUERY:
1857		mtx_lock(&nmp->nm_mtx);
1858		if (nmp->nm_state & NFSSTA_TIMEO)
1859			vq.vq_flags |= VQ_NOTRESP;
1860		mtx_unlock(&nmp->nm_mtx);
1861#if 0
1862		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1863		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1864			vq.vq_flags |= VQ_NOTRESPLOCK;
1865#endif
1866		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1867		break;
1868 	case VFS_CTL_TIMEO:
1869 		if (req->oldptr != NULL) {
1870 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1871 			    sizeof(nmp->nm_tprintf_initial_delay));
1872 			if (error)
1873 				return (error);
1874 		}
1875 		if (req->newptr != NULL) {
1876			error = vfs_suser(mp, req->td);
1877			if (error)
1878				return (error);
1879 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1880 			    sizeof(nmp->nm_tprintf_initial_delay));
1881 			if (error)
1882 				return (error);
1883 			if (nmp->nm_tprintf_initial_delay < 0)
1884 				nmp->nm_tprintf_initial_delay = 0;
1885 		}
1886		break;
1887	default:
1888		return (ENOTSUP);
1889	}
1890	return (0);
1891}
1892
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The value returned by newnfs_nmcancelreqs() is not used.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1905
1906/*
1907 * Extract the information needed by the nlm from the nfs vnode.
1908 */
1909static void
1910nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1911    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1912    struct timeval *timeop)
1913{
1914	struct nfsmount *nmp;
1915	struct nfsnode *np = VTONFS(vp);
1916
1917	nmp = VFSTONFS(vp->v_mount);
1918	if (fhlenp != NULL)
1919		*fhlenp = (size_t)np->n_fhp->nfh_len;
1920	if (fhp != NULL)
1921		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1922	if (sp != NULL)
1923		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1924	if (is_v3p != NULL)
1925		*is_v3p = NFS_ISV3(vp);
1926	if (sizep != NULL)
1927		*sizep = np->n_size;
1928	if (timeop != NULL) {
1929		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1930		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1931	}
1932}
1933
/*
 * This function prints out an option name, based on the conditional
 * argument.
 *
 * nmp		not used
 * testval	the option is printed only when this is non-zero
 * opt		option text, copied as is
 * buf		in/out: current write position in the output buffer
 * blen		in/out: bytes left at *buf
 *
 * The option is written only if it fits in *blen bytes together with its
 * terminating NUL.  *buf is then advanced and *blen reduced by
 * strlen(opt), so the next call overwrites the NUL.  Otherwise nothing
 * is changed.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;
	/*
	 * The size check above guarantees that nothing is truncated, so
	 * exactly optlen characters plus the NUL are written here.
	 */
	(void)snprintf(*buf, *blen, "%s", opt);
	*buf += optlen;
	*blen -= optlen;
}
1951
/*
 * This function prints out an option's integer value as "<opt>=<optval>".
 *
 * nmp		not used
 * optval	value printed in decimal after the '='
 * opt		option text printed before the '='
 * buf		in/out: current write position in the output buffer
 * blen		in/out: bytes left at *buf
 *
 * Nothing is attempted unless *blen exceeds strlen(opt) + 1.  When the
 * complete text fits, *buf is advanced and *blen reduced by its length.
 * When it does not fit, the partial text is removed again by terminating
 * the buffer at *buf, so that a cut-off (and therefore wrong looking)
 * value is never left in the output; *buf and *blen are left unchanged.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;
	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/* Truncated: drop the partial option. */
		**buf = '\0';
	}
}
1969
1970/*
1971 * Load the option flags and values into the buffer.
1972 */
1973void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1974{
1975	char *buf;
1976	size_t blen;
1977
1978	buf = buffer;
1979	blen = buflen;
1980	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1981	    &blen);
1982	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1983		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1984		    &blen);
1985		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1986		    &buf, &blen);
1987		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1988		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1989	}
1990	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1991	    &blen);
1992	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1993	    "nfsv2", &buf, &blen);
1994	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1995	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1996	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1997	    &buf, &blen);
1998	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1999	    &buf, &blen);
2000	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2001	    &blen);
2002	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2003	    &blen);
2004	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2005	    &blen);
2006	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2007	    &blen);
2008	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2009	    &blen);
2010	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2011	    ",noncontigwr", &buf, &blen);
2012	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2013	    0, ",lockd", &buf, &blen);
2014	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2015	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2016	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2017	    &buf, &blen);
2018	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2019	    &buf, &blen);
2020	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2021	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2022	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2023	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2024	    &buf, &blen);
2025	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2026	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2027	    &buf, &blen);
2028	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2029	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2030	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2031	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2032	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2033	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2034	    &blen);
2035	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2036	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2037	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2038	    &blen);
2039	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2040	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2041	    &blen);
2042	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2043	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2044}
2045
2046