nfs_clvfsops.c revision 224082
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 224082 2011-07-16 08:05:36Z zack $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83
84MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
85MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
86
87SYSCTL_DECL(_vfs_nfs);
88static int nfs_ip_paranoia = 1;
89SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
90    &nfs_ip_paranoia, 0, "");
91static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
92SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
93        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
94/* how long between console messages "nfs server foo not responding" */
95static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
96SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
97        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
98
99static int	nfs_mountroot(struct mount *);
100static void	nfs_sec_name(char *, int *);
101static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
102		    struct nfs_args *argp, const char *, struct ucred *,
103		    struct thread *);
104static int	mountnfs(struct nfs_args *, struct mount *,
105		    struct sockaddr *, char *, u_char *, int, u_char *, int,
106		    u_char *, int, struct vnode **, struct ucred *,
107		    struct thread *, int);
108static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
109		    struct sockaddr_storage *, int *, off_t *,
110		    struct timeval *);
111static vfs_mount_t nfs_mount;
112static vfs_cmount_t nfs_cmount;
113static vfs_unmount_t nfs_unmount;
114static vfs_root_t nfs_root;
115static vfs_statfs_t nfs_statfs;
116static vfs_sync_t nfs_sync;
117static vfs_sysctl_t nfs_sysctl;
118
119/*
120 * nfs vfs operations.
121 */
122static struct vfsops nfs_vfsops = {
123	.vfs_init =		ncl_init,
124	.vfs_mount =		nfs_mount,
125	.vfs_cmount =		nfs_cmount,
126	.vfs_root =		nfs_root,
127	.vfs_statfs =		nfs_statfs,
128	.vfs_sync =		nfs_sync,
129	.vfs_uninit =		ncl_uninit,
130	.vfs_unmount =		nfs_unmount,
131	.vfs_sysctl =		nfs_sysctl,
132};
133VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
134
135/* So that loader and kldload(2) can find us, wherever we are.. */
136MODULE_VERSION(nfs, 1);
137MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
138MODULE_DEPEND(nfs, krpc, 1, 1, 1);
139MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
140MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
141
142/*
143 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
144 * can be shared by both NFS clients. It is declared here so that it
145 * will be defined for kernels built without NFS_ROOT, although it
146 * isn't used in that case.
147 */
148#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
149struct nfs_diskless	nfs_diskless = { { { 0 } } };
150struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
151int			nfs_diskless_valid = 0;
152#endif
153
154SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
155    &nfs_diskless_valid, 0,
156    "Has the diskless struct been filled correctly");
157
158SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
159    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
160
161SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
162    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
163    "%Ssockaddr_in", "Diskless root nfs address");
164
165
166void		newnfsargs_ntoh(struct nfs_args *);
167static int	nfs_mountdiskless(char *,
168		    struct sockaddr_in *, struct nfs_args *,
169		    struct thread *, struct vnode **, struct mount *);
170static void	nfs_convert_diskless(void);
171static void	nfs_convert_oargs(struct nfs_args *args,
172		    struct onfs_args *oargs);
173
174int
175newnfs_iosize(struct nfsmount *nmp)
176{
177	int iosize, maxio;
178
179	/* First, set the upper limit for iosize */
180	if (nmp->nm_flag & NFSMNT_NFSV4) {
181		maxio = NFS_MAXBSIZE;
182	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
183		if (nmp->nm_sotype == SOCK_DGRAM)
184			maxio = NFS_MAXDGRAMDATA;
185		else
186			maxio = NFS_MAXBSIZE;
187	} else {
188		maxio = NFS_V2MAXDATA;
189	}
190	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
191		nmp->nm_rsize = maxio;
192	if (nmp->nm_rsize > MAXBSIZE)
193		nmp->nm_rsize = MAXBSIZE;
194	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
195		nmp->nm_readdirsize = maxio;
196	if (nmp->nm_readdirsize > nmp->nm_rsize)
197		nmp->nm_readdirsize = nmp->nm_rsize;
198	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
199		nmp->nm_wsize = maxio;
200	if (nmp->nm_wsize > MAXBSIZE)
201		nmp->nm_wsize = MAXBSIZE;
202
203	/*
204	 * Calculate the size used for io buffers.  Use the larger
205	 * of the two sizes to minimise nfs requests but make sure
206	 * that it is at least one VM page to avoid wasting buffer
207	 * space.
208	 */
209	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
210	iosize = imax(iosize, PAGE_SIZE);
211	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
212	return (iosize);
213}
214
215static void
216nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
217{
218
219	args->version = NFS_ARGSVERSION;
220	args->addr = oargs->addr;
221	args->addrlen = oargs->addrlen;
222	args->sotype = oargs->sotype;
223	args->proto = oargs->proto;
224	args->fh = oargs->fh;
225	args->fhsize = oargs->fhsize;
226	args->flags = oargs->flags;
227	args->wsize = oargs->wsize;
228	args->rsize = oargs->rsize;
229	args->readdirsize = oargs->readdirsize;
230	args->timeo = oargs->timeo;
231	args->retrans = oargs->retrans;
232	args->readahead = oargs->readahead;
233	args->hostname = oargs->hostname;
234}
235
236static void
237nfs_convert_diskless(void)
238{
239
240	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
241		sizeof(struct ifaliasreq));
242	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
243		sizeof(struct sockaddr_in));
244	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
245	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
246		nfsv3_diskless.root_fhsize = NFSX_MYFH;
247		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
248	} else {
249		nfsv3_diskless.root_fhsize = NFSX_V2FH;
250		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
251	}
252	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
253		sizeof(struct sockaddr_in));
254	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
255	nfsv3_diskless.root_time = nfs_diskless.root_time;
256	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
257		MAXHOSTNAMELEN);
258	nfs_diskless_valid = 3;
259}
260
261/*
262 * nfs statfs call
263 */
264static int
265nfs_statfs(struct mount *mp, struct statfs *sbp)
266{
267	struct vnode *vp;
268	struct thread *td;
269	struct nfsmount *nmp = VFSTONFS(mp);
270	struct nfsvattr nfsva;
271	struct nfsfsinfo fs;
272	struct nfsstatfs sb;
273	int error = 0, attrflag, gotfsinfo = 0, ret;
274	struct nfsnode *np;
275
276	td = curthread;
277
278	error = vfs_busy(mp, MBF_NOWAIT);
279	if (error)
280		return (error);
281	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
282	if (error) {
283		vfs_unbusy(mp);
284		return (error);
285	}
286	vp = NFSTOV(np);
287	mtx_lock(&nmp->nm_mtx);
288	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
289		mtx_unlock(&nmp->nm_mtx);
290		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
291		    &attrflag, NULL);
292		if (!error)
293			gotfsinfo = 1;
294	} else
295		mtx_unlock(&nmp->nm_mtx);
296	if (!error)
297		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
298		    &attrflag, NULL);
299	if (attrflag == 0) {
300		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
301		    td->td_ucred, td, &nfsva, NULL);
302		if (ret) {
303			/*
304			 * Just set default values to get things going.
305			 */
306			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
307			nfsva.na_vattr.va_type = VDIR;
308			nfsva.na_vattr.va_mode = 0777;
309			nfsva.na_vattr.va_nlink = 100;
310			nfsva.na_vattr.va_uid = (uid_t)0;
311			nfsva.na_vattr.va_gid = (gid_t)0;
312			nfsva.na_vattr.va_fileid = 2;
313			nfsva.na_vattr.va_gen = 1;
314			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
315			nfsva.na_vattr.va_size = 512 * 1024;
316		}
317	}
318	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
319	if (!error) {
320	    mtx_lock(&nmp->nm_mtx);
321	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
322		nfscl_loadfsinfo(nmp, &fs);
323	    nfscl_loadsbinfo(nmp, &sb, sbp);
324	    sbp->f_iosize = newnfs_iosize(nmp);
325	    mtx_unlock(&nmp->nm_mtx);
326	    if (sbp != &mp->mnt_stat) {
327		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
328		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
329	    }
330	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
331	} else if (NFS_ISV4(vp)) {
332		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
333	}
334	vput(vp);
335	vfs_unbusy(mp);
336	return (error);
337}
338
339/*
340 * nfs version 3 fsinfo rpc call
341 */
342int
343ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
344    struct thread *td)
345{
346	struct nfsfsinfo fs;
347	struct nfsvattr nfsva;
348	int error, attrflag;
349
350	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
351	if (!error) {
352		if (attrflag)
353			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
354			    1);
355		mtx_lock(&nmp->nm_mtx);
356		nfscl_loadfsinfo(nmp, &fs);
357		mtx_unlock(&nmp->nm_mtx);
358	}
359	return (error);
360}
361
362/*
363 * Mount a remote root fs via. nfs. This depends on the info in the
364 * nfs_diskless structure that has been filled in properly by some primary
365 * bootstrap.
366 * It goes something like this:
367 * - do enough of "ifconfig" by calling ifioctl() so that the system
368 *   can talk to the server
369 * - If nfs_diskless.mygateway is filled in, use that address as
370 *   a default gateway.
371 * - build the rootfs mount point and call mountnfs() to do the rest.
372 *
373 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
374 * structure, as well as other global NFS client variables here, as
375 * nfs_mountroot() will be called once in the boot before any other NFS
376 * client activity occurs.
377 */
378static int
379nfs_mountroot(struct mount *mp)
380{
381	struct thread *td = curthread;
382	struct nfsv3_diskless *nd = &nfsv3_diskless;
383	struct socket *so;
384	struct vnode *vp;
385	struct ifreq ir;
386	int error;
387	u_long l;
388	char buf[128];
389	char *cp;
390
391#if defined(BOOTP_NFSROOT) && defined(BOOTP)
392	bootpc_init();		/* use bootp to get nfs_diskless filled in */
393#elif defined(NFS_ROOT)
394	nfs_setup_diskless();
395#endif
396
397	if (nfs_diskless_valid == 0)
398		return (-1);
399	if (nfs_diskless_valid == 1)
400		nfs_convert_diskless();
401
402	/*
403	 * XXX splnet, so networks will receive...
404	 */
405	splnet();
406
407	/*
408	 * Do enough of ifconfig(8) so that the critical net interface can
409	 * talk to the server.
410	 */
411	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
412	    td->td_ucred, td);
413	if (error)
414		panic("nfs_mountroot: socreate(%04x): %d",
415			nd->myif.ifra_addr.sa_family, error);
416
417#if 0 /* XXX Bad idea */
418	/*
419	 * We might not have been told the right interface, so we pass
420	 * over the first ten interfaces of the same kind, until we get
421	 * one of them configured.
422	 */
423
424	for (i = strlen(nd->myif.ifra_name) - 1;
425		nd->myif.ifra_name[i] >= '0' &&
426		nd->myif.ifra_name[i] <= '9';
427		nd->myif.ifra_name[i] ++) {
428		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
429		if(!error)
430			break;
431	}
432#endif
433	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
434	if (error)
435		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
436	if ((cp = getenv("boot.netif.mtu")) != NULL) {
437		ir.ifr_mtu = strtol(cp, NULL, 10);
438		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
439		freeenv(cp);
440		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
441		if (error)
442			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
443	}
444	soclose(so);
445
446	/*
447	 * If the gateway field is filled in, set it as the default route.
448	 * Note that pxeboot will set a default route of 0 if the route
449	 * is not set by the DHCP server.  Check also for a value of 0
450	 * to avoid panicking inappropriately in that situation.
451	 */
452	if (nd->mygateway.sin_len != 0 &&
453	    nd->mygateway.sin_addr.s_addr != 0) {
454		struct sockaddr_in mask, sin;
455
456		bzero((caddr_t)&mask, sizeof(mask));
457		sin = mask;
458		sin.sin_family = AF_INET;
459		sin.sin_len = sizeof(sin);
460                /* XXX MRT use table 0 for this sort of thing */
461		CURVNET_SET(TD_TO_VNET(td));
462		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
463		    (struct sockaddr *)&nd->mygateway,
464		    (struct sockaddr *)&mask,
465		    RTF_UP | RTF_GATEWAY, NULL);
466		CURVNET_RESTORE();
467		if (error)
468			panic("nfs_mountroot: RTM_ADD: %d", error);
469	}
470
471	/*
472	 * Create the rootfs mount point.
473	 */
474	nd->root_args.fh = nd->root_fh;
475	nd->root_args.fhsize = nd->root_fhsize;
476	l = ntohl(nd->root_saddr.sin_addr.s_addr);
477	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
478		(l >> 24) & 0xff, (l >> 16) & 0xff,
479		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
480	printf("NFS ROOT: %s\n", buf);
481	nd->root_args.hostname = buf;
482	if ((error = nfs_mountdiskless(buf,
483	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
484		return (error);
485	}
486
487	/*
488	 * This is not really an nfs issue, but it is much easier to
489	 * set hostname here and then let the "/etc/rc.xxx" files
490	 * mount the right /var based upon its preset value.
491	 */
492	mtx_lock(&prison0.pr_mtx);
493	strlcpy(prison0.pr_hostname, nd->my_hostnam,
494	    sizeof(prison0.pr_hostname));
495	mtx_unlock(&prison0.pr_mtx);
496	inittodr(ntohl(nd->root_time));
497	return (0);
498}
499
500/*
501 * Internal version of mount system call for diskless setup.
502 */
503static int
504nfs_mountdiskless(char *path,
505    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
506    struct vnode **vpp, struct mount *mp)
507{
508	struct sockaddr *nam;
509	int dirlen, error;
510	char *dirpath;
511
512	/*
513	 * Find the directory path in "path", which also has the server's
514	 * name/ip address in it.
515	 */
516	dirpath = strchr(path, ':');
517	if (dirpath != NULL)
518		dirlen = strlen(++dirpath);
519	else
520		dirlen = 0;
521	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
522	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
523	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
524		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
525		return (error);
526	}
527	return (0);
528}
529
530static void
531nfs_sec_name(char *sec, int *flagsp)
532{
533	if (!strcmp(sec, "krb5"))
534		*flagsp |= NFSMNT_KERB;
535	else if (!strcmp(sec, "krb5i"))
536		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
537	else if (!strcmp(sec, "krb5p"))
538		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
539}
540
541static void
542nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
543    const char *hostname, struct ucred *cred, struct thread *td)
544{
545	int s;
546	int adjsock;
547	char *p;
548
549	s = splnet();
550
551	/*
552	 * Set read-only flag if requested; otherwise, clear it if this is
553	 * an update.  If this is not an update, then either the read-only
554	 * flag is already clear, or this is a root mount and it was set
555	 * intentionally at some previous point.
556	 */
557	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
558		MNT_ILOCK(mp);
559		mp->mnt_flag |= MNT_RDONLY;
560		MNT_IUNLOCK(mp);
561	} else if (mp->mnt_flag & MNT_UPDATE) {
562		MNT_ILOCK(mp);
563		mp->mnt_flag &= ~MNT_RDONLY;
564		MNT_IUNLOCK(mp);
565	}
566
567	/*
568	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
569	 * no sense in that context.  Also, set up appropriate retransmit
570	 * and soft timeout behavior.
571	 */
572	if (argp->sotype == SOCK_STREAM) {
573		nmp->nm_flag &= ~NFSMNT_NOCONN;
574		nmp->nm_timeo = NFS_MAXTIMEO;
575		if ((argp->flags & NFSMNT_NFSV4) != 0)
576			nmp->nm_retry = INT_MAX;
577		else
578			nmp->nm_retry = NFS_RETRANS_TCP;
579	}
580
581	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
582	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
583		argp->flags &= ~NFSMNT_RDIRPLUS;
584		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
585	}
586
587	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
588	if ((argp->flags & NFSMNT_NFSV4) != 0) {
589		argp->flags &= ~NFSMNT_RESVPORT;
590		nmp->nm_flag &= ~NFSMNT_RESVPORT;
591	}
592
593	/* Re-bind if rsrvd port requested and wasn't on one */
594	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
595		  && (argp->flags & NFSMNT_RESVPORT);
596	/* Also re-bind if we're switching to/from a connected UDP socket */
597	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
598		    (argp->flags & NFSMNT_NOCONN));
599
600	/* Update flags atomically.  Don't change the lock bits. */
601	nmp->nm_flag = argp->flags | nmp->nm_flag;
602	splx(s);
603
604	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
605		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
606		if (nmp->nm_timeo < NFS_MINTIMEO)
607			nmp->nm_timeo = NFS_MINTIMEO;
608		else if (nmp->nm_timeo > NFS_MAXTIMEO)
609			nmp->nm_timeo = NFS_MAXTIMEO;
610	}
611
612	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
613		nmp->nm_retry = argp->retrans;
614		if (nmp->nm_retry > NFS_MAXREXMIT)
615			nmp->nm_retry = NFS_MAXREXMIT;
616	}
617
618	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
619		nmp->nm_wsize = argp->wsize;
620		/* Round down to multiple of blocksize */
621		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
622		if (nmp->nm_wsize <= 0)
623			nmp->nm_wsize = NFS_FABLKSIZE;
624	}
625
626	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
627		nmp->nm_rsize = argp->rsize;
628		/* Round down to multiple of blocksize */
629		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
630		if (nmp->nm_rsize <= 0)
631			nmp->nm_rsize = NFS_FABLKSIZE;
632	}
633
634	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
635		nmp->nm_readdirsize = argp->readdirsize;
636	}
637
638	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
639		nmp->nm_acregmin = argp->acregmin;
640	else
641		nmp->nm_acregmin = NFS_MINATTRTIMO;
642	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
643		nmp->nm_acregmax = argp->acregmax;
644	else
645		nmp->nm_acregmax = NFS_MAXATTRTIMO;
646	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
647		nmp->nm_acdirmin = argp->acdirmin;
648	else
649		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
650	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
651		nmp->nm_acdirmax = argp->acdirmax;
652	else
653		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
654	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
655		nmp->nm_acdirmin = nmp->nm_acdirmax;
656	if (nmp->nm_acregmin > nmp->nm_acregmax)
657		nmp->nm_acregmin = nmp->nm_acregmax;
658
659	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
660		if (argp->readahead <= NFS_MAXRAHEAD)
661			nmp->nm_readahead = argp->readahead;
662		else
663			nmp->nm_readahead = NFS_MAXRAHEAD;
664	}
665	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
666		if (argp->wcommitsize < nmp->nm_wsize)
667			nmp->nm_wcommitsize = nmp->nm_wsize;
668		else
669			nmp->nm_wcommitsize = argp->wcommitsize;
670	}
671
672	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
673		    (nmp->nm_soproto != argp->proto));
674
675	if (nmp->nm_client != NULL && adjsock) {
676		int haslock = 0, error = 0;
677
678		if (nmp->nm_sotype == SOCK_STREAM) {
679			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
680			if (!error)
681				haslock = 1;
682		}
683		if (!error) {
684		    newnfs_disconnect(&nmp->nm_sockreq);
685		    if (haslock)
686			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
687		    nmp->nm_sotype = argp->sotype;
688		    nmp->nm_soproto = argp->proto;
689		    if (nmp->nm_sotype == SOCK_DGRAM)
690			while (newnfs_connect(nmp, &nmp->nm_sockreq,
691			    cred, td, 0)) {
692				printf("newnfs_args: retrying connect\n");
693				(void) nfs_catnap(PSOCK, 0, "newnfscon");
694			}
695		}
696	} else {
697		nmp->nm_sotype = argp->sotype;
698		nmp->nm_soproto = argp->proto;
699	}
700
701	if (hostname != NULL) {
702		strlcpy(nmp->nm_hostname, hostname,
703		    sizeof(nmp->nm_hostname));
704		p = strchr(nmp->nm_hostname, ':');
705		if (p != NULL)
706			*p = '\0';
707	}
708}
709
/*
 * NULL-terminated list of option names that nfs_mount() passes to
 * vfs_filteropt() as the set of options it accepts.
 */
static const char *nfs_opts[] = {
	/* Source and old-style argument blob. */
	"from", "nfs_args",
	/* Generic mount options. */
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	/* Connection, locking and behaviour switches. */
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	/* Transport selection and transfer sizes. */
	"mntudp", "tcp", "udp", "wsize", "rsize", "retrans",
	/* Attribute cache limits. */
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	/* Remaining options. */
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo", "nocto",
	NULL
};
720
721/*
722 * VFS Operations.
723 *
724 * mount system call
725 * It seems a bit dumb to copyinstr() the host and path here and then
726 * bcopy() them in mountnfs(), but I wanted to detect errors before
727 * doing the sockargs() call because sockargs() allocates an mbuf and
728 * an error after that means that I have to release the mbuf.
729 */
730/* ARGSUSED */
731static int
732nfs_mount(struct mount *mp)
733{
734	struct nfs_args args = {
735	    .version = NFS_ARGSVERSION,
736	    .addr = NULL,
737	    .addrlen = sizeof (struct sockaddr_in),
738	    .sotype = SOCK_STREAM,
739	    .proto = 0,
740	    .fh = NULL,
741	    .fhsize = 0,
742	    .flags = NFSMNT_RESVPORT,
743	    .wsize = NFS_WSIZE,
744	    .rsize = NFS_RSIZE,
745	    .readdirsize = NFS_READDIRSIZE,
746	    .timeo = 10,
747	    .retrans = NFS_RETRANS,
748	    .readahead = NFS_DEFRAHEAD,
749	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
750	    .hostname = NULL,
751	    .acregmin = NFS_MINATTRTIMO,
752	    .acregmax = NFS_MAXATTRTIMO,
753	    .acdirmin = NFS_MINDIRATTRTIMO,
754	    .acdirmax = NFS_MAXDIRATTRTIMO,
755	};
756	int error = 0, ret, len;
757	struct sockaddr *nam = NULL;
758	struct vnode *vp;
759	struct thread *td;
760	char hst[MNAMELEN];
761	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
762	char *opt, *name, *secname;
763	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
764	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
765	size_t hstlen;
766
767	has_nfs_args_opt = 0;
768	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
769		error = EINVAL;
770		goto out;
771	}
772
773	td = curthread;
774	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
775		error = nfs_mountroot(mp);
776		goto out;
777	}
778
779	nfscl_init();
780
781	/*
782	 * The old mount_nfs program passed the struct nfs_args
783	 * from userspace to kernel.  The new mount_nfs program
784	 * passes string options via nmount() from userspace to kernel
785	 * and we populate the struct nfs_args in the kernel.
786	 */
787	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
788		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
789		    sizeof(args));
790		if (error != 0)
791			goto out;
792
793		if (args.version != NFS_ARGSVERSION) {
794			error = EPROGMISMATCH;
795			goto out;
796		}
797		has_nfs_args_opt = 1;
798	}
799
800	/* Handle the new style options. */
801	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
802		args.flags |= NFSMNT_NOCONN;
803	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
804		args.flags |= NFSMNT_NOCONN;
805	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
806		args.flags |= NFSMNT_NOLOCKD;
807	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
808		args.flags &= ~NFSMNT_NOLOCKD;
809	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
810		args.flags |= NFSMNT_INT;
811	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
812		args.flags |= NFSMNT_RDIRPLUS;
813	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
814		args.flags |= NFSMNT_RESVPORT;
815	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
816		args.flags &= ~NFSMNT_RESVPORT;
817	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
818		args.flags |= NFSMNT_SOFT;
819	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
820		args.flags &= ~NFSMNT_SOFT;
821	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
822		args.sotype = SOCK_DGRAM;
823	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
824		args.sotype = SOCK_DGRAM;
825	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
826		args.sotype = SOCK_STREAM;
827	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
828		args.flags |= NFSMNT_NFSV3;
829	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
830		args.flags |= NFSMNT_NFSV4;
831		args.sotype = SOCK_STREAM;
832	}
833	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
834		args.flags |= NFSMNT_ALLGSSNAME;
835	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
836		args.flags |= NFSMNT_NOCTO;
837	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
838		if (opt == NULL) {
839			vfs_mount_error(mp, "illegal readdirsize");
840			error = EINVAL;
841			goto out;
842		}
843		ret = sscanf(opt, "%d", &args.readdirsize);
844		if (ret != 1 || args.readdirsize <= 0) {
845			vfs_mount_error(mp, "illegal readdirsize: %s",
846			    opt);
847			error = EINVAL;
848			goto out;
849		}
850		args.flags |= NFSMNT_READDIRSIZE;
851	}
852	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
853		if (opt == NULL) {
854			vfs_mount_error(mp, "illegal readahead");
855			error = EINVAL;
856			goto out;
857		}
858		ret = sscanf(opt, "%d", &args.readahead);
859		if (ret != 1 || args.readahead <= 0) {
860			vfs_mount_error(mp, "illegal readahead: %s",
861			    opt);
862			error = EINVAL;
863			goto out;
864		}
865		args.flags |= NFSMNT_READAHEAD;
866	}
867	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
868		if (opt == NULL) {
869			vfs_mount_error(mp, "illegal wsize");
870			error = EINVAL;
871			goto out;
872		}
873		ret = sscanf(opt, "%d", &args.wsize);
874		if (ret != 1 || args.wsize <= 0) {
875			vfs_mount_error(mp, "illegal wsize: %s",
876			    opt);
877			error = EINVAL;
878			goto out;
879		}
880		args.flags |= NFSMNT_WSIZE;
881	}
882	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
883		if (opt == NULL) {
884			vfs_mount_error(mp, "illegal rsize");
885			error = EINVAL;
886			goto out;
887		}
888		ret = sscanf(opt, "%d", &args.rsize);
889		if (ret != 1 || args.rsize <= 0) {
890			vfs_mount_error(mp, "illegal wsize: %s",
891			    opt);
892			error = EINVAL;
893			goto out;
894		}
895		args.flags |= NFSMNT_RSIZE;
896	}
897	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
898		if (opt == NULL) {
899			vfs_mount_error(mp, "illegal retrans");
900			error = EINVAL;
901			goto out;
902		}
903		ret = sscanf(opt, "%d", &args.retrans);
904		if (ret != 1 || args.retrans <= 0) {
905			vfs_mount_error(mp, "illegal retrans: %s",
906			    opt);
907			error = EINVAL;
908			goto out;
909		}
910		args.flags |= NFSMNT_RETRANS;
911	}
912	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
913		ret = sscanf(opt, "%d", &args.acregmin);
914		if (ret != 1 || args.acregmin < 0) {
915			vfs_mount_error(mp, "illegal acregmin: %s",
916			    opt);
917			error = EINVAL;
918			goto out;
919		}
920		args.flags |= NFSMNT_ACREGMIN;
921	}
922	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
923		ret = sscanf(opt, "%d", &args.acregmax);
924		if (ret != 1 || args.acregmax < 0) {
925			vfs_mount_error(mp, "illegal acregmax: %s",
926			    opt);
927			error = EINVAL;
928			goto out;
929		}
930		args.flags |= NFSMNT_ACREGMAX;
931	}
932	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
933		ret = sscanf(opt, "%d", &args.acdirmin);
934		if (ret != 1 || args.acdirmin < 0) {
935			vfs_mount_error(mp, "illegal acdirmin: %s",
936			    opt);
937			error = EINVAL;
938			goto out;
939		}
940		args.flags |= NFSMNT_ACDIRMIN;
941	}
942	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
943		ret = sscanf(opt, "%d", &args.acdirmax);
944		if (ret != 1 || args.acdirmax < 0) {
945			vfs_mount_error(mp, "illegal acdirmax: %s",
946			    opt);
947			error = EINVAL;
948			goto out;
949		}
950		args.flags |= NFSMNT_ACDIRMAX;
951	}
952	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
953		ret = sscanf(opt, "%d", &args.timeo);
954		if (ret != 1 || args.timeo <= 0) {
955			vfs_mount_error(mp, "illegal timeout: %s",
956			    opt);
957			error = EINVAL;
958			goto out;
959		}
960		args.flags |= NFSMNT_TIMEO;
961	}
962	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
963	    == 0) {
964		ret = sscanf(opt, "%d", &negnametimeo);
965		if (ret != 1 || negnametimeo < 0) {
966			vfs_mount_error(mp, "illegal negnametimeo: %s",
967			    opt);
968			error = EINVAL;
969			goto out;
970		}
971	}
972	if (vfs_getopt(mp->mnt_optnew, "sec",
973		(void **) &secname, NULL) == 0)
974		nfs_sec_name(secname, &args.flags);
975
976	if (mp->mnt_flag & MNT_UPDATE) {
977		struct nfsmount *nmp = VFSTONFS(mp);
978
979		if (nmp == NULL) {
980			error = EIO;
981			goto out;
982		}
983		/*
984		 * When doing an update, we can't change version,
985		 * security, switch lockd strategies or change cookie
986		 * translation
987		 */
988		args.flags = (args.flags &
989		    ~(NFSMNT_NFSV3 |
990		      NFSMNT_NFSV4 |
991		      NFSMNT_KERB |
992		      NFSMNT_INTEGRITY |
993		      NFSMNT_PRIVACY |
994		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
995		    (nmp->nm_flag &
996			(NFSMNT_NFSV3 |
997			 NFSMNT_NFSV4 |
998			 NFSMNT_KERB |
999			 NFSMNT_INTEGRITY |
1000			 NFSMNT_PRIVACY |
1001			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1002		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1003		goto out;
1004	}
1005
1006	/*
1007	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1008	 * or no-connection mode for those protocols that support
1009	 * no-connection mode (the flag will be cleared later for protocols
1010	 * that do not support no-connection mode).  This will allow a client
1011	 * to receive replies from a different IP then the request was
1012	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1013	 * not 0.
1014	 */
1015	if (nfs_ip_paranoia == 0)
1016		args.flags |= NFSMNT_NOCONN;
1017
1018	if (has_nfs_args_opt != 0) {
1019		/*
1020		 * In the 'nfs_args' case, the pointers in the args
1021		 * structure are in userland - we copy them in here.
1022		 */
1023		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1024			vfs_mount_error(mp, "Bad file handle");
1025			error = EINVAL;
1026			goto out;
1027		}
1028		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1029		    args.fhsize);
1030		if (error != 0)
1031			goto out;
1032		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1033		if (error != 0)
1034			goto out;
1035		bzero(&hst[hstlen], MNAMELEN - hstlen);
1036		args.hostname = hst;
1037		/* sockargs() call must be after above copyin() calls */
1038		error = getsockaddr(&nam, (caddr_t)args.addr,
1039		    args.addrlen);
1040		if (error != 0)
1041			goto out;
1042	} else {
1043		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1044		    &args.fhsize) == 0) {
1045			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1046				vfs_mount_error(mp, "Bad file handle");
1047				error = EINVAL;
1048				goto out;
1049			}
1050			bcopy(args.fh, nfh, args.fhsize);
1051		} else {
1052			args.fhsize = 0;
1053		}
1054		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1055		    (void **)&args.hostname, &len);
1056		if (args.hostname == NULL) {
1057			vfs_mount_error(mp, "Invalid hostname");
1058			error = EINVAL;
1059			goto out;
1060		}
1061		bcopy(args.hostname, hst, MNAMELEN);
1062		hst[MNAMELEN - 1] = '\0';
1063	}
1064
1065	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1066		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1067	else
1068		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1069	srvkrbnamelen = strlen(srvkrbname);
1070
1071	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1072		strlcpy(krbname, name, sizeof (krbname));
1073	else
1074		krbname[0] = '\0';
1075	krbnamelen = strlen(krbname);
1076
1077	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1078		strlcpy(dirpath, name, sizeof (dirpath));
1079	else
1080		dirpath[0] = '\0';
1081	dirlen = strlen(dirpath);
1082
1083	if (has_nfs_args_opt == 0) {
1084		if (vfs_getopt(mp->mnt_optnew, "addr",
1085		    (void **)&args.addr, &args.addrlen) == 0) {
1086			if (args.addrlen > SOCK_MAXADDRLEN) {
1087				error = ENAMETOOLONG;
1088				goto out;
1089			}
1090			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1091			bcopy(args.addr, nam, args.addrlen);
1092			nam->sa_len = args.addrlen;
1093		} else {
1094			vfs_mount_error(mp, "No server address");
1095			error = EINVAL;
1096			goto out;
1097		}
1098	}
1099
1100	args.fh = nfh;
1101	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1102	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1103	    negnametimeo);
1104out:
1105	if (!error) {
1106		MNT_ILOCK(mp);
1107		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1108		MNT_IUNLOCK(mp);
1109	}
1110	return (error);
1111}
1112
1113
1114/*
1115 * VFS Operations.
1116 *
1117 * mount system call
1118 * It seems a bit dumb to copyinstr() the host and path here and then
1119 * bcopy() them in mountnfs(), but I wanted to detect errors before
1120 * doing the sockargs() call because sockargs() allocates an mbuf and
1121 * an error after that means that I have to release the mbuf.
1122 */
1123/* ARGSUSED */
1124static int
1125nfs_cmount(struct mntarg *ma, void *data, int flags)
1126{
1127	int error;
1128	struct nfs_args args;
1129
1130	error = copyin(data, &args, sizeof (struct nfs_args));
1131	if (error)
1132		return error;
1133
1134	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1135
1136	error = kernel_mount(ma, flags);
1137	return (error);
1138}
1139
1140/*
1141 * Common code for mount and mountroot
1142 */
1143static int
1144mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1145    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1146    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1147    struct ucred *cred, struct thread *td, int negnametimeo)
1148{
1149	struct nfsmount *nmp;
1150	struct nfsnode *np;
1151	int error, trycnt, ret;
1152	struct nfsvattr nfsva;
1153	static u_int64_t clval = 0;
1154
1155	if (mp->mnt_flag & MNT_UPDATE) {
1156		nmp = VFSTONFS(mp);
1157		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1158		FREE(nam, M_SONAME);
1159		return (0);
1160	} else {
1161		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1162		    krbnamelen + dirlen + srvkrbnamelen + 2,
1163		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1164		TAILQ_INIT(&nmp->nm_bufq);
1165		if (clval == 0)
1166			clval = (u_int64_t)nfsboottime.tv_sec;
1167		nmp->nm_clval = clval++;
1168		nmp->nm_krbnamelen = krbnamelen;
1169		nmp->nm_dirpathlen = dirlen;
1170		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1171		if (td->td_ucred->cr_uid != (uid_t)0) {
1172			/*
1173			 * nm_uid is used to get KerberosV credentials for
1174			 * the nfsv4 state handling operations if there is
1175			 * no host based principal set. Use the uid of
1176			 * this user if not root, since they are doing the
1177			 * mount. I don't think setting this for root will
1178			 * work, since root normally does not have user
1179			 * credentials in a credentials cache.
1180			 */
1181			nmp->nm_uid = td->td_ucred->cr_uid;
1182		} else {
1183			/*
1184			 * Just set to -1, so it won't be used.
1185			 */
1186			nmp->nm_uid = (uid_t)-1;
1187		}
1188
1189		/* Copy and null terminate all the names */
1190		if (nmp->nm_krbnamelen > 0) {
1191			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1192			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1193		}
1194		if (nmp->nm_dirpathlen > 0) {
1195			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1196			    nmp->nm_dirpathlen);
1197			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1198			    + 1] = '\0';
1199		}
1200		if (nmp->nm_srvkrbnamelen > 0) {
1201			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1202			    nmp->nm_srvkrbnamelen);
1203			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1204			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1205		}
1206		nmp->nm_sockreq.nr_cred = crhold(cred);
1207		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1208		mp->mnt_data = nmp;
1209		nmp->nm_getinfo = nfs_getnlminfo;
1210		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1211	}
1212	vfs_getnewfsid(mp);
1213	nmp->nm_mountp = mp;
1214	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1215	nmp->nm_negnametimeo = negnametimeo;
1216
1217	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1218
1219	/*
1220	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1221	 * high, depending on whether we end up with negative offsets in
1222	 * the client or server somewhere.  2GB-1 may be safer.
1223	 *
1224	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1225	 * that we can handle until we find out otherwise.
1226	 * XXX Our "safe" limit on the client is what we can store in our
1227	 * buffer cache using signed(!) block numbers.
1228	 */
1229	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1230		nmp->nm_maxfilesize = 0xffffffffLL;
1231	else
1232		nmp->nm_maxfilesize = OFF_MAX;
1233
1234	nmp->nm_timeo = NFS_TIMEO;
1235	nmp->nm_retry = NFS_RETRANS;
1236	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1237		nmp->nm_wsize = NFS_WSIZE;
1238		nmp->nm_rsize = NFS_RSIZE;
1239		nmp->nm_readdirsize = NFS_READDIRSIZE;
1240	}
1241	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1242	nmp->nm_numgrps = NFS_MAXGRPS;
1243	nmp->nm_readahead = NFS_DEFRAHEAD;
1244	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1245	if (nmp->nm_tprintf_delay < 0)
1246		nmp->nm_tprintf_delay = 0;
1247	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1248	if (nmp->nm_tprintf_initial_delay < 0)
1249		nmp->nm_tprintf_initial_delay = 0;
1250	nmp->nm_fhsize = argp->fhsize;
1251	if (nmp->nm_fhsize > 0)
1252		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1253	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1254	nmp->nm_nam = nam;
1255	/* Set up the sockets and per-host congestion */
1256	nmp->nm_sotype = argp->sotype;
1257	nmp->nm_soproto = argp->proto;
1258	nmp->nm_sockreq.nr_prog = NFS_PROG;
1259	if ((argp->flags & NFSMNT_NFSV4))
1260		nmp->nm_sockreq.nr_vers = NFS_VER4;
1261	else if ((argp->flags & NFSMNT_NFSV3))
1262		nmp->nm_sockreq.nr_vers = NFS_VER3;
1263	else
1264		nmp->nm_sockreq.nr_vers = NFS_VER2;
1265
1266
1267	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1268		goto bad;
1269
1270	/*
1271	 * A reference count is needed on the nfsnode representing the
1272	 * remote root.  If this object is not persistent, then backward
1273	 * traversals of the mount point (i.e. "..") will not work if
1274	 * the nfsnode gets flushed out of the cache. Ufs does not have
1275	 * this problem, because one can identify root inodes by their
1276	 * number == ROOTINO (2).
1277	 */
1278	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1279	    nmp->nm_dirpathlen > 0) {
1280		/*
1281		 * If the fhsize on the mount point == 0 for V4, the mount
1282		 * path needs to be looked up.
1283		 */
1284		trycnt = 3;
1285		do {
1286			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1287			    cred, td);
1288			if (error)
1289				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1290		} while (error && --trycnt > 0);
1291		if (error) {
1292			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1293			goto bad;
1294		}
1295	}
1296	if (nmp->nm_fhsize > 0) {
1297		/*
1298		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1299		 * non-zero for the root vnode. f_iosize will be set correctly
1300		 * by nfs_statfs() before any I/O occurs.
1301		 */
1302		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1303		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1304		    LK_EXCLUSIVE);
1305		if (error)
1306			goto bad;
1307		*vpp = NFSTOV(np);
1308
1309		/*
1310		 * Get file attributes and transfer parameters for the
1311		 * mountpoint.  This has the side effect of filling in
1312		 * (*vpp)->v_type with the correct value.
1313		 */
1314		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1315		    cred, td, &nfsva, NULL);
1316		if (ret) {
1317			/*
1318			 * Just set default values to get things going.
1319			 */
1320			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1321			nfsva.na_vattr.va_type = VDIR;
1322			nfsva.na_vattr.va_mode = 0777;
1323			nfsva.na_vattr.va_nlink = 100;
1324			nfsva.na_vattr.va_uid = (uid_t)0;
1325			nfsva.na_vattr.va_gid = (gid_t)0;
1326			nfsva.na_vattr.va_fileid = 2;
1327			nfsva.na_vattr.va_gen = 1;
1328			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1329			nfsva.na_vattr.va_size = 512 * 1024;
1330		}
1331		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1332		if (argp->flags & NFSMNT_NFSV3)
1333			ncl_fsinfo(nmp, *vpp, cred, td);
1334
1335		/* Mark if the mount point supports NFSv4 ACLs. */
1336		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1337		    ret == 0 &&
1338		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1339			MNT_ILOCK(mp);
1340			mp->mnt_flag |= MNT_NFS4ACLS;
1341			MNT_IUNLOCK(mp);
1342		}
1343
1344		/*
1345		 * Lose the lock but keep the ref.
1346		 */
1347		NFSVOPUNLOCK(*vpp, 0);
1348		return (0);
1349	}
1350	error = EIO;
1351
1352bad:
1353	newnfs_disconnect(&nmp->nm_sockreq);
1354	crfree(nmp->nm_sockreq.nr_cred);
1355	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1356	mtx_destroy(&nmp->nm_mtx);
1357	FREE(nmp, M_NEWNFSMNT);
1358	FREE(nam, M_SONAME);
1359	return (error);
1360}
1361
1362/*
1363 * unmount system call
1364 */
1365static int
1366nfs_unmount(struct mount *mp, int mntflags)
1367{
1368	struct thread *td;
1369	struct nfsmount *nmp;
1370	int error, flags = 0, trycnt = 0;
1371
1372	td = curthread;
1373
1374	if (mntflags & MNT_FORCE)
1375		flags |= FORCECLOSE;
1376	nmp = VFSTONFS(mp);
1377	/*
1378	 * Goes something like this..
1379	 * - Call vflush() to clear out vnodes for this filesystem
1380	 * - Close the socket
1381	 * - Free up the data structures
1382	 */
1383	/* In the forced case, cancel any outstanding requests. */
1384	if (mntflags & MNT_FORCE) {
1385		error = newnfs_nmcancelreqs(nmp);
1386		if (error)
1387			goto out;
1388		/* For a forced close, get rid of the renew thread now */
1389		nfscl_umount(nmp, td);
1390	}
1391	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1392	do {
1393		error = vflush(mp, 1, flags, td);
1394		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1395			(void) nfs_catnap(PSOCK, error, "newndm");
1396	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1397	if (error)
1398		goto out;
1399
1400	/*
1401	 * We are now committed to the unmount.
1402	 */
1403	if ((mntflags & MNT_FORCE) == 0)
1404		nfscl_umount(nmp, td);
1405	newnfs_disconnect(&nmp->nm_sockreq);
1406	crfree(nmp->nm_sockreq.nr_cred);
1407	FREE(nmp->nm_nam, M_SONAME);
1408
1409	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1410	mtx_destroy(&nmp->nm_mtx);
1411	FREE(nmp, M_NEWNFSMNT);
1412out:
1413	return (error);
1414}
1415
1416/*
1417 * Return root of a filesystem
1418 */
1419static int
1420nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1421{
1422	struct vnode *vp;
1423	struct nfsmount *nmp;
1424	struct nfsnode *np;
1425	int error;
1426
1427	nmp = VFSTONFS(mp);
1428	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1429	if (error)
1430		return error;
1431	vp = NFSTOV(np);
1432	/*
1433	 * Get transfer parameters and attributes for root vnode once.
1434	 */
1435	mtx_lock(&nmp->nm_mtx);
1436	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1437		mtx_unlock(&nmp->nm_mtx);
1438		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1439	} else
1440		mtx_unlock(&nmp->nm_mtx);
1441	if (vp->v_type == VNON)
1442	    vp->v_type = VDIR;
1443	vp->v_vflag |= VV_ROOT;
1444	*vpp = vp;
1445	return (0);
1446}
1447
1448/*
1449 * Flush out the buffer cache
1450 */
1451/* ARGSUSED */
1452static int
1453nfs_sync(struct mount *mp, int waitfor)
1454{
1455	struct vnode *vp, *mvp;
1456	struct thread *td;
1457	int error, allerror = 0;
1458
1459	td = curthread;
1460
1461	MNT_ILOCK(mp);
1462	/*
1463	 * If a forced dismount is in progress, return from here so that
1464	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1465	 * calling VFS_UNMOUNT().
1466	 */
1467	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1468		MNT_IUNLOCK(mp);
1469		return (EBADF);
1470	}
1471
1472	/*
1473	 * Force stale buffer cache information to be flushed.
1474	 */
1475loop:
1476	MNT_VNODE_FOREACH(vp, mp, mvp) {
1477		VI_LOCK(vp);
1478		MNT_IUNLOCK(mp);
1479		/* XXX Racy bv_cnt check. */
1480		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1481		    waitfor == MNT_LAZY) {
1482			VI_UNLOCK(vp);
1483			MNT_ILOCK(mp);
1484			continue;
1485		}
1486		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1487			MNT_ILOCK(mp);
1488			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1489			goto loop;
1490		}
1491		error = VOP_FSYNC(vp, waitfor, td);
1492		if (error)
1493			allerror = error;
1494		NFSVOPUNLOCK(vp, 0);
1495		vrele(vp);
1496
1497		MNT_ILOCK(mp);
1498	}
1499	MNT_IUNLOCK(mp);
1500	return (allerror);
1501}
1502
1503static int
1504nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1505{
1506	struct nfsmount *nmp = VFSTONFS(mp);
1507	struct vfsquery vq;
1508	int error;
1509
1510	bzero(&vq, sizeof(vq));
1511	switch (op) {
1512#if 0
1513	case VFS_CTL_NOLOCKS:
1514		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1515 		if (req->oldptr != NULL) {
1516 			error = SYSCTL_OUT(req, &val, sizeof(val));
1517 			if (error)
1518 				return (error);
1519 		}
1520 		if (req->newptr != NULL) {
1521 			error = SYSCTL_IN(req, &val, sizeof(val));
1522 			if (error)
1523 				return (error);
1524			if (val)
1525				nmp->nm_flag |= NFSMNT_NOLOCKS;
1526			else
1527				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1528 		}
1529		break;
1530#endif
1531	case VFS_CTL_QUERY:
1532		mtx_lock(&nmp->nm_mtx);
1533		if (nmp->nm_state & NFSSTA_TIMEO)
1534			vq.vq_flags |= VQ_NOTRESP;
1535		mtx_unlock(&nmp->nm_mtx);
1536#if 0
1537		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1538		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1539			vq.vq_flags |= VQ_NOTRESPLOCK;
1540#endif
1541		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1542		break;
1543 	case VFS_CTL_TIMEO:
1544 		if (req->oldptr != NULL) {
1545 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1546 			    sizeof(nmp->nm_tprintf_initial_delay));
1547 			if (error)
1548 				return (error);
1549 		}
1550 		if (req->newptr != NULL) {
1551			error = vfs_suser(mp, req->td);
1552			if (error)
1553				return (error);
1554 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1555 			    sizeof(nmp->nm_tprintf_initial_delay));
1556 			if (error)
1557 				return (error);
1558 			if (nmp->nm_tprintf_initial_delay < 0)
1559 				nmp->nm_tprintf_initial_delay = 0;
1560 		}
1561		break;
1562	default:
1563		return (ENOTSUP);
1564	}
1565	return (0);
1566}
1567
1568/*
1569 * Extract the information needed by the nlm from the nfs vnode.
1570 */
1571static void
1572nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1573    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1574    struct timeval *timeop)
1575{
1576	struct nfsmount *nmp;
1577	struct nfsnode *np = VTONFS(vp);
1578
1579	nmp = VFSTONFS(vp->v_mount);
1580	if (fhlenp != NULL)
1581		*fhlenp = (size_t)np->n_fhp->nfh_len;
1582	if (fhp != NULL)
1583		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1584	if (sp != NULL)
1585		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1586	if (is_v3p != NULL)
1587		*is_v3p = NFS_ISV3(vp);
1588	if (sizep != NULL)
1589		*sizep = np->n_size;
1590	if (timeop != NULL) {
1591		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1592		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1593	}
1594}
1595
1596