nfs_clvfsops.c revision 216931
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 216931 2011-01-03 20:37:31Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/lock.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/mount.h>
54#include <sys/proc.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/vnode.h>
60#include <sys/signalvar.h>
61
62#include <vm/vm.h>
63#include <vm/vm_extern.h>
64#include <vm/uma.h>
65
66#include <net/if.h>
67#include <net/route.h>
68#include <netinet/in.h>
69
70#include <fs/nfs/nfsport.h>
71#include <fs/nfsclient/nfsnode.h>
72#include <fs/nfsclient/nfsmount.h>
73#include <fs/nfsclient/nfs.h>
74#include <fs/nfsclient/nfsdiskless.h>
75
76extern int nfscl_ticks;
77extern struct timeval nfsboottime;
78extern struct nfsstats	newnfsstats;
79
80MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
81MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
82
83SYSCTL_DECL(_vfs_newnfs);
84SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
85	&newnfsstats, nfsstats, "S,nfsstats");
86static int nfs_ip_paranoia = 1;
87SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
88    &nfs_ip_paranoia, 0, "");
89static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
90SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
91        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
92/* how long between console messages "nfs server foo not responding" */
93static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
94SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
95        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96
97static void	nfs_sec_name(char *, int *);
98static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
99		    struct nfs_args *argp, const char *, struct ucred *,
100		    struct thread *);
101static int	mountnfs(struct nfs_args *, struct mount *,
102		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
103		    struct vnode **, struct ucred *, struct thread *, int);
104static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
105		    struct sockaddr_storage *, int *, off_t *,
106		    struct timeval *);
107static vfs_mount_t nfs_mount;
108static vfs_cmount_t nfs_cmount;
109static vfs_unmount_t nfs_unmount;
110static vfs_root_t nfs_root;
111static vfs_statfs_t nfs_statfs;
112static vfs_sync_t nfs_sync;
113static vfs_sysctl_t nfs_sysctl;
114
115/*
116 * nfs vfs operations.
117 */
118static struct vfsops nfs_vfsops = {
119	.vfs_init =		ncl_init,
120	.vfs_mount =		nfs_mount,
121	.vfs_cmount =		nfs_cmount,
122	.vfs_root =		nfs_root,
123	.vfs_statfs =		nfs_statfs,
124	.vfs_sync =		nfs_sync,
125	.vfs_uninit =		ncl_uninit,
126	.vfs_unmount =		nfs_unmount,
127	.vfs_sysctl =		nfs_sysctl,
128};
129VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
130
131/* So that loader and kldload(2) can find us, wherever we are.. */
132MODULE_VERSION(newnfs, 1);
133
134/*
135 * This structure must be filled in by a primary bootstrap or bootstrap
136 * server for a diskless/dataless machine. It is initialized below just
137 * to ensure that it is allocated to initialized data (.data not .bss).
138 */
139struct nfs_diskless newnfs_diskless = { { { 0 } } };
140struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
141int newnfs_diskless_valid = 0;
142
143SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
144    &newnfs_diskless_valid, 0,
145    "Has the diskless struct been filled correctly");
146
147SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
148    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
149
150SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
151    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
152    "%Ssockaddr_in", "Diskless root nfs address");
153
154
155void		newnfsargs_ntoh(struct nfs_args *);
156static int	nfs_mountdiskless(char *,
157		    struct sockaddr_in *, struct nfs_args *,
158		    struct thread *, struct vnode **, struct mount *);
159static void	nfs_convert_diskless(void);
160static void	nfs_convert_oargs(struct nfs_args *args,
161		    struct onfs_args *oargs);
162
163int
164newnfs_iosize(struct nfsmount *nmp)
165{
166	int iosize, maxio;
167
168	/* First, set the upper limit for iosize */
169	if (nmp->nm_flag & NFSMNT_NFSV4) {
170		maxio = NFS_MAXBSIZE;
171	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
172		if (nmp->nm_sotype == SOCK_DGRAM)
173			maxio = NFS_MAXDGRAMDATA;
174		else
175			maxio = NFS_MAXBSIZE;
176	} else {
177		maxio = NFS_V2MAXDATA;
178	}
179	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
180		nmp->nm_rsize = maxio;
181	if (nmp->nm_rsize > MAXBSIZE)
182		nmp->nm_rsize = MAXBSIZE;
183	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
184		nmp->nm_readdirsize = maxio;
185	if (nmp->nm_readdirsize > nmp->nm_rsize)
186		nmp->nm_readdirsize = nmp->nm_rsize;
187	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
188		nmp->nm_wsize = maxio;
189	if (nmp->nm_wsize > MAXBSIZE)
190		nmp->nm_wsize = MAXBSIZE;
191
192	/*
193	 * Calculate the size used for io buffers.  Use the larger
194	 * of the two sizes to minimise nfs requests but make sure
195	 * that it is at least one VM page to avoid wasting buffer
196	 * space.
197	 */
198	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
199	iosize = imax(iosize, PAGE_SIZE);
200	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
201	return (iosize);
202}
203
204static void
205nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
206{
207
208	args->version = NFS_ARGSVERSION;
209	args->addr = oargs->addr;
210	args->addrlen = oargs->addrlen;
211	args->sotype = oargs->sotype;
212	args->proto = oargs->proto;
213	args->fh = oargs->fh;
214	args->fhsize = oargs->fhsize;
215	args->flags = oargs->flags;
216	args->wsize = oargs->wsize;
217	args->rsize = oargs->rsize;
218	args->readdirsize = oargs->readdirsize;
219	args->timeo = oargs->timeo;
220	args->retrans = oargs->retrans;
221	args->readahead = oargs->readahead;
222	args->hostname = oargs->hostname;
223}
224
225static void
226nfs_convert_diskless(void)
227{
228
229	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
230	    sizeof (struct ifaliasreq));
231	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
232	    sizeof (struct sockaddr_in));
233	nfs_convert_oargs(&newnfsv3_diskless.root_args,
234	    &newnfs_diskless.root_args);
235	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
236		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
237		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238		    NFSX_MYFH);
239	} else {
240		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
241		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
242		    NFSX_V2FH);
243	}
244	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
245	    sizeof(struct sockaddr_in));
246	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
247	    MNAMELEN);
248	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
249	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
250	    MAXHOSTNAMELEN);
251	newnfs_diskless_valid = 3;
252}
253
254/*
255 * nfs statfs call
256 */
257static int
258nfs_statfs(struct mount *mp, struct statfs *sbp)
259{
260	struct vnode *vp;
261	struct thread *td;
262	struct nfsmount *nmp = VFSTONFS(mp);
263	struct nfsvattr nfsva;
264	struct nfsfsinfo fs;
265	struct nfsstatfs sb;
266	int error = 0, attrflag, gotfsinfo = 0, ret;
267	struct nfsnode *np;
268
269	td = curthread;
270
271	error = vfs_busy(mp, MBF_NOWAIT);
272	if (error)
273		return (error);
274	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
275	if (error) {
276		vfs_unbusy(mp);
277		return (error);
278	}
279	vp = NFSTOV(np);
280	mtx_lock(&nmp->nm_mtx);
281	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
282		mtx_unlock(&nmp->nm_mtx);
283		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
284		    &attrflag, NULL);
285		if (!error)
286			gotfsinfo = 1;
287	} else
288		mtx_unlock(&nmp->nm_mtx);
289	if (!error)
290		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
291		    &attrflag, NULL);
292	if (attrflag == 0) {
293		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
294		    td->td_ucred, td, &nfsva, NULL);
295		if (ret) {
296			/*
297			 * Just set default values to get things going.
298			 */
299			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
300			nfsva.na_vattr.va_type = VDIR;
301			nfsva.na_vattr.va_mode = 0777;
302			nfsva.na_vattr.va_nlink = 100;
303			nfsva.na_vattr.va_uid = (uid_t)0;
304			nfsva.na_vattr.va_gid = (gid_t)0;
305			nfsva.na_vattr.va_fileid = 2;
306			nfsva.na_vattr.va_gen = 1;
307			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
308			nfsva.na_vattr.va_size = 512 * 1024;
309		}
310	}
311	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
312	if (!error) {
313	    mtx_lock(&nmp->nm_mtx);
314	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
315		nfscl_loadfsinfo(nmp, &fs);
316	    nfscl_loadsbinfo(nmp, &sb, sbp);
317	    sbp->f_flags = nmp->nm_flag;
318	    sbp->f_iosize = newnfs_iosize(nmp);
319	    mtx_unlock(&nmp->nm_mtx);
320	    if (sbp != &mp->mnt_stat) {
321		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
322		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
323	    }
324	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
325	} else if (NFS_ISV4(vp)) {
326		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
327	}
328	vput(vp);
329	vfs_unbusy(mp);
330	return (error);
331}
332
333/*
334 * nfs version 3 fsinfo rpc call
335 */
336int
337ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
338    struct thread *td)
339{
340	struct nfsfsinfo fs;
341	struct nfsvattr nfsva;
342	int error, attrflag;
343
344	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
345	if (!error) {
346		if (attrflag)
347			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
348			    1);
349		mtx_lock(&nmp->nm_mtx);
350		nfscl_loadfsinfo(nmp, &fs);
351		mtx_unlock(&nmp->nm_mtx);
352	}
353	return (error);
354}
355
356/*
357 * Mount a remote root fs via. nfs. This depends on the info in the
358 * newnfs_diskless structure that has been filled in properly by some primary
359 * bootstrap.
360 * It goes something like this:
361 * - do enough of "ifconfig" by calling ifioctl() so that the system
362 *   can talk to the server
363 * - If newnfs_diskless.mygateway is filled in, use that address as
364 *   a default gateway.
365 * - build the rootfs mount point and call mountnfs() to do the rest.
366 *
367 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
368 * structure, as well as other global NFS client variables here, as
369 * nfs_mountroot() will be called once in the boot before any other NFS
370 * client activity occurs.
371 */
372int
373ncl_mountroot(struct mount *mp)
374{
375	struct thread *td = curthread;
376	struct nfsv3_diskless *nd = &newnfsv3_diskless;
377	struct socket *so;
378	struct vnode *vp;
379	struct ifreq ir;
380	int error;
381	u_long l;
382	char buf[128];
383	char *cp;
384
385#if defined(BOOTP_NFSROOT) && defined(BOOTP)
386	bootpc_init();		/* use bootp to get nfs_diskless filled in */
387#elif defined(NFS_ROOT)
388	nfs_setup_diskless();
389#endif
390
391	if (newnfs_diskless_valid == 0)
392		return (-1);
393	if (newnfs_diskless_valid == 1)
394		nfs_convert_diskless();
395
396	/*
397	 * XXX splnet, so networks will receive...
398	 */
399	splnet();
400
401	/*
402	 * Do enough of ifconfig(8) so that the critical net interface can
403	 * talk to the server.
404	 */
405	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
406	    td->td_ucred, td);
407	if (error)
408		panic("nfs_mountroot: socreate(%04x): %d",
409			nd->myif.ifra_addr.sa_family, error);
410
411#if 0 /* XXX Bad idea */
412	/*
413	 * We might not have been told the right interface, so we pass
414	 * over the first ten interfaces of the same kind, until we get
415	 * one of them configured.
416	 */
417
418	for (i = strlen(nd->myif.ifra_name) - 1;
419		nd->myif.ifra_name[i] >= '0' &&
420		nd->myif.ifra_name[i] <= '9';
421		nd->myif.ifra_name[i] ++) {
422		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
423		if(!error)
424			break;
425	}
426#endif
427	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
428	if (error)
429		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
430	if ((cp = getenv("boot.netif.mtu")) != NULL) {
431		ir.ifr_mtu = strtol(cp, NULL, 10);
432		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
433		freeenv(cp);
434		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
435		if (error)
436			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
437	}
438	soclose(so);
439
440	/*
441	 * If the gateway field is filled in, set it as the default route.
442	 * Note that pxeboot will set a default route of 0 if the route
443	 * is not set by the DHCP server.  Check also for a value of 0
444	 * to avoid panicking inappropriately in that situation.
445	 */
446	if (nd->mygateway.sin_len != 0 &&
447	    nd->mygateway.sin_addr.s_addr != 0) {
448		struct sockaddr_in mask, sin;
449
450		bzero((caddr_t)&mask, sizeof(mask));
451		sin = mask;
452		sin.sin_family = AF_INET;
453		sin.sin_len = sizeof(sin);
454                /* XXX MRT use table 0 for this sort of thing */
455		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
456		    (struct sockaddr *)&nd->mygateway,
457		    (struct sockaddr *)&mask,
458		    RTF_UP | RTF_GATEWAY, NULL);
459		if (error)
460			panic("nfs_mountroot: RTM_ADD: %d", error);
461	}
462
463	/*
464	 * Create the rootfs mount point.
465	 */
466	nd->root_args.fh = nd->root_fh;
467	nd->root_args.fhsize = nd->root_fhsize;
468	l = ntohl(nd->root_saddr.sin_addr.s_addr);
469	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
470		(l >> 24) & 0xff, (l >> 16) & 0xff,
471		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
472	printf("NFS ROOT: %s\n", buf);
473	nd->root_args.hostname = buf;
474	if ((error = nfs_mountdiskless(buf,
475	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
476		return (error);
477	}
478
479	/*
480	 * This is not really an nfs issue, but it is much easier to
481	 * set hostname here and then let the "/etc/rc.xxx" files
482	 * mount the right /var based upon its preset value.
483	 */
484	mtx_lock(&prison0.pr_mtx);
485	strlcpy(prison0.pr_hostname, nd->my_hostnam,
486	    sizeof(prison0.pr_hostname));
487	mtx_unlock(&prison0.pr_mtx);
488	inittodr(ntohl(nd->root_time));
489	return (0);
490}
491
492/*
493 * Internal version of mount system call for diskless setup.
494 */
495static int
496nfs_mountdiskless(char *path,
497    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
498    struct vnode **vpp, struct mount *mp)
499{
500	struct sockaddr *nam;
501	int error;
502
503	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
504	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
505	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
506		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
507		return (error);
508	}
509	return (0);
510}
511
512static void
513nfs_sec_name(char *sec, int *flagsp)
514{
515	if (!strcmp(sec, "krb5"))
516		*flagsp |= NFSMNT_KERB;
517	else if (!strcmp(sec, "krb5i"))
518		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
519	else if (!strcmp(sec, "krb5p"))
520		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
521}
522
523static void
524nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
525    const char *hostname, struct ucred *cred, struct thread *td)
526{
527	int s;
528	int adjsock;
529	char *p;
530
531	s = splnet();
532
533	/*
534	 * Set read-only flag if requested; otherwise, clear it if this is
535	 * an update.  If this is not an update, then either the read-only
536	 * flag is already clear, or this is a root mount and it was set
537	 * intentionally at some previous point.
538	 */
539	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
540		MNT_ILOCK(mp);
541		mp->mnt_flag |= MNT_RDONLY;
542		MNT_IUNLOCK(mp);
543	} else if (mp->mnt_flag & MNT_UPDATE) {
544		MNT_ILOCK(mp);
545		mp->mnt_flag &= ~MNT_RDONLY;
546		MNT_IUNLOCK(mp);
547	}
548
549	/*
550	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
551	 * no sense in that context.  Also, set up appropriate retransmit
552	 * and soft timeout behavior.
553	 */
554	if (argp->sotype == SOCK_STREAM) {
555		nmp->nm_flag &= ~NFSMNT_NOCONN;
556		nmp->nm_timeo = NFS_MAXTIMEO;
557	}
558
559	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
560	if ((argp->flags & NFSMNT_NFSV3) == 0)
561		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
562
563	/* Also re-bind if we're switching to/from a connected UDP socket */
564	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
565		    (argp->flags & NFSMNT_NOCONN));
566
567	/* Update flags atomically.  Don't change the lock bits. */
568	nmp->nm_flag = argp->flags | nmp->nm_flag;
569	splx(s);
570
571	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
572		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
573		if (nmp->nm_timeo < NFS_MINTIMEO)
574			nmp->nm_timeo = NFS_MINTIMEO;
575		else if (nmp->nm_timeo > NFS_MAXTIMEO)
576			nmp->nm_timeo = NFS_MAXTIMEO;
577	}
578
579	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
580		nmp->nm_retry = argp->retrans;
581		if (nmp->nm_retry > NFS_MAXREXMIT)
582			nmp->nm_retry = NFS_MAXREXMIT;
583	}
584
585	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
586		nmp->nm_wsize = argp->wsize;
587		/* Round down to multiple of blocksize */
588		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
589		if (nmp->nm_wsize <= 0)
590			nmp->nm_wsize = NFS_FABLKSIZE;
591	}
592
593	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
594		nmp->nm_rsize = argp->rsize;
595		/* Round down to multiple of blocksize */
596		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
597		if (nmp->nm_rsize <= 0)
598			nmp->nm_rsize = NFS_FABLKSIZE;
599	}
600
601	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
602		nmp->nm_readdirsize = argp->readdirsize;
603	}
604
605	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
606		nmp->nm_acregmin = argp->acregmin;
607	else
608		nmp->nm_acregmin = NFS_MINATTRTIMO;
609	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
610		nmp->nm_acregmax = argp->acregmax;
611	else
612		nmp->nm_acregmax = NFS_MAXATTRTIMO;
613	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
614		nmp->nm_acdirmin = argp->acdirmin;
615	else
616		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
617	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
618		nmp->nm_acdirmax = argp->acdirmax;
619	else
620		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
621	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
622		nmp->nm_acdirmin = nmp->nm_acdirmax;
623	if (nmp->nm_acregmin > nmp->nm_acregmax)
624		nmp->nm_acregmin = nmp->nm_acregmax;
625
626	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
627		if (argp->readahead <= NFS_MAXRAHEAD)
628			nmp->nm_readahead = argp->readahead;
629		else
630			nmp->nm_readahead = NFS_MAXRAHEAD;
631	}
632	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
633		if (argp->wcommitsize < nmp->nm_wsize)
634			nmp->nm_wcommitsize = nmp->nm_wsize;
635		else
636			nmp->nm_wcommitsize = argp->wcommitsize;
637	}
638
639	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
640		    (nmp->nm_soproto != argp->proto));
641
642	if (nmp->nm_client != NULL && adjsock) {
643		int haslock = 0, error = 0;
644
645		if (nmp->nm_sotype == SOCK_STREAM) {
646			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
647			if (!error)
648				haslock = 1;
649		}
650		if (!error) {
651		    newnfs_disconnect(&nmp->nm_sockreq);
652		    if (haslock)
653			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
654		    nmp->nm_sotype = argp->sotype;
655		    nmp->nm_soproto = argp->proto;
656		    if (nmp->nm_sotype == SOCK_DGRAM)
657			while (newnfs_connect(nmp, &nmp->nm_sockreq,
658			    cred, td, 0)) {
659				printf("newnfs_args: retrying connect\n");
660				(void) nfs_catnap(PSOCK, 0, "newnfscon");
661			}
662		}
663	} else {
664		nmp->nm_sotype = argp->sotype;
665		nmp->nm_soproto = argp->proto;
666	}
667
668	if (hostname != NULL) {
669		strlcpy(nmp->nm_hostname, hostname,
670		    sizeof(nmp->nm_hostname));
671		p = strchr(nmp->nm_hostname, ':');
672		if (p != NULL)
673			*p = '\0';
674	}
675}
676
/*
 * Names of the mount options nfs_mount() accepts; the list is handed to
 * vfs_filteropt() and is terminated by NULL.
 */
static const char *nfs_opts[] = {
	"from",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
687
688/*
689 * VFS Operations.
690 *
691 * mount system call
692 * It seems a bit dumb to copyinstr() the host and path here and then
693 * bcopy() them in mountnfs(), but I wanted to detect errors before
694 * doing the sockargs() call because sockargs() allocates an mbuf and
695 * an error after that means that I have to release the mbuf.
696 */
697/* ARGSUSED */
698static int
699nfs_mount(struct mount *mp)
700{
701	struct nfs_args args = {
702	    .version = NFS_ARGSVERSION,
703	    .addr = NULL,
704	    .addrlen = sizeof (struct sockaddr_in),
705	    .sotype = SOCK_STREAM,
706	    .proto = 0,
707	    .fh = NULL,
708	    .fhsize = 0,
709	    .flags = 0,
710	    .wsize = NFS_WSIZE,
711	    .rsize = NFS_RSIZE,
712	    .readdirsize = NFS_READDIRSIZE,
713	    .timeo = 10,
714	    .retrans = NFS_RETRANS,
715	    .readahead = NFS_DEFRAHEAD,
716	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
717	    .hostname = NULL,
718	    /* args version 4 */
719	    .acregmin = NFS_MINATTRTIMO,
720	    .acregmax = NFS_MAXATTRTIMO,
721	    .acdirmin = NFS_MINDIRATTRTIMO,
722	    .acdirmax = NFS_MAXDIRATTRTIMO,
723	    .dirlen = 0,
724	    .krbnamelen = 0,
725	    .srvkrbnamelen = 0,
726	};
727	int error = 0, ret, len;
728	struct sockaddr *nam = NULL;
729	struct vnode *vp;
730	struct thread *td;
731	char hst[MNAMELEN];
732	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
733	char *opt, *name, *secname;
734	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
735
736	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
737		error = EINVAL;
738		goto out;
739	}
740
741	td = curthread;
742	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
743		error = ncl_mountroot(mp);
744		goto out;
745	}
746
747	nfscl_init();
748
749	/* Handle the new style options. */
750	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
751		args.flags |= NFSMNT_NOCONN;
752	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
753		args.flags |= NFSMNT_NOCONN;
754	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
755		args.flags |= NFSMNT_NOLOCKD;
756	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
757		args.flags &= ~NFSMNT_NOLOCKD;
758	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
759		args.flags |= NFSMNT_INT;
760	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
761		args.flags |= NFSMNT_RDIRPLUS;
762	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
763		args.flags |= NFSMNT_RESVPORT;
764	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
765		args.flags &= ~NFSMNT_RESVPORT;
766	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
767		args.flags |= NFSMNT_SOFT;
768	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
769		args.flags &= ~NFSMNT_SOFT;
770	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
771		args.sotype = SOCK_DGRAM;
772	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
773		args.sotype = SOCK_DGRAM;
774	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
775		args.sotype = SOCK_STREAM;
776	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
777		args.flags |= NFSMNT_NFSV3;
778	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
779		args.flags |= NFSMNT_NFSV4;
780		args.sotype = SOCK_STREAM;
781	}
782	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
783		args.flags |= NFSMNT_ALLGSSNAME;
784	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
785		if (opt == NULL) {
786			vfs_mount_error(mp, "illegal readdirsize");
787			error = EINVAL;
788			goto out;
789		}
790		ret = sscanf(opt, "%d", &args.readdirsize);
791		if (ret != 1 || args.readdirsize <= 0) {
792			vfs_mount_error(mp, "illegal readdirsize: %s",
793			    opt);
794			error = EINVAL;
795			goto out;
796		}
797		args.flags |= NFSMNT_READDIRSIZE;
798	}
799	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
800		if (opt == NULL) {
801			vfs_mount_error(mp, "illegal readahead");
802			error = EINVAL;
803			goto out;
804		}
805		ret = sscanf(opt, "%d", &args.readahead);
806		if (ret != 1 || args.readahead <= 0) {
807			vfs_mount_error(mp, "illegal readahead: %s",
808			    opt);
809			error = EINVAL;
810			goto out;
811		}
812		args.flags |= NFSMNT_READAHEAD;
813	}
814	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
815		if (opt == NULL) {
816			vfs_mount_error(mp, "illegal wsize");
817			error = EINVAL;
818			goto out;
819		}
820		ret = sscanf(opt, "%d", &args.wsize);
821		if (ret != 1 || args.wsize <= 0) {
822			vfs_mount_error(mp, "illegal wsize: %s",
823			    opt);
824			error = EINVAL;
825			goto out;
826		}
827		args.flags |= NFSMNT_WSIZE;
828	}
829	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
830		if (opt == NULL) {
831			vfs_mount_error(mp, "illegal rsize");
832			error = EINVAL;
833			goto out;
834		}
835		ret = sscanf(opt, "%d", &args.rsize);
836		if (ret != 1 || args.rsize <= 0) {
837			vfs_mount_error(mp, "illegal wsize: %s",
838			    opt);
839			error = EINVAL;
840			goto out;
841		}
842		args.flags |= NFSMNT_RSIZE;
843	}
844	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
845		if (opt == NULL) {
846			vfs_mount_error(mp, "illegal retrans");
847			error = EINVAL;
848			goto out;
849		}
850		ret = sscanf(opt, "%d", &args.retrans);
851		if (ret != 1 || args.retrans <= 0) {
852			vfs_mount_error(mp, "illegal retrans: %s",
853			    opt);
854			error = EINVAL;
855			goto out;
856		}
857		args.flags |= NFSMNT_RETRANS;
858	}
859	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
860		ret = sscanf(opt, "%d", &args.acregmin);
861		if (ret != 1 || args.acregmin < 0) {
862			vfs_mount_error(mp, "illegal acregmin: %s",
863			    opt);
864			error = EINVAL;
865			goto out;
866		}
867		args.flags |= NFSMNT_ACREGMIN;
868	}
869	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
870		ret = sscanf(opt, "%d", &args.acregmax);
871		if (ret != 1 || args.acregmax < 0) {
872			vfs_mount_error(mp, "illegal acregmax: %s",
873			    opt);
874			error = EINVAL;
875			goto out;
876		}
877		args.flags |= NFSMNT_ACREGMAX;
878	}
879	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
880		ret = sscanf(opt, "%d", &args.acdirmin);
881		if (ret != 1 || args.acdirmin < 0) {
882			vfs_mount_error(mp, "illegal acdirmin: %s",
883			    opt);
884			error = EINVAL;
885			goto out;
886		}
887		args.flags |= NFSMNT_ACDIRMIN;
888	}
889	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
890		ret = sscanf(opt, "%d", &args.acdirmax);
891		if (ret != 1 || args.acdirmax < 0) {
892			vfs_mount_error(mp, "illegal acdirmax: %s",
893			    opt);
894			error = EINVAL;
895			goto out;
896		}
897		args.flags |= NFSMNT_ACDIRMAX;
898	}
899	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
900		ret = sscanf(opt, "%d", &args.timeo);
901		if (ret != 1 || args.timeo <= 0) {
902			vfs_mount_error(mp, "illegal timeout: %s",
903			    opt);
904			error = EINVAL;
905			goto out;
906		}
907		args.flags |= NFSMNT_TIMEO;
908	}
909	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
910	    == 0) {
911		ret = sscanf(opt, "%d", &negnametimeo);
912		if (ret != 1 || negnametimeo < 0) {
913			vfs_mount_error(mp, "illegal negnametimeo: %s",
914			    opt);
915			error = EINVAL;
916			goto out;
917		}
918	}
919	if (vfs_getopt(mp->mnt_optnew, "sec",
920		(void **) &secname, NULL) == 0)
921		nfs_sec_name(secname, &args.flags);
922
923	if (mp->mnt_flag & MNT_UPDATE) {
924		struct nfsmount *nmp = VFSTONFS(mp);
925
926		if (nmp == NULL) {
927			error = EIO;
928			goto out;
929		}
930		/*
931		 * When doing an update, we can't change version,
932		 * security, switch lockd strategies or change cookie
933		 * translation
934		 */
935		args.flags = (args.flags &
936		    ~(NFSMNT_NFSV3 |
937		      NFSMNT_NFSV4 |
938		      NFSMNT_KERB |
939		      NFSMNT_INTEGRITY |
940		      NFSMNT_PRIVACY |
941		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
942		    (nmp->nm_flag &
943			(NFSMNT_NFSV3 |
944			 NFSMNT_NFSV4 |
945			 NFSMNT_KERB |
946			 NFSMNT_INTEGRITY |
947			 NFSMNT_PRIVACY |
948			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
949		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
950		goto out;
951	}
952
953	/*
954	 * Make the nfs_ip_paranoia sysctl serve as the default connection
955	 * or no-connection mode for those protocols that support
956	 * no-connection mode (the flag will be cleared later for protocols
957	 * that do not support no-connection mode).  This will allow a client
958	 * to receive replies from a different IP then the request was
959	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
960	 * not 0.
961	 */
962	if (nfs_ip_paranoia == 0)
963		args.flags |= NFSMNT_NOCONN;
964
965	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
966	    &args.fhsize) == 0) {
967		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
968			vfs_mount_error(mp, "Bad file handle");
969			error = EINVAL;
970			goto out;
971		}
972		bcopy(args.fh, nfh, args.fhsize);
973	} else {
974		args.fhsize = 0;
975	}
976
977	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
978	    &len);
979	if (args.hostname == NULL) {
980		vfs_mount_error(mp, "Invalid hostname");
981		error = EINVAL;
982		goto out;
983	}
984	bcopy(args.hostname, hst, MNAMELEN);
985	hst[MNAMELEN - 1] = '\0';
986
987	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
988		strlcpy(srvkrbname, name, sizeof (srvkrbname));
989	else
990		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
991	args.srvkrbnamelen = strlen(srvkrbname);
992
993	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
994		strlcpy(krbname, name, sizeof (krbname));
995	else
996		krbname[0] = '\0';
997	args.krbnamelen = strlen(krbname);
998
999	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1000		strlcpy(dirpath, name, sizeof (dirpath));
1001	else
1002		dirpath[0] = '\0';
1003	args.dirlen = strlen(dirpath);
1004
1005	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1006	    &args.addrlen) == 0) {
1007		if (args.addrlen > SOCK_MAXADDRLEN) {
1008			error = ENAMETOOLONG;
1009			goto out;
1010		}
1011		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1012		bcopy(args.addr, nam, args.addrlen);
1013		nam->sa_len = args.addrlen;
1014	}
1015
1016	args.fh = nfh;
1017	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1018	    &vp, td->td_ucred, td, negnametimeo);
1019out:
1020	if (!error) {
1021		MNT_ILOCK(mp);
1022		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1023		MNT_IUNLOCK(mp);
1024	}
1025	return (error);
1026}
1027
1028
1029/*
1030 * VFS Operations.
1031 *
1032 * mount system call
1033 * It seems a bit dumb to copyinstr() the host and path here and then
1034 * bcopy() them in mountnfs(), but I wanted to detect errors before
1035 * doing the sockargs() call because sockargs() allocates an mbuf and
1036 * an error after that means that I have to release the mbuf.
1037 */
1038/* ARGSUSED */
1039static int
1040nfs_cmount(struct mntarg *ma, void *data, int flags)
1041{
1042	int error;
1043	struct nfs_args args;
1044
1045	error = copyin(data, &args, sizeof (struct nfs_args));
1046	if (error)
1047		return error;
1048
1049	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1050
1051	error = kernel_mount(ma, flags);
1052	return (error);
1053}
1054
1055/*
1056 * Common code for mount and mountroot
1057 */
1058static int
1059mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1060    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1061    struct vnode **vpp, struct ucred *cred, struct thread *td,
1062    int negnametimeo)
1063{
1064	struct nfsmount *nmp;
1065	struct nfsnode *np;
1066	int error, trycnt, ret;
1067	struct nfsvattr nfsva;
1068	static u_int64_t clval = 0;
1069
1070	if (mp->mnt_flag & MNT_UPDATE) {
1071		nmp = VFSTONFS(mp);
1072		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1073		FREE(nam, M_SONAME);
1074		return (0);
1075	} else {
1076		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1077		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1078		    M_NEWNFSMNT, M_WAITOK);
1079		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1080		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1081		TAILQ_INIT(&nmp->nm_bufq);
1082		if (clval == 0)
1083			clval = (u_int64_t)nfsboottime.tv_sec;
1084		nmp->nm_clval = clval++;
1085		nmp->nm_krbnamelen = argp->krbnamelen;
1086		nmp->nm_dirpathlen = argp->dirlen;
1087		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1088		if (td->td_ucred->cr_uid != (uid_t)0) {
1089			/*
1090			 * nm_uid is used to get KerberosV credentials for
1091			 * the nfsv4 state handling operations if there is
1092			 * no host based principal set. Use the uid of
1093			 * this user if not root, since they are doing the
1094			 * mount. I don't think setting this for root will
1095			 * work, since root normally does not have user
1096			 * credentials in a credentials cache.
1097			 */
1098			nmp->nm_uid = td->td_ucred->cr_uid;
1099		} else {
1100			/*
1101			 * Just set to -1, so it won't be used.
1102			 */
1103			nmp->nm_uid = (uid_t)-1;
1104		}
1105
1106		/* Copy and null terminate all the names */
1107		if (nmp->nm_krbnamelen > 0) {
1108			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1109			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1110		}
1111		if (nmp->nm_dirpathlen > 0) {
1112			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1113			    nmp->nm_dirpathlen);
1114			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1115			    + 1] = '\0';
1116		}
1117		if (nmp->nm_srvkrbnamelen > 0) {
1118			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1119			    nmp->nm_srvkrbnamelen);
1120			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1121			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1122		}
1123		nmp->nm_sockreq.nr_cred = crhold(cred);
1124		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1125		mp->mnt_data = nmp;
1126		nmp->nm_getinfo = nfs_getnlminfo;
1127		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1128	}
1129	vfs_getnewfsid(mp);
1130	nmp->nm_mountp = mp;
1131	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1132	nmp->nm_negnametimeo = negnametimeo;
1133
1134	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1135
1136	/*
1137	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1138	 * high, depending on whether we end up with negative offsets in
1139	 * the client or server somewhere.  2GB-1 may be safer.
1140	 *
1141	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1142	 * that we can handle until we find out otherwise.
1143	 * XXX Our "safe" limit on the client is what we can store in our
1144	 * buffer cache using signed(!) block numbers.
1145	 */
1146	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1147		nmp->nm_maxfilesize = 0xffffffffLL;
1148	else
1149		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1150
1151	nmp->nm_timeo = NFS_TIMEO;
1152	nmp->nm_retry = NFS_RETRANS;
1153	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1154		nmp->nm_wsize = NFS_WSIZE;
1155		nmp->nm_rsize = NFS_RSIZE;
1156		nmp->nm_readdirsize = NFS_READDIRSIZE;
1157	}
1158	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1159	nmp->nm_numgrps = NFS_MAXGRPS;
1160	nmp->nm_readahead = NFS_DEFRAHEAD;
1161	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1162	if (nmp->nm_tprintf_delay < 0)
1163		nmp->nm_tprintf_delay = 0;
1164	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1165	if (nmp->nm_tprintf_initial_delay < 0)
1166		nmp->nm_tprintf_initial_delay = 0;
1167	nmp->nm_fhsize = argp->fhsize;
1168	if (nmp->nm_fhsize > 0)
1169		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1170	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1171	nmp->nm_nam = nam;
1172	/* Set up the sockets and per-host congestion */
1173	nmp->nm_sotype = argp->sotype;
1174	nmp->nm_soproto = argp->proto;
1175	nmp->nm_sockreq.nr_prog = NFS_PROG;
1176	if ((argp->flags & NFSMNT_NFSV4))
1177		nmp->nm_sockreq.nr_vers = NFS_VER4;
1178	else if ((argp->flags & NFSMNT_NFSV3))
1179		nmp->nm_sockreq.nr_vers = NFS_VER3;
1180	else
1181		nmp->nm_sockreq.nr_vers = NFS_VER2;
1182
1183
1184	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1185		goto bad;
1186
1187	/*
1188	 * A reference count is needed on the nfsnode representing the
1189	 * remote root.  If this object is not persistent, then backward
1190	 * traversals of the mount point (i.e. "..") will not work if
1191	 * the nfsnode gets flushed out of the cache. Ufs does not have
1192	 * this problem, because one can identify root inodes by their
1193	 * number == ROOTINO (2).
1194	 */
1195	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1196	    nmp->nm_dirpathlen > 0) {
1197		/*
1198		 * If the fhsize on the mount point == 0 for V4, the mount
1199		 * path needs to be looked up.
1200		 */
1201		trycnt = 3;
1202		do {
1203			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1204			    cred, td);
1205			if (error)
1206				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1207		} while (error && --trycnt > 0);
1208		if (error) {
1209			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1210			goto bad;
1211		}
1212	}
1213	if (nmp->nm_fhsize > 0) {
1214		/*
1215		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1216		 * non-zero for the root vnode. f_iosize will be set correctly
1217		 * by nfs_statfs() before any I/O occurs.
1218		 */
1219		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1220		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1221		if (error)
1222			goto bad;
1223		*vpp = NFSTOV(np);
1224
1225		/*
1226		 * Get file attributes and transfer parameters for the
1227		 * mountpoint.  This has the side effect of filling in
1228		 * (*vpp)->v_type with the correct value.
1229		 */
1230		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1231		    cred, td, &nfsva, NULL);
1232		if (ret) {
1233			/*
1234			 * Just set default values to get things going.
1235			 */
1236			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1237			nfsva.na_vattr.va_type = VDIR;
1238			nfsva.na_vattr.va_mode = 0777;
1239			nfsva.na_vattr.va_nlink = 100;
1240			nfsva.na_vattr.va_uid = (uid_t)0;
1241			nfsva.na_vattr.va_gid = (gid_t)0;
1242			nfsva.na_vattr.va_fileid = 2;
1243			nfsva.na_vattr.va_gen = 1;
1244			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1245			nfsva.na_vattr.va_size = 512 * 1024;
1246		}
1247		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1248		if (argp->flags & NFSMNT_NFSV3)
1249			ncl_fsinfo(nmp, *vpp, cred, td);
1250
1251		/*
1252		 * Lose the lock but keep the ref.
1253		 */
1254		VOP_UNLOCK(*vpp, 0);
1255		return (0);
1256	}
1257	error = EIO;
1258
1259bad:
1260	newnfs_disconnect(&nmp->nm_sockreq);
1261	crfree(nmp->nm_sockreq.nr_cred);
1262	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1263	mtx_destroy(&nmp->nm_mtx);
1264	FREE(nmp, M_NEWNFSMNT);
1265	FREE(nam, M_SONAME);
1266	return (error);
1267}
1268
1269/*
1270 * unmount system call
1271 */
1272static int
1273nfs_unmount(struct mount *mp, int mntflags)
1274{
1275	struct thread *td;
1276	struct nfsmount *nmp;
1277	int error, flags = 0, trycnt = 0;
1278
1279	td = curthread;
1280
1281	if (mntflags & MNT_FORCE)
1282		flags |= FORCECLOSE;
1283	nmp = VFSTONFS(mp);
1284	/*
1285	 * Goes something like this..
1286	 * - Call vflush() to clear out vnodes for this filesystem
1287	 * - Close the socket
1288	 * - Free up the data structures
1289	 */
1290	/* In the forced case, cancel any outstanding requests. */
1291	if (mntflags & MNT_FORCE) {
1292		error = newnfs_nmcancelreqs(nmp);
1293		if (error)
1294			goto out;
1295		/* For a forced close, get rid of the renew thread now */
1296		nfscl_umount(nmp, td);
1297	}
1298	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1299	do {
1300		error = vflush(mp, 1, flags, td);
1301		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1302			(void) nfs_catnap(PSOCK, error, "newndm");
1303	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1304	if (error)
1305		goto out;
1306
1307	/*
1308	 * We are now committed to the unmount.
1309	 */
1310	if ((mntflags & MNT_FORCE) == 0)
1311		nfscl_umount(nmp, td);
1312	newnfs_disconnect(&nmp->nm_sockreq);
1313	crfree(nmp->nm_sockreq.nr_cred);
1314	FREE(nmp->nm_nam, M_SONAME);
1315
1316	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1317	mtx_destroy(&nmp->nm_mtx);
1318	FREE(nmp, M_NEWNFSMNT);
1319out:
1320	return (error);
1321}
1322
1323/*
1324 * Return root of a filesystem
1325 */
1326static int
1327nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1328{
1329	struct vnode *vp;
1330	struct nfsmount *nmp;
1331	struct nfsnode *np;
1332	int error;
1333
1334	nmp = VFSTONFS(mp);
1335	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1336	if (error)
1337		return error;
1338	vp = NFSTOV(np);
1339	/*
1340	 * Get transfer parameters and attributes for root vnode once.
1341	 */
1342	mtx_lock(&nmp->nm_mtx);
1343	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1344		mtx_unlock(&nmp->nm_mtx);
1345		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1346	} else
1347		mtx_unlock(&nmp->nm_mtx);
1348	if (vp->v_type == VNON)
1349	    vp->v_type = VDIR;
1350	vp->v_vflag |= VV_ROOT;
1351	*vpp = vp;
1352	return (0);
1353}
1354
1355/*
1356 * Flush out the buffer cache
1357 */
1358/* ARGSUSED */
1359static int
1360nfs_sync(struct mount *mp, int waitfor)
1361{
1362	struct vnode *vp, *mvp;
1363	struct thread *td;
1364	int error, allerror = 0;
1365
1366	td = curthread;
1367
1368	/*
1369	 * Force stale buffer cache information to be flushed.
1370	 */
1371	MNT_ILOCK(mp);
1372loop:
1373	MNT_VNODE_FOREACH(vp, mp, mvp) {
1374		VI_LOCK(vp);
1375		MNT_IUNLOCK(mp);
1376		/* XXX Racy bv_cnt check. */
1377		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1378		    waitfor == MNT_LAZY) {
1379			VI_UNLOCK(vp);
1380			MNT_ILOCK(mp);
1381			continue;
1382		}
1383		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1384			MNT_ILOCK(mp);
1385			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1386			goto loop;
1387		}
1388		error = VOP_FSYNC(vp, waitfor, td);
1389		if (error)
1390			allerror = error;
1391		VOP_UNLOCK(vp, 0);
1392		vrele(vp);
1393
1394		MNT_ILOCK(mp);
1395	}
1396	MNT_IUNLOCK(mp);
1397	return (allerror);
1398}
1399
1400static int
1401nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1402{
1403	struct nfsmount *nmp = VFSTONFS(mp);
1404	struct vfsquery vq;
1405	int error;
1406
1407	bzero(&vq, sizeof(vq));
1408	switch (op) {
1409#if 0
1410	case VFS_CTL_NOLOCKS:
1411		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1412 		if (req->oldptr != NULL) {
1413 			error = SYSCTL_OUT(req, &val, sizeof(val));
1414 			if (error)
1415 				return (error);
1416 		}
1417 		if (req->newptr != NULL) {
1418 			error = SYSCTL_IN(req, &val, sizeof(val));
1419 			if (error)
1420 				return (error);
1421			if (val)
1422				nmp->nm_flag |= NFSMNT_NOLOCKS;
1423			else
1424				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1425 		}
1426		break;
1427#endif
1428	case VFS_CTL_QUERY:
1429		mtx_lock(&nmp->nm_mtx);
1430		if (nmp->nm_state & NFSSTA_TIMEO)
1431			vq.vq_flags |= VQ_NOTRESP;
1432		mtx_unlock(&nmp->nm_mtx);
1433#if 0
1434		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1435		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1436			vq.vq_flags |= VQ_NOTRESPLOCK;
1437#endif
1438		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1439		break;
1440 	case VFS_CTL_TIMEO:
1441 		if (req->oldptr != NULL) {
1442 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1443 			    sizeof(nmp->nm_tprintf_initial_delay));
1444 			if (error)
1445 				return (error);
1446 		}
1447 		if (req->newptr != NULL) {
1448			error = vfs_suser(mp, req->td);
1449			if (error)
1450				return (error);
1451 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1452 			    sizeof(nmp->nm_tprintf_initial_delay));
1453 			if (error)
1454 				return (error);
1455 			if (nmp->nm_tprintf_initial_delay < 0)
1456 				nmp->nm_tprintf_initial_delay = 0;
1457 		}
1458		break;
1459	default:
1460		return (ENOTSUP);
1461	}
1462	return (0);
1463}
1464
1465/*
1466 * Extract the information needed by the nlm from the nfs vnode.
1467 */
1468static void
1469nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1470    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1471    struct timeval *timeop)
1472{
1473	struct nfsmount *nmp;
1474	struct nfsnode *np = VTONFS(vp);
1475
1476	nmp = VFSTONFS(vp->v_mount);
1477	if (fhlenp != NULL)
1478		*fhlenp = (size_t)np->n_fhp->nfh_len;
1479	if (fhp != NULL)
1480		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1481	if (sp != NULL)
1482		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1483	if (is_v3p != NULL)
1484		*is_v3p = NFS_ISV3(vp);
1485	if (sizep != NULL)
1486		*sizep = np->n_size;
1487	if (timeop != NULL) {
1488		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1489		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1490	}
1491}
1492
1493