nfs_clvfsops.c revision 221014
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 221014 2011-04-25 13:09:32Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <fs/nfsclient/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82
83MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
84MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
85
86SYSCTL_DECL(_vfs_newnfs);
87SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
88	&newnfsstats, nfsstats, "S,nfsstats");
89static int nfs_ip_paranoia = 1;
90SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
91    &nfs_ip_paranoia, 0, "");
92static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
93SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
94        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
95/* how long between console messages "nfs server foo not responding" */
96static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
97SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
98        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
99
100static void	nfs_sec_name(char *, int *);
101static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
102		    struct nfs_args *argp, const char *, struct ucred *,
103		    struct thread *);
104static int	mountnfs(struct nfs_args *, struct mount *,
105		    struct sockaddr *, char *, u_char *, int, u_char *, int,
106		    u_char *, int, struct vnode **, struct ucred *,
107		    struct thread *, int);
108static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
109		    struct sockaddr_storage *, int *, off_t *,
110		    struct timeval *);
111static vfs_mount_t nfs_mount;
112static vfs_cmount_t nfs_cmount;
113static vfs_unmount_t nfs_unmount;
114static vfs_root_t nfs_root;
115static vfs_statfs_t nfs_statfs;
116static vfs_sync_t nfs_sync;
117static vfs_sysctl_t nfs_sysctl;
118
119/*
120 * nfs vfs operations.
121 */
122static struct vfsops nfs_vfsops = {
123	.vfs_init =		ncl_init,
124	.vfs_mount =		nfs_mount,
125	.vfs_cmount =		nfs_cmount,
126	.vfs_root =		nfs_root,
127	.vfs_statfs =		nfs_statfs,
128	.vfs_sync =		nfs_sync,
129	.vfs_uninit =		ncl_uninit,
130	.vfs_unmount =		nfs_unmount,
131	.vfs_sysctl =		nfs_sysctl,
132};
133VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
134
135/* So that loader and kldload(2) can find us, wherever we are.. */
136MODULE_VERSION(newnfs, 1);
137
138/*
139 * This structure must be filled in by a primary bootstrap or bootstrap
140 * server for a diskless/dataless machine. It is initialized below just
141 * to ensure that it is allocated to initialized data (.data not .bss).
142 */
143struct nfs_diskless newnfs_diskless = { { { 0 } } };
144struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
145int newnfs_diskless_valid = 0;
146
147SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
148    &newnfs_diskless_valid, 0,
149    "Has the diskless struct been filled correctly");
150
151SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
152    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
153
154SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
155    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
156    "%Ssockaddr_in", "Diskless root nfs address");
157
158
159void		newnfsargs_ntoh(struct nfs_args *);
160static int	nfs_mountdiskless(char *,
161		    struct sockaddr_in *, struct nfs_args *,
162		    struct thread *, struct vnode **, struct mount *);
163static void	nfs_convert_diskless(void);
164static void	nfs_convert_oargs(struct nfs_args *args,
165		    struct onfs_args *oargs);
166
167int
168newnfs_iosize(struct nfsmount *nmp)
169{
170	int iosize, maxio;
171
172	/* First, set the upper limit for iosize */
173	if (nmp->nm_flag & NFSMNT_NFSV4) {
174		maxio = NFS_MAXBSIZE;
175	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
176		if (nmp->nm_sotype == SOCK_DGRAM)
177			maxio = NFS_MAXDGRAMDATA;
178		else
179			maxio = NFS_MAXBSIZE;
180	} else {
181		maxio = NFS_V2MAXDATA;
182	}
183	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
184		nmp->nm_rsize = maxio;
185	if (nmp->nm_rsize > MAXBSIZE)
186		nmp->nm_rsize = MAXBSIZE;
187	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
188		nmp->nm_readdirsize = maxio;
189	if (nmp->nm_readdirsize > nmp->nm_rsize)
190		nmp->nm_readdirsize = nmp->nm_rsize;
191	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
192		nmp->nm_wsize = maxio;
193	if (nmp->nm_wsize > MAXBSIZE)
194		nmp->nm_wsize = MAXBSIZE;
195
196	/*
197	 * Calculate the size used for io buffers.  Use the larger
198	 * of the two sizes to minimise nfs requests but make sure
199	 * that it is at least one VM page to avoid wasting buffer
200	 * space.
201	 */
202	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
203	iosize = imax(iosize, PAGE_SIZE);
204	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
205	return (iosize);
206}
207
208static void
209nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
210{
211
212	args->version = NFS_ARGSVERSION;
213	args->addr = oargs->addr;
214	args->addrlen = oargs->addrlen;
215	args->sotype = oargs->sotype;
216	args->proto = oargs->proto;
217	args->fh = oargs->fh;
218	args->fhsize = oargs->fhsize;
219	args->flags = oargs->flags;
220	args->wsize = oargs->wsize;
221	args->rsize = oargs->rsize;
222	args->readdirsize = oargs->readdirsize;
223	args->timeo = oargs->timeo;
224	args->retrans = oargs->retrans;
225	args->readahead = oargs->readahead;
226	args->hostname = oargs->hostname;
227}
228
229static void
230nfs_convert_diskless(void)
231{
232
233	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
234	    sizeof (struct ifaliasreq));
235	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
236	    sizeof (struct sockaddr_in));
237	nfs_convert_oargs(&newnfsv3_diskless.root_args,
238	    &newnfs_diskless.root_args);
239	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
240		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
241		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
242		    NFSX_MYFH);
243	} else {
244		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
245		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
246		    NFSX_V2FH);
247	}
248	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
249	    sizeof(struct sockaddr_in));
250	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
251	    MNAMELEN);
252	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
253	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
254	    MAXHOSTNAMELEN);
255	newnfs_diskless_valid = 3;
256}
257
258/*
259 * nfs statfs call
260 */
261static int
262nfs_statfs(struct mount *mp, struct statfs *sbp)
263{
264	struct vnode *vp;
265	struct thread *td;
266	struct nfsmount *nmp = VFSTONFS(mp);
267	struct nfsvattr nfsva;
268	struct nfsfsinfo fs;
269	struct nfsstatfs sb;
270	int error = 0, attrflag, gotfsinfo = 0, ret;
271	struct nfsnode *np;
272
273	td = curthread;
274
275	error = vfs_busy(mp, MBF_NOWAIT);
276	if (error)
277		return (error);
278	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
279	if (error) {
280		vfs_unbusy(mp);
281		return (error);
282	}
283	vp = NFSTOV(np);
284	mtx_lock(&nmp->nm_mtx);
285	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
286		mtx_unlock(&nmp->nm_mtx);
287		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
288		    &attrflag, NULL);
289		if (!error)
290			gotfsinfo = 1;
291	} else
292		mtx_unlock(&nmp->nm_mtx);
293	if (!error)
294		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
295		    &attrflag, NULL);
296	if (attrflag == 0) {
297		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
298		    td->td_ucred, td, &nfsva, NULL);
299		if (ret) {
300			/*
301			 * Just set default values to get things going.
302			 */
303			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
304			nfsva.na_vattr.va_type = VDIR;
305			nfsva.na_vattr.va_mode = 0777;
306			nfsva.na_vattr.va_nlink = 100;
307			nfsva.na_vattr.va_uid = (uid_t)0;
308			nfsva.na_vattr.va_gid = (gid_t)0;
309			nfsva.na_vattr.va_fileid = 2;
310			nfsva.na_vattr.va_gen = 1;
311			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
312			nfsva.na_vattr.va_size = 512 * 1024;
313		}
314	}
315	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
316	if (!error) {
317	    mtx_lock(&nmp->nm_mtx);
318	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
319		nfscl_loadfsinfo(nmp, &fs);
320	    nfscl_loadsbinfo(nmp, &sb, sbp);
321	    sbp->f_flags = nmp->nm_flag;
322	    sbp->f_iosize = newnfs_iosize(nmp);
323	    mtx_unlock(&nmp->nm_mtx);
324	    if (sbp != &mp->mnt_stat) {
325		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
326		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
327	    }
328	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
329	} else if (NFS_ISV4(vp)) {
330		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
331	}
332	vput(vp);
333	vfs_unbusy(mp);
334	return (error);
335}
336
337/*
338 * nfs version 3 fsinfo rpc call
339 */
340int
341ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
342    struct thread *td)
343{
344	struct nfsfsinfo fs;
345	struct nfsvattr nfsva;
346	int error, attrflag;
347
348	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
349	if (!error) {
350		if (attrflag)
351			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
352			    1);
353		mtx_lock(&nmp->nm_mtx);
354		nfscl_loadfsinfo(nmp, &fs);
355		mtx_unlock(&nmp->nm_mtx);
356	}
357	return (error);
358}
359
360/*
361 * Mount a remote root fs via. nfs. This depends on the info in the
362 * newnfs_diskless structure that has been filled in properly by some primary
363 * bootstrap.
364 * It goes something like this:
365 * - do enough of "ifconfig" by calling ifioctl() so that the system
366 *   can talk to the server
367 * - If newnfs_diskless.mygateway is filled in, use that address as
368 *   a default gateway.
369 * - build the rootfs mount point and call mountnfs() to do the rest.
370 *
371 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
372 * structure, as well as other global NFS client variables here, as
373 * nfs_mountroot() will be called once in the boot before any other NFS
374 * client activity occurs.
375 */
376int
377ncl_mountroot(struct mount *mp)
378{
379	struct thread *td = curthread;
380	struct nfsv3_diskless *nd = &newnfsv3_diskless;
381	struct socket *so;
382	struct vnode *vp;
383	struct ifreq ir;
384	int error;
385	u_long l;
386	char buf[128];
387	char *cp;
388
389#if defined(BOOTP_NFSROOT) && defined(BOOTP)
390	bootpc_init();		/* use bootp to get nfs_diskless filled in */
391#elif defined(NFS_ROOT)
392	nfs_setup_diskless();
393#endif
394
395	if (newnfs_diskless_valid == 0)
396		return (-1);
397	if (newnfs_diskless_valid == 1)
398		nfs_convert_diskless();
399
400	/*
401	 * XXX splnet, so networks will receive...
402	 */
403	splnet();
404
405	/*
406	 * Do enough of ifconfig(8) so that the critical net interface can
407	 * talk to the server.
408	 */
409	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
410	    td->td_ucred, td);
411	if (error)
412		panic("nfs_mountroot: socreate(%04x): %d",
413			nd->myif.ifra_addr.sa_family, error);
414
415#if 0 /* XXX Bad idea */
416	/*
417	 * We might not have been told the right interface, so we pass
418	 * over the first ten interfaces of the same kind, until we get
419	 * one of them configured.
420	 */
421
422	for (i = strlen(nd->myif.ifra_name) - 1;
423		nd->myif.ifra_name[i] >= '0' &&
424		nd->myif.ifra_name[i] <= '9';
425		nd->myif.ifra_name[i] ++) {
426		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
427		if(!error)
428			break;
429	}
430#endif
431	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
432	if (error)
433		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
434	if ((cp = getenv("boot.netif.mtu")) != NULL) {
435		ir.ifr_mtu = strtol(cp, NULL, 10);
436		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
437		freeenv(cp);
438		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
439		if (error)
440			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
441	}
442	soclose(so);
443
444	/*
445	 * If the gateway field is filled in, set it as the default route.
446	 * Note that pxeboot will set a default route of 0 if the route
447	 * is not set by the DHCP server.  Check also for a value of 0
448	 * to avoid panicking inappropriately in that situation.
449	 */
450	if (nd->mygateway.sin_len != 0 &&
451	    nd->mygateway.sin_addr.s_addr != 0) {
452		struct sockaddr_in mask, sin;
453
454		bzero((caddr_t)&mask, sizeof(mask));
455		sin = mask;
456		sin.sin_family = AF_INET;
457		sin.sin_len = sizeof(sin);
458                /* XXX MRT use table 0 for this sort of thing */
459		CURVNET_SET(TD_TO_VNET(td));
460		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
461		    (struct sockaddr *)&nd->mygateway,
462		    (struct sockaddr *)&mask,
463		    RTF_UP | RTF_GATEWAY, NULL);
464		CURVNET_RESTORE();
465		if (error)
466			panic("nfs_mountroot: RTM_ADD: %d", error);
467	}
468
469	/*
470	 * Create the rootfs mount point.
471	 */
472	nd->root_args.fh = nd->root_fh;
473	nd->root_args.fhsize = nd->root_fhsize;
474	l = ntohl(nd->root_saddr.sin_addr.s_addr);
475	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
476		(l >> 24) & 0xff, (l >> 16) & 0xff,
477		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
478	printf("NFS ROOT: %s\n", buf);
479	nd->root_args.hostname = buf;
480	if ((error = nfs_mountdiskless(buf,
481	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
482		return (error);
483	}
484
485	/*
486	 * This is not really an nfs issue, but it is much easier to
487	 * set hostname here and then let the "/etc/rc.xxx" files
488	 * mount the right /var based upon its preset value.
489	 */
490	mtx_lock(&prison0.pr_mtx);
491	strlcpy(prison0.pr_hostname, nd->my_hostnam,
492	    sizeof(prison0.pr_hostname));
493	mtx_unlock(&prison0.pr_mtx);
494	inittodr(ntohl(nd->root_time));
495	return (0);
496}
497
498/*
499 * Internal version of mount system call for diskless setup.
500 */
501static int
502nfs_mountdiskless(char *path,
503    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
504    struct vnode **vpp, struct mount *mp)
505{
506	struct sockaddr *nam;
507	int dirlen, error;
508	char *dirpath;
509
510	/*
511	 * Find the directory path in "path", which also has the server's
512	 * name/ip address in it.
513	 */
514	dirpath = strchr(path, ':');
515	if (dirpath != NULL)
516		dirlen = strlen(++dirpath);
517	else
518		dirlen = 0;
519	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
520	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
521	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
522		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
523		return (error);
524	}
525	return (0);
526}
527
528static void
529nfs_sec_name(char *sec, int *flagsp)
530{
531	if (!strcmp(sec, "krb5"))
532		*flagsp |= NFSMNT_KERB;
533	else if (!strcmp(sec, "krb5i"))
534		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
535	else if (!strcmp(sec, "krb5p"))
536		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
537}
538
539static void
540nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
541    const char *hostname, struct ucred *cred, struct thread *td)
542{
543	int s;
544	int adjsock;
545	char *p;
546
547	s = splnet();
548
549	/*
550	 * Set read-only flag if requested; otherwise, clear it if this is
551	 * an update.  If this is not an update, then either the read-only
552	 * flag is already clear, or this is a root mount and it was set
553	 * intentionally at some previous point.
554	 */
555	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
556		MNT_ILOCK(mp);
557		mp->mnt_flag |= MNT_RDONLY;
558		MNT_IUNLOCK(mp);
559	} else if (mp->mnt_flag & MNT_UPDATE) {
560		MNT_ILOCK(mp);
561		mp->mnt_flag &= ~MNT_RDONLY;
562		MNT_IUNLOCK(mp);
563	}
564
565	/*
566	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
567	 * no sense in that context.  Also, set up appropriate retransmit
568	 * and soft timeout behavior.
569	 */
570	if (argp->sotype == SOCK_STREAM) {
571		nmp->nm_flag &= ~NFSMNT_NOCONN;
572		nmp->nm_timeo = NFS_MAXTIMEO;
573		if ((argp->flags & NFSMNT_NFSV4) != 0)
574			nmp->nm_retry = INT_MAX;
575		else
576			nmp->nm_retry = NFS_RETRANS_TCP;
577	}
578
579	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
580	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
581		argp->flags &= ~NFSMNT_RDIRPLUS;
582		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
583	}
584
585	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
586	if ((argp->flags & NFSMNT_NFSV4) != 0) {
587		argp->flags &= ~NFSMNT_RESVPORT;
588		nmp->nm_flag &= ~NFSMNT_RESVPORT;
589	}
590
591	/* Re-bind if rsrvd port requested and wasn't on one */
592	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
593		  && (argp->flags & NFSMNT_RESVPORT);
594	/* Also re-bind if we're switching to/from a connected UDP socket */
595	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
596		    (argp->flags & NFSMNT_NOCONN));
597
598	/* Update flags atomically.  Don't change the lock bits. */
599	nmp->nm_flag = argp->flags | nmp->nm_flag;
600	splx(s);
601
602	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
603		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
604		if (nmp->nm_timeo < NFS_MINTIMEO)
605			nmp->nm_timeo = NFS_MINTIMEO;
606		else if (nmp->nm_timeo > NFS_MAXTIMEO)
607			nmp->nm_timeo = NFS_MAXTIMEO;
608	}
609
610	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
611		nmp->nm_retry = argp->retrans;
612		if (nmp->nm_retry > NFS_MAXREXMIT)
613			nmp->nm_retry = NFS_MAXREXMIT;
614	}
615
616	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
617		nmp->nm_wsize = argp->wsize;
618		/* Round down to multiple of blocksize */
619		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
620		if (nmp->nm_wsize <= 0)
621			nmp->nm_wsize = NFS_FABLKSIZE;
622	}
623
624	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
625		nmp->nm_rsize = argp->rsize;
626		/* Round down to multiple of blocksize */
627		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
628		if (nmp->nm_rsize <= 0)
629			nmp->nm_rsize = NFS_FABLKSIZE;
630	}
631
632	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
633		nmp->nm_readdirsize = argp->readdirsize;
634	}
635
636	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
637		nmp->nm_acregmin = argp->acregmin;
638	else
639		nmp->nm_acregmin = NFS_MINATTRTIMO;
640	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
641		nmp->nm_acregmax = argp->acregmax;
642	else
643		nmp->nm_acregmax = NFS_MAXATTRTIMO;
644	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
645		nmp->nm_acdirmin = argp->acdirmin;
646	else
647		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
648	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
649		nmp->nm_acdirmax = argp->acdirmax;
650	else
651		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
652	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
653		nmp->nm_acdirmin = nmp->nm_acdirmax;
654	if (nmp->nm_acregmin > nmp->nm_acregmax)
655		nmp->nm_acregmin = nmp->nm_acregmax;
656
657	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
658		if (argp->readahead <= NFS_MAXRAHEAD)
659			nmp->nm_readahead = argp->readahead;
660		else
661			nmp->nm_readahead = NFS_MAXRAHEAD;
662	}
663	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
664		if (argp->wcommitsize < nmp->nm_wsize)
665			nmp->nm_wcommitsize = nmp->nm_wsize;
666		else
667			nmp->nm_wcommitsize = argp->wcommitsize;
668	}
669
670	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
671		    (nmp->nm_soproto != argp->proto));
672
673	if (nmp->nm_client != NULL && adjsock) {
674		int haslock = 0, error = 0;
675
676		if (nmp->nm_sotype == SOCK_STREAM) {
677			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
678			if (!error)
679				haslock = 1;
680		}
681		if (!error) {
682		    newnfs_disconnect(&nmp->nm_sockreq);
683		    if (haslock)
684			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
685		    nmp->nm_sotype = argp->sotype;
686		    nmp->nm_soproto = argp->proto;
687		    if (nmp->nm_sotype == SOCK_DGRAM)
688			while (newnfs_connect(nmp, &nmp->nm_sockreq,
689			    cred, td, 0)) {
690				printf("newnfs_args: retrying connect\n");
691				(void) nfs_catnap(PSOCK, 0, "newnfscon");
692			}
693		}
694	} else {
695		nmp->nm_sotype = argp->sotype;
696		nmp->nm_soproto = argp->proto;
697	}
698
699	if (hostname != NULL) {
700		strlcpy(nmp->nm_hostname, hostname,
701		    sizeof(nmp->nm_hostname));
702		p = strchr(nmp->nm_hostname, ':');
703		if (p != NULL)
704			*p = '\0';
705	}
706}
707
/*
 * NULL-terminated list of mount option names passed to vfs_filteropt()
 * by nfs_mount().
 */
static const char *nfs_opts[] = {
	"from",
	/* Generic mount options. */
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	/* Connection, locking and retry behaviour. */
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	/* Transport selection and transfer sizes. */
	"mntudp", "tcp", "udp", "wsize", "rsize", "retrans",
	/* Attribute cache timeouts. */
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	/* Protocol version and security. */
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
718
719/*
720 * VFS Operations.
721 *
722 * mount system call
723 * It seems a bit dumb to copyinstr() the host and path here and then
724 * bcopy() them in mountnfs(), but I wanted to detect errors before
725 * doing the sockargs() call because sockargs() allocates an mbuf and
726 * an error after that means that I have to release the mbuf.
727 */
728/* ARGSUSED */
729static int
730nfs_mount(struct mount *mp)
731{
732	struct nfs_args args = {
733	    .version = NFS_ARGSVERSION,
734	    .addr = NULL,
735	    .addrlen = sizeof (struct sockaddr_in),
736	    .sotype = SOCK_STREAM,
737	    .proto = 0,
738	    .fh = NULL,
739	    .fhsize = 0,
740	    .flags = NFSMNT_RESVPORT,
741	    .wsize = NFS_WSIZE,
742	    .rsize = NFS_RSIZE,
743	    .readdirsize = NFS_READDIRSIZE,
744	    .timeo = 10,
745	    .retrans = NFS_RETRANS,
746	    .readahead = NFS_DEFRAHEAD,
747	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
748	    .hostname = NULL,
749	    .acregmin = NFS_MINATTRTIMO,
750	    .acregmax = NFS_MAXATTRTIMO,
751	    .acdirmin = NFS_MINDIRATTRTIMO,
752	    .acdirmax = NFS_MAXDIRATTRTIMO,
753	};
754	int error = 0, ret, len;
755	struct sockaddr *nam = NULL;
756	struct vnode *vp;
757	struct thread *td;
758	char hst[MNAMELEN];
759	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
760	char *opt, *name, *secname;
761	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
762	int dirlen, krbnamelen, srvkrbnamelen;
763
764	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
765		error = EINVAL;
766		goto out;
767	}
768
769	td = curthread;
770	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
771		error = ncl_mountroot(mp);
772		goto out;
773	}
774
775	nfscl_init();
776
777	/* Handle the new style options. */
778	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
779		args.flags |= NFSMNT_NOCONN;
780	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
781		args.flags |= NFSMNT_NOCONN;
782	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
783		args.flags |= NFSMNT_NOLOCKD;
784	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
785		args.flags &= ~NFSMNT_NOLOCKD;
786	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
787		args.flags |= NFSMNT_INT;
788	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
789		args.flags |= NFSMNT_RDIRPLUS;
790	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
791		args.flags |= NFSMNT_RESVPORT;
792	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
793		args.flags &= ~NFSMNT_RESVPORT;
794	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
795		args.flags |= NFSMNT_SOFT;
796	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
797		args.flags &= ~NFSMNT_SOFT;
798	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
799		args.sotype = SOCK_DGRAM;
800	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
801		args.sotype = SOCK_DGRAM;
802	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
803		args.sotype = SOCK_STREAM;
804	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
805		args.flags |= NFSMNT_NFSV3;
806	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
807		args.flags |= NFSMNT_NFSV4;
808		args.sotype = SOCK_STREAM;
809	}
810	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
811		args.flags |= NFSMNT_ALLGSSNAME;
812	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
813		if (opt == NULL) {
814			vfs_mount_error(mp, "illegal readdirsize");
815			error = EINVAL;
816			goto out;
817		}
818		ret = sscanf(opt, "%d", &args.readdirsize);
819		if (ret != 1 || args.readdirsize <= 0) {
820			vfs_mount_error(mp, "illegal readdirsize: %s",
821			    opt);
822			error = EINVAL;
823			goto out;
824		}
825		args.flags |= NFSMNT_READDIRSIZE;
826	}
827	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
828		if (opt == NULL) {
829			vfs_mount_error(mp, "illegal readahead");
830			error = EINVAL;
831			goto out;
832		}
833		ret = sscanf(opt, "%d", &args.readahead);
834		if (ret != 1 || args.readahead <= 0) {
835			vfs_mount_error(mp, "illegal readahead: %s",
836			    opt);
837			error = EINVAL;
838			goto out;
839		}
840		args.flags |= NFSMNT_READAHEAD;
841	}
842	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
843		if (opt == NULL) {
844			vfs_mount_error(mp, "illegal wsize");
845			error = EINVAL;
846			goto out;
847		}
848		ret = sscanf(opt, "%d", &args.wsize);
849		if (ret != 1 || args.wsize <= 0) {
850			vfs_mount_error(mp, "illegal wsize: %s",
851			    opt);
852			error = EINVAL;
853			goto out;
854		}
855		args.flags |= NFSMNT_WSIZE;
856	}
857	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
858		if (opt == NULL) {
859			vfs_mount_error(mp, "illegal rsize");
860			error = EINVAL;
861			goto out;
862		}
863		ret = sscanf(opt, "%d", &args.rsize);
864		if (ret != 1 || args.rsize <= 0) {
865			vfs_mount_error(mp, "illegal wsize: %s",
866			    opt);
867			error = EINVAL;
868			goto out;
869		}
870		args.flags |= NFSMNT_RSIZE;
871	}
872	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
873		if (opt == NULL) {
874			vfs_mount_error(mp, "illegal retrans");
875			error = EINVAL;
876			goto out;
877		}
878		ret = sscanf(opt, "%d", &args.retrans);
879		if (ret != 1 || args.retrans <= 0) {
880			vfs_mount_error(mp, "illegal retrans: %s",
881			    opt);
882			error = EINVAL;
883			goto out;
884		}
885		args.flags |= NFSMNT_RETRANS;
886	}
887	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
888		ret = sscanf(opt, "%d", &args.acregmin);
889		if (ret != 1 || args.acregmin < 0) {
890			vfs_mount_error(mp, "illegal acregmin: %s",
891			    opt);
892			error = EINVAL;
893			goto out;
894		}
895		args.flags |= NFSMNT_ACREGMIN;
896	}
897	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
898		ret = sscanf(opt, "%d", &args.acregmax);
899		if (ret != 1 || args.acregmax < 0) {
900			vfs_mount_error(mp, "illegal acregmax: %s",
901			    opt);
902			error = EINVAL;
903			goto out;
904		}
905		args.flags |= NFSMNT_ACREGMAX;
906	}
907	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
908		ret = sscanf(opt, "%d", &args.acdirmin);
909		if (ret != 1 || args.acdirmin < 0) {
910			vfs_mount_error(mp, "illegal acdirmin: %s",
911			    opt);
912			error = EINVAL;
913			goto out;
914		}
915		args.flags |= NFSMNT_ACDIRMIN;
916	}
917	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
918		ret = sscanf(opt, "%d", &args.acdirmax);
919		if (ret != 1 || args.acdirmax < 0) {
920			vfs_mount_error(mp, "illegal acdirmax: %s",
921			    opt);
922			error = EINVAL;
923			goto out;
924		}
925		args.flags |= NFSMNT_ACDIRMAX;
926	}
927	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
928		ret = sscanf(opt, "%d", &args.timeo);
929		if (ret != 1 || args.timeo <= 0) {
930			vfs_mount_error(mp, "illegal timeout: %s",
931			    opt);
932			error = EINVAL;
933			goto out;
934		}
935		args.flags |= NFSMNT_TIMEO;
936	}
937	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
938	    == 0) {
939		ret = sscanf(opt, "%d", &negnametimeo);
940		if (ret != 1 || negnametimeo < 0) {
941			vfs_mount_error(mp, "illegal negnametimeo: %s",
942			    opt);
943			error = EINVAL;
944			goto out;
945		}
946	}
947	if (vfs_getopt(mp->mnt_optnew, "sec",
948		(void **) &secname, NULL) == 0)
949		nfs_sec_name(secname, &args.flags);
950
951	if (mp->mnt_flag & MNT_UPDATE) {
952		struct nfsmount *nmp = VFSTONFS(mp);
953
954		if (nmp == NULL) {
955			error = EIO;
956			goto out;
957		}
958		/*
959		 * When doing an update, we can't change version,
960		 * security, switch lockd strategies or change cookie
961		 * translation
962		 */
963		args.flags = (args.flags &
964		    ~(NFSMNT_NFSV3 |
965		      NFSMNT_NFSV4 |
966		      NFSMNT_KERB |
967		      NFSMNT_INTEGRITY |
968		      NFSMNT_PRIVACY |
969		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
970		    (nmp->nm_flag &
971			(NFSMNT_NFSV3 |
972			 NFSMNT_NFSV4 |
973			 NFSMNT_KERB |
974			 NFSMNT_INTEGRITY |
975			 NFSMNT_PRIVACY |
976			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
977		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
978		goto out;
979	}
980
981	/*
982	 * Make the nfs_ip_paranoia sysctl serve as the default connection
983	 * or no-connection mode for those protocols that support
984	 * no-connection mode (the flag will be cleared later for protocols
985	 * that do not support no-connection mode).  This will allow a client
986	 * to receive replies from a different IP then the request was
987	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
988	 * not 0.
989	 */
990	if (nfs_ip_paranoia == 0)
991		args.flags |= NFSMNT_NOCONN;
992
993	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
994	    &args.fhsize) == 0) {
995		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
996			vfs_mount_error(mp, "Bad file handle");
997			error = EINVAL;
998			goto out;
999		}
1000		bcopy(args.fh, nfh, args.fhsize);
1001	} else {
1002		args.fhsize = 0;
1003	}
1004
1005	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1006	    &len);
1007	if (args.hostname == NULL) {
1008		vfs_mount_error(mp, "Invalid hostname");
1009		error = EINVAL;
1010		goto out;
1011	}
1012	bcopy(args.hostname, hst, MNAMELEN);
1013	hst[MNAMELEN - 1] = '\0';
1014
1015	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1016		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1017	else
1018		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1019	srvkrbnamelen = strlen(srvkrbname);
1020
1021	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1022		strlcpy(krbname, name, sizeof (krbname));
1023	else
1024		krbname[0] = '\0';
1025	krbnamelen = strlen(krbname);
1026
1027	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1028		strlcpy(dirpath, name, sizeof (dirpath));
1029	else
1030		dirpath[0] = '\0';
1031	dirlen = strlen(dirpath);
1032
1033	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1034	    &args.addrlen) == 0) {
1035		if (args.addrlen > SOCK_MAXADDRLEN) {
1036			error = ENAMETOOLONG;
1037			goto out;
1038		}
1039		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1040		bcopy(args.addr, nam, args.addrlen);
1041		nam->sa_len = args.addrlen;
1042	}
1043
1044	args.fh = nfh;
1045	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1046	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1047	    negnametimeo);
1048out:
1049	if (!error) {
1050		MNT_ILOCK(mp);
1051		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1052		MNT_IUNLOCK(mp);
1053	}
1054	return (error);
1055}
1056
1057
1058/*
1059 * VFS Operations.
1060 *
1061 * mount system call
1062 * It seems a bit dumb to copyinstr() the host and path here and then
1063 * bcopy() them in mountnfs(), but I wanted to detect errors before
1064 * doing the sockargs() call because sockargs() allocates an mbuf and
1065 * an error after that means that I have to release the mbuf.
1066 */
1067/* ARGSUSED */
1068static int
1069nfs_cmount(struct mntarg *ma, void *data, int flags)
1070{
1071	int error;
1072	struct nfs_args args;
1073
1074	error = copyin(data, &args, sizeof (struct nfs_args));
1075	if (error)
1076		return error;
1077
1078	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1079
1080	error = kernel_mount(ma, flags);
1081	return (error);
1082}
1083
1084/*
1085 * Common code for mount and mountroot
1086 */
1087static int
1088mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1089    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1090    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1091    struct ucred *cred, struct thread *td, int negnametimeo)
1092{
1093	struct nfsmount *nmp;
1094	struct nfsnode *np;
1095	int error, trycnt, ret;
1096	struct nfsvattr nfsva;
1097	static u_int64_t clval = 0;
1098
1099	if (mp->mnt_flag & MNT_UPDATE) {
1100		nmp = VFSTONFS(mp);
1101		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1102		FREE(nam, M_SONAME);
1103		return (0);
1104	} else {
1105		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1106		    krbnamelen + dirlen + srvkrbnamelen + 2,
1107		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1108		TAILQ_INIT(&nmp->nm_bufq);
1109		if (clval == 0)
1110			clval = (u_int64_t)nfsboottime.tv_sec;
1111		nmp->nm_clval = clval++;
1112		nmp->nm_krbnamelen = krbnamelen;
1113		nmp->nm_dirpathlen = dirlen;
1114		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1115		if (td->td_ucred->cr_uid != (uid_t)0) {
1116			/*
1117			 * nm_uid is used to get KerberosV credentials for
1118			 * the nfsv4 state handling operations if there is
1119			 * no host based principal set. Use the uid of
1120			 * this user if not root, since they are doing the
1121			 * mount. I don't think setting this for root will
1122			 * work, since root normally does not have user
1123			 * credentials in a credentials cache.
1124			 */
1125			nmp->nm_uid = td->td_ucred->cr_uid;
1126		} else {
1127			/*
1128			 * Just set to -1, so it won't be used.
1129			 */
1130			nmp->nm_uid = (uid_t)-1;
1131		}
1132
1133		/* Copy and null terminate all the names */
1134		if (nmp->nm_krbnamelen > 0) {
1135			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1136			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1137		}
1138		if (nmp->nm_dirpathlen > 0) {
1139			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1140			    nmp->nm_dirpathlen);
1141			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1142			    + 1] = '\0';
1143		}
1144		if (nmp->nm_srvkrbnamelen > 0) {
1145			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1146			    nmp->nm_srvkrbnamelen);
1147			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1148			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1149		}
1150		nmp->nm_sockreq.nr_cred = crhold(cred);
1151		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1152		mp->mnt_data = nmp;
1153		nmp->nm_getinfo = nfs_getnlminfo;
1154		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1155	}
1156	vfs_getnewfsid(mp);
1157	nmp->nm_mountp = mp;
1158	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1159	nmp->nm_negnametimeo = negnametimeo;
1160
1161	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1162
1163	/*
1164	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1165	 * high, depending on whether we end up with negative offsets in
1166	 * the client or server somewhere.  2GB-1 may be safer.
1167	 *
1168	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1169	 * that we can handle until we find out otherwise.
1170	 * XXX Our "safe" limit on the client is what we can store in our
1171	 * buffer cache using signed(!) block numbers.
1172	 */
1173	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1174		nmp->nm_maxfilesize = 0xffffffffLL;
1175	else
1176		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1177
1178	nmp->nm_timeo = NFS_TIMEO;
1179	nmp->nm_retry = NFS_RETRANS;
1180	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1181		nmp->nm_wsize = NFS_WSIZE;
1182		nmp->nm_rsize = NFS_RSIZE;
1183		nmp->nm_readdirsize = NFS_READDIRSIZE;
1184	}
1185	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1186	nmp->nm_numgrps = NFS_MAXGRPS;
1187	nmp->nm_readahead = NFS_DEFRAHEAD;
1188	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1189	if (nmp->nm_tprintf_delay < 0)
1190		nmp->nm_tprintf_delay = 0;
1191	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1192	if (nmp->nm_tprintf_initial_delay < 0)
1193		nmp->nm_tprintf_initial_delay = 0;
1194	nmp->nm_fhsize = argp->fhsize;
1195	if (nmp->nm_fhsize > 0)
1196		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1197	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1198	nmp->nm_nam = nam;
1199	/* Set up the sockets and per-host congestion */
1200	nmp->nm_sotype = argp->sotype;
1201	nmp->nm_soproto = argp->proto;
1202	nmp->nm_sockreq.nr_prog = NFS_PROG;
1203	if ((argp->flags & NFSMNT_NFSV4))
1204		nmp->nm_sockreq.nr_vers = NFS_VER4;
1205	else if ((argp->flags & NFSMNT_NFSV3))
1206		nmp->nm_sockreq.nr_vers = NFS_VER3;
1207	else
1208		nmp->nm_sockreq.nr_vers = NFS_VER2;
1209
1210
1211	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1212		goto bad;
1213
1214	/*
1215	 * A reference count is needed on the nfsnode representing the
1216	 * remote root.  If this object is not persistent, then backward
1217	 * traversals of the mount point (i.e. "..") will not work if
1218	 * the nfsnode gets flushed out of the cache. Ufs does not have
1219	 * this problem, because one can identify root inodes by their
1220	 * number == ROOTINO (2).
1221	 */
1222	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1223	    nmp->nm_dirpathlen > 0) {
1224		/*
1225		 * If the fhsize on the mount point == 0 for V4, the mount
1226		 * path needs to be looked up.
1227		 */
1228		trycnt = 3;
1229		do {
1230			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1231			    cred, td);
1232			if (error)
1233				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1234		} while (error && --trycnt > 0);
1235		if (error) {
1236			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1237			goto bad;
1238		}
1239	}
1240	if (nmp->nm_fhsize > 0) {
1241		/*
1242		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1243		 * non-zero for the root vnode. f_iosize will be set correctly
1244		 * by nfs_statfs() before any I/O occurs.
1245		 */
1246		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1247		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1248		    LK_EXCLUSIVE);
1249		if (error)
1250			goto bad;
1251		*vpp = NFSTOV(np);
1252
1253		/*
1254		 * Get file attributes and transfer parameters for the
1255		 * mountpoint.  This has the side effect of filling in
1256		 * (*vpp)->v_type with the correct value.
1257		 */
1258		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1259		    cred, td, &nfsva, NULL);
1260		if (ret) {
1261			/*
1262			 * Just set default values to get things going.
1263			 */
1264			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1265			nfsva.na_vattr.va_type = VDIR;
1266			nfsva.na_vattr.va_mode = 0777;
1267			nfsva.na_vattr.va_nlink = 100;
1268			nfsva.na_vattr.va_uid = (uid_t)0;
1269			nfsva.na_vattr.va_gid = (gid_t)0;
1270			nfsva.na_vattr.va_fileid = 2;
1271			nfsva.na_vattr.va_gen = 1;
1272			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1273			nfsva.na_vattr.va_size = 512 * 1024;
1274		}
1275		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1276		if (argp->flags & NFSMNT_NFSV3)
1277			ncl_fsinfo(nmp, *vpp, cred, td);
1278
1279		/*
1280		 * Lose the lock but keep the ref.
1281		 */
1282		VOP_UNLOCK(*vpp, 0);
1283		return (0);
1284	}
1285	error = EIO;
1286
1287bad:
1288	newnfs_disconnect(&nmp->nm_sockreq);
1289	crfree(nmp->nm_sockreq.nr_cred);
1290	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1291	mtx_destroy(&nmp->nm_mtx);
1292	FREE(nmp, M_NEWNFSMNT);
1293	FREE(nam, M_SONAME);
1294	return (error);
1295}
1296
1297/*
1298 * unmount system call
1299 */
1300static int
1301nfs_unmount(struct mount *mp, int mntflags)
1302{
1303	struct thread *td;
1304	struct nfsmount *nmp;
1305	int error, flags = 0, trycnt = 0;
1306
1307	td = curthread;
1308
1309	if (mntflags & MNT_FORCE)
1310		flags |= FORCECLOSE;
1311	nmp = VFSTONFS(mp);
1312	/*
1313	 * Goes something like this..
1314	 * - Call vflush() to clear out vnodes for this filesystem
1315	 * - Close the socket
1316	 * - Free up the data structures
1317	 */
1318	/* In the forced case, cancel any outstanding requests. */
1319	if (mntflags & MNT_FORCE) {
1320		error = newnfs_nmcancelreqs(nmp);
1321		if (error)
1322			goto out;
1323		/* For a forced close, get rid of the renew thread now */
1324		nfscl_umount(nmp, td);
1325	}
1326	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1327	do {
1328		error = vflush(mp, 1, flags, td);
1329		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1330			(void) nfs_catnap(PSOCK, error, "newndm");
1331	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1332	if (error)
1333		goto out;
1334
1335	/*
1336	 * We are now committed to the unmount.
1337	 */
1338	if ((mntflags & MNT_FORCE) == 0)
1339		nfscl_umount(nmp, td);
1340	newnfs_disconnect(&nmp->nm_sockreq);
1341	crfree(nmp->nm_sockreq.nr_cred);
1342	FREE(nmp->nm_nam, M_SONAME);
1343
1344	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1345	mtx_destroy(&nmp->nm_mtx);
1346	FREE(nmp, M_NEWNFSMNT);
1347out:
1348	return (error);
1349}
1350
1351/*
1352 * Return root of a filesystem
1353 */
1354static int
1355nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1356{
1357	struct vnode *vp;
1358	struct nfsmount *nmp;
1359	struct nfsnode *np;
1360	int error;
1361
1362	nmp = VFSTONFS(mp);
1363	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1364	if (error)
1365		return error;
1366	vp = NFSTOV(np);
1367	/*
1368	 * Get transfer parameters and attributes for root vnode once.
1369	 */
1370	mtx_lock(&nmp->nm_mtx);
1371	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1372		mtx_unlock(&nmp->nm_mtx);
1373		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1374	} else
1375		mtx_unlock(&nmp->nm_mtx);
1376	if (vp->v_type == VNON)
1377	    vp->v_type = VDIR;
1378	vp->v_vflag |= VV_ROOT;
1379	*vpp = vp;
1380	return (0);
1381}
1382
1383/*
1384 * Flush out the buffer cache
1385 */
1386/* ARGSUSED */
1387static int
1388nfs_sync(struct mount *mp, int waitfor)
1389{
1390	struct vnode *vp, *mvp;
1391	struct thread *td;
1392	int error, allerror = 0;
1393
1394	td = curthread;
1395
1396	/*
1397	 * Force stale buffer cache information to be flushed.
1398	 */
1399	MNT_ILOCK(mp);
1400loop:
1401	MNT_VNODE_FOREACH(vp, mp, mvp) {
1402		VI_LOCK(vp);
1403		MNT_IUNLOCK(mp);
1404		/* XXX Racy bv_cnt check. */
1405		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1406		    waitfor == MNT_LAZY) {
1407			VI_UNLOCK(vp);
1408			MNT_ILOCK(mp);
1409			continue;
1410		}
1411		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1412			MNT_ILOCK(mp);
1413			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1414			goto loop;
1415		}
1416		error = VOP_FSYNC(vp, waitfor, td);
1417		if (error)
1418			allerror = error;
1419		VOP_UNLOCK(vp, 0);
1420		vrele(vp);
1421
1422		MNT_ILOCK(mp);
1423	}
1424	MNT_IUNLOCK(mp);
1425	return (allerror);
1426}
1427
1428static int
1429nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1430{
1431	struct nfsmount *nmp = VFSTONFS(mp);
1432	struct vfsquery vq;
1433	int error;
1434
1435	bzero(&vq, sizeof(vq));
1436	switch (op) {
1437#if 0
1438	case VFS_CTL_NOLOCKS:
1439		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1440 		if (req->oldptr != NULL) {
1441 			error = SYSCTL_OUT(req, &val, sizeof(val));
1442 			if (error)
1443 				return (error);
1444 		}
1445 		if (req->newptr != NULL) {
1446 			error = SYSCTL_IN(req, &val, sizeof(val));
1447 			if (error)
1448 				return (error);
1449			if (val)
1450				nmp->nm_flag |= NFSMNT_NOLOCKS;
1451			else
1452				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1453 		}
1454		break;
1455#endif
1456	case VFS_CTL_QUERY:
1457		mtx_lock(&nmp->nm_mtx);
1458		if (nmp->nm_state & NFSSTA_TIMEO)
1459			vq.vq_flags |= VQ_NOTRESP;
1460		mtx_unlock(&nmp->nm_mtx);
1461#if 0
1462		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1463		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1464			vq.vq_flags |= VQ_NOTRESPLOCK;
1465#endif
1466		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1467		break;
1468 	case VFS_CTL_TIMEO:
1469 		if (req->oldptr != NULL) {
1470 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1471 			    sizeof(nmp->nm_tprintf_initial_delay));
1472 			if (error)
1473 				return (error);
1474 		}
1475 		if (req->newptr != NULL) {
1476			error = vfs_suser(mp, req->td);
1477			if (error)
1478				return (error);
1479 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1480 			    sizeof(nmp->nm_tprintf_initial_delay));
1481 			if (error)
1482 				return (error);
1483 			if (nmp->nm_tprintf_initial_delay < 0)
1484 				nmp->nm_tprintf_initial_delay = 0;
1485 		}
1486		break;
1487	default:
1488		return (ENOTSUP);
1489	}
1490	return (0);
1491}
1492
1493/*
1494 * Extract the information needed by the nlm from the nfs vnode.
1495 */
1496static void
1497nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1498    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1499    struct timeval *timeop)
1500{
1501	struct nfsmount *nmp;
1502	struct nfsnode *np = VTONFS(vp);
1503
1504	nmp = VFSTONFS(vp->v_mount);
1505	if (fhlenp != NULL)
1506		*fhlenp = (size_t)np->n_fhp->nfh_len;
1507	if (fhp != NULL)
1508		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1509	if (sp != NULL)
1510		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1511	if (is_v3p != NULL)
1512		*is_v3p = NFS_ISV3(vp);
1513	if (sizep != NULL)
1514		*sizep = np->n_size;
1515	if (timeop != NULL) {
1516		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1517		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1518	}
1519}
1520
1521