nfs_clvfsops.c revision 221066
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 221066 2011-04-26 13:50:11Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82
83MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
84MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
85
86SYSCTL_DECL(_vfs_newnfs);
87SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
88	&newnfsstats, nfsstats, "S,nfsstats");
89static int nfs_ip_paranoia = 1;
90SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
91    &nfs_ip_paranoia, 0, "");
92static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
93SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
94        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
95/* how long between console messages "nfs server foo not responding" */
96static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
97SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
98        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
99
100static int	nfs_mountroot(struct mount *);
101static void	nfs_sec_name(char *, int *);
102static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
103		    struct nfs_args *argp, const char *, struct ucred *,
104		    struct thread *);
105static int	mountnfs(struct nfs_args *, struct mount *,
106		    struct sockaddr *, char *, u_char *, int, u_char *, int,
107		    u_char *, int, struct vnode **, struct ucred *,
108		    struct thread *, int);
109static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
110		    struct sockaddr_storage *, int *, off_t *,
111		    struct timeval *);
112static vfs_mount_t nfs_mount;
113static vfs_cmount_t nfs_cmount;
114static vfs_unmount_t nfs_unmount;
115static vfs_root_t nfs_root;
116static vfs_statfs_t nfs_statfs;
117static vfs_sync_t nfs_sync;
118static vfs_sysctl_t nfs_sysctl;
119
120/*
121 * nfs vfs operations.
122 */
123static struct vfsops nfs_vfsops = {
124	.vfs_init =		ncl_init,
125	.vfs_mount =		nfs_mount,
126	.vfs_cmount =		nfs_cmount,
127	.vfs_root =		nfs_root,
128	.vfs_statfs =		nfs_statfs,
129	.vfs_sync =		nfs_sync,
130	.vfs_uninit =		ncl_uninit,
131	.vfs_unmount =		nfs_unmount,
132	.vfs_sysctl =		nfs_sysctl,
133};
134VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
135
136/* So that loader and kldload(2) can find us, wherever we are.. */
137MODULE_VERSION(newnfs, 1);
138
139/*
140 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
141 * can be shared by both NFS clients. It is declared here so that it
142 * will be defined for kernels built without NFS_ROOT, although it
143 * isn't used in that case.
144 */
145#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
146struct nfs_diskless	nfs_diskless = { { { 0 } } };
147struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
148int			nfs_diskless_valid = 0;
149#endif
150
151SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
152    &nfs_diskless_valid, 0,
153    "Has the diskless struct been filled correctly");
154
155SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
156    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
157
158SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
159    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
160    "%Ssockaddr_in", "Diskless root nfs address");
161
162
/* Exported byte-order helper; its definition is not in this part of the file. */
void		newnfsargs_ntoh(struct nfs_args *);

/* File-local helpers used by the diskless (NFS root) mount path. */
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
170
171int
172newnfs_iosize(struct nfsmount *nmp)
173{
174	int iosize, maxio;
175
176	/* First, set the upper limit for iosize */
177	if (nmp->nm_flag & NFSMNT_NFSV4) {
178		maxio = NFS_MAXBSIZE;
179	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
180		if (nmp->nm_sotype == SOCK_DGRAM)
181			maxio = NFS_MAXDGRAMDATA;
182		else
183			maxio = NFS_MAXBSIZE;
184	} else {
185		maxio = NFS_V2MAXDATA;
186	}
187	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
188		nmp->nm_rsize = maxio;
189	if (nmp->nm_rsize > MAXBSIZE)
190		nmp->nm_rsize = MAXBSIZE;
191	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
192		nmp->nm_readdirsize = maxio;
193	if (nmp->nm_readdirsize > nmp->nm_rsize)
194		nmp->nm_readdirsize = nmp->nm_rsize;
195	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
196		nmp->nm_wsize = maxio;
197	if (nmp->nm_wsize > MAXBSIZE)
198		nmp->nm_wsize = MAXBSIZE;
199
200	/*
201	 * Calculate the size used for io buffers.  Use the larger
202	 * of the two sizes to minimise nfs requests but make sure
203	 * that it is at least one VM page to avoid wasting buffer
204	 * space.
205	 */
206	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
207	iosize = imax(iosize, PAGE_SIZE);
208	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
209	return (iosize);
210}
211
212static void
213nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
214{
215
216	args->version = NFS_ARGSVERSION;
217	args->addr = oargs->addr;
218	args->addrlen = oargs->addrlen;
219	args->sotype = oargs->sotype;
220	args->proto = oargs->proto;
221	args->fh = oargs->fh;
222	args->fhsize = oargs->fhsize;
223	args->flags = oargs->flags;
224	args->wsize = oargs->wsize;
225	args->rsize = oargs->rsize;
226	args->readdirsize = oargs->readdirsize;
227	args->timeo = oargs->timeo;
228	args->retrans = oargs->retrans;
229	args->readahead = oargs->readahead;
230	args->hostname = oargs->hostname;
231}
232
233static void
234nfs_convert_diskless(void)
235{
236
237	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
238		sizeof(struct ifaliasreq));
239	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
240		sizeof(struct sockaddr_in));
241	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
242	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
243		nfsv3_diskless.root_fhsize = NFSX_MYFH;
244		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
245	} else {
246		nfsv3_diskless.root_fhsize = NFSX_V2FH;
247		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
248	}
249	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
250		sizeof(struct sockaddr_in));
251	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
252	nfsv3_diskless.root_time = nfs_diskless.root_time;
253	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
254		MAXHOSTNAMELEN);
255	nfs_diskless_valid = 3;
256}
257
258/*
259 * nfs statfs call
260 */
261static int
262nfs_statfs(struct mount *mp, struct statfs *sbp)
263{
264	struct vnode *vp;
265	struct thread *td;
266	struct nfsmount *nmp = VFSTONFS(mp);
267	struct nfsvattr nfsva;
268	struct nfsfsinfo fs;
269	struct nfsstatfs sb;
270	int error = 0, attrflag, gotfsinfo = 0, ret;
271	struct nfsnode *np;
272
273	td = curthread;
274
275	error = vfs_busy(mp, MBF_NOWAIT);
276	if (error)
277		return (error);
278	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
279	if (error) {
280		vfs_unbusy(mp);
281		return (error);
282	}
283	vp = NFSTOV(np);
284	mtx_lock(&nmp->nm_mtx);
285	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
286		mtx_unlock(&nmp->nm_mtx);
287		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
288		    &attrflag, NULL);
289		if (!error)
290			gotfsinfo = 1;
291	} else
292		mtx_unlock(&nmp->nm_mtx);
293	if (!error)
294		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
295		    &attrflag, NULL);
296	if (attrflag == 0) {
297		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
298		    td->td_ucred, td, &nfsva, NULL);
299		if (ret) {
300			/*
301			 * Just set default values to get things going.
302			 */
303			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
304			nfsva.na_vattr.va_type = VDIR;
305			nfsva.na_vattr.va_mode = 0777;
306			nfsva.na_vattr.va_nlink = 100;
307			nfsva.na_vattr.va_uid = (uid_t)0;
308			nfsva.na_vattr.va_gid = (gid_t)0;
309			nfsva.na_vattr.va_fileid = 2;
310			nfsva.na_vattr.va_gen = 1;
311			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
312			nfsva.na_vattr.va_size = 512 * 1024;
313		}
314	}
315	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
316	if (!error) {
317	    mtx_lock(&nmp->nm_mtx);
318	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
319		nfscl_loadfsinfo(nmp, &fs);
320	    nfscl_loadsbinfo(nmp, &sb, sbp);
321	    sbp->f_iosize = newnfs_iosize(nmp);
322	    mtx_unlock(&nmp->nm_mtx);
323	    if (sbp != &mp->mnt_stat) {
324		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
325		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
326	    }
327	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
328	} else if (NFS_ISV4(vp)) {
329		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
330	}
331	vput(vp);
332	vfs_unbusy(mp);
333	return (error);
334}
335
336/*
337 * nfs version 3 fsinfo rpc call
338 */
339int
340ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
341    struct thread *td)
342{
343	struct nfsfsinfo fs;
344	struct nfsvattr nfsva;
345	int error, attrflag;
346
347	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
348	if (!error) {
349		if (attrflag)
350			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
351			    1);
352		mtx_lock(&nmp->nm_mtx);
353		nfscl_loadfsinfo(nmp, &fs);
354		mtx_unlock(&nmp->nm_mtx);
355	}
356	return (error);
357}
358
359/*
360 * Mount a remote root fs via. nfs. This depends on the info in the
361 * nfs_diskless structure that has been filled in properly by some primary
362 * bootstrap.
363 * It goes something like this:
364 * - do enough of "ifconfig" by calling ifioctl() so that the system
365 *   can talk to the server
366 * - If nfs_diskless.mygateway is filled in, use that address as
367 *   a default gateway.
368 * - build the rootfs mount point and call mountnfs() to do the rest.
369 *
370 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
371 * structure, as well as other global NFS client variables here, as
372 * nfs_mountroot() will be called once in the boot before any other NFS
373 * client activity occurs.
374 */
375static int
376nfs_mountroot(struct mount *mp)
377{
378	struct thread *td = curthread;
379	struct nfsv3_diskless *nd = &nfsv3_diskless;
380	struct socket *so;
381	struct vnode *vp;
382	struct ifreq ir;
383	int error;
384	u_long l;
385	char buf[128];
386	char *cp;
387
388#if defined(BOOTP_NFSROOT) && defined(BOOTP)
389	bootpc_init();		/* use bootp to get nfs_diskless filled in */
390#elif defined(NFS_ROOT)
391	nfs_setup_diskless();
392#endif
393
394	if (nfs_diskless_valid == 0)
395		return (-1);
396	if (nfs_diskless_valid == 1)
397		nfs_convert_diskless();
398
399	/*
400	 * XXX splnet, so networks will receive...
401	 */
402	splnet();
403
404	/*
405	 * Do enough of ifconfig(8) so that the critical net interface can
406	 * talk to the server.
407	 */
408	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
409	    td->td_ucred, td);
410	if (error)
411		panic("nfs_mountroot: socreate(%04x): %d",
412			nd->myif.ifra_addr.sa_family, error);
413
414#if 0 /* XXX Bad idea */
415	/*
416	 * We might not have been told the right interface, so we pass
417	 * over the first ten interfaces of the same kind, until we get
418	 * one of them configured.
419	 */
420
421	for (i = strlen(nd->myif.ifra_name) - 1;
422		nd->myif.ifra_name[i] >= '0' &&
423		nd->myif.ifra_name[i] <= '9';
424		nd->myif.ifra_name[i] ++) {
425		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
426		if(!error)
427			break;
428	}
429#endif
430	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
431	if (error)
432		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
433	if ((cp = getenv("boot.netif.mtu")) != NULL) {
434		ir.ifr_mtu = strtol(cp, NULL, 10);
435		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
436		freeenv(cp);
437		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
438		if (error)
439			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
440	}
441	soclose(so);
442
443	/*
444	 * If the gateway field is filled in, set it as the default route.
445	 * Note that pxeboot will set a default route of 0 if the route
446	 * is not set by the DHCP server.  Check also for a value of 0
447	 * to avoid panicking inappropriately in that situation.
448	 */
449	if (nd->mygateway.sin_len != 0 &&
450	    nd->mygateway.sin_addr.s_addr != 0) {
451		struct sockaddr_in mask, sin;
452
453		bzero((caddr_t)&mask, sizeof(mask));
454		sin = mask;
455		sin.sin_family = AF_INET;
456		sin.sin_len = sizeof(sin);
457                /* XXX MRT use table 0 for this sort of thing */
458		CURVNET_SET(TD_TO_VNET(td));
459		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
460		    (struct sockaddr *)&nd->mygateway,
461		    (struct sockaddr *)&mask,
462		    RTF_UP | RTF_GATEWAY, NULL);
463		CURVNET_RESTORE();
464		if (error)
465			panic("nfs_mountroot: RTM_ADD: %d", error);
466	}
467
468	/*
469	 * Create the rootfs mount point.
470	 */
471	nd->root_args.fh = nd->root_fh;
472	nd->root_args.fhsize = nd->root_fhsize;
473	l = ntohl(nd->root_saddr.sin_addr.s_addr);
474	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
475		(l >> 24) & 0xff, (l >> 16) & 0xff,
476		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
477	printf("NFS ROOT: %s\n", buf);
478	nd->root_args.hostname = buf;
479	if ((error = nfs_mountdiskless(buf,
480	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
481		return (error);
482	}
483
484	/*
485	 * This is not really an nfs issue, but it is much easier to
486	 * set hostname here and then let the "/etc/rc.xxx" files
487	 * mount the right /var based upon its preset value.
488	 */
489	mtx_lock(&prison0.pr_mtx);
490	strlcpy(prison0.pr_hostname, nd->my_hostnam,
491	    sizeof(prison0.pr_hostname));
492	mtx_unlock(&prison0.pr_mtx);
493	inittodr(ntohl(nd->root_time));
494	return (0);
495}
496
497/*
498 * Internal version of mount system call for diskless setup.
499 */
500static int
501nfs_mountdiskless(char *path,
502    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
503    struct vnode **vpp, struct mount *mp)
504{
505	struct sockaddr *nam;
506	int dirlen, error;
507	char *dirpath;
508
509	/*
510	 * Find the directory path in "path", which also has the server's
511	 * name/ip address in it.
512	 */
513	dirpath = strchr(path, ':');
514	if (dirpath != NULL)
515		dirlen = strlen(++dirpath);
516	else
517		dirlen = 0;
518	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
519	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
520	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
521		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
522		return (error);
523	}
524	return (0);
525}
526
527static void
528nfs_sec_name(char *sec, int *flagsp)
529{
530	if (!strcmp(sec, "krb5"))
531		*flagsp |= NFSMNT_KERB;
532	else if (!strcmp(sec, "krb5i"))
533		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
534	else if (!strcmp(sec, "krb5p"))
535		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
536}
537
538static void
539nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
540    const char *hostname, struct ucred *cred, struct thread *td)
541{
542	int s;
543	int adjsock;
544	char *p;
545
546	s = splnet();
547
548	/*
549	 * Set read-only flag if requested; otherwise, clear it if this is
550	 * an update.  If this is not an update, then either the read-only
551	 * flag is already clear, or this is a root mount and it was set
552	 * intentionally at some previous point.
553	 */
554	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
555		MNT_ILOCK(mp);
556		mp->mnt_flag |= MNT_RDONLY;
557		MNT_IUNLOCK(mp);
558	} else if (mp->mnt_flag & MNT_UPDATE) {
559		MNT_ILOCK(mp);
560		mp->mnt_flag &= ~MNT_RDONLY;
561		MNT_IUNLOCK(mp);
562	}
563
564	/*
565	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
566	 * no sense in that context.  Also, set up appropriate retransmit
567	 * and soft timeout behavior.
568	 */
569	if (argp->sotype == SOCK_STREAM) {
570		nmp->nm_flag &= ~NFSMNT_NOCONN;
571		nmp->nm_timeo = NFS_MAXTIMEO;
572		if ((argp->flags & NFSMNT_NFSV4) != 0)
573			nmp->nm_retry = INT_MAX;
574		else
575			nmp->nm_retry = NFS_RETRANS_TCP;
576	}
577
578	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
579	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
580		argp->flags &= ~NFSMNT_RDIRPLUS;
581		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
582	}
583
584	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
585	if ((argp->flags & NFSMNT_NFSV4) != 0) {
586		argp->flags &= ~NFSMNT_RESVPORT;
587		nmp->nm_flag &= ~NFSMNT_RESVPORT;
588	}
589
590	/* Re-bind if rsrvd port requested and wasn't on one */
591	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
592		  && (argp->flags & NFSMNT_RESVPORT);
593	/* Also re-bind if we're switching to/from a connected UDP socket */
594	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
595		    (argp->flags & NFSMNT_NOCONN));
596
597	/* Update flags atomically.  Don't change the lock bits. */
598	nmp->nm_flag = argp->flags | nmp->nm_flag;
599	splx(s);
600
601	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
602		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
603		if (nmp->nm_timeo < NFS_MINTIMEO)
604			nmp->nm_timeo = NFS_MINTIMEO;
605		else if (nmp->nm_timeo > NFS_MAXTIMEO)
606			nmp->nm_timeo = NFS_MAXTIMEO;
607	}
608
609	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
610		nmp->nm_retry = argp->retrans;
611		if (nmp->nm_retry > NFS_MAXREXMIT)
612			nmp->nm_retry = NFS_MAXREXMIT;
613	}
614
615	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
616		nmp->nm_wsize = argp->wsize;
617		/* Round down to multiple of blocksize */
618		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
619		if (nmp->nm_wsize <= 0)
620			nmp->nm_wsize = NFS_FABLKSIZE;
621	}
622
623	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
624		nmp->nm_rsize = argp->rsize;
625		/* Round down to multiple of blocksize */
626		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
627		if (nmp->nm_rsize <= 0)
628			nmp->nm_rsize = NFS_FABLKSIZE;
629	}
630
631	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
632		nmp->nm_readdirsize = argp->readdirsize;
633	}
634
635	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
636		nmp->nm_acregmin = argp->acregmin;
637	else
638		nmp->nm_acregmin = NFS_MINATTRTIMO;
639	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
640		nmp->nm_acregmax = argp->acregmax;
641	else
642		nmp->nm_acregmax = NFS_MAXATTRTIMO;
643	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
644		nmp->nm_acdirmin = argp->acdirmin;
645	else
646		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
647	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
648		nmp->nm_acdirmax = argp->acdirmax;
649	else
650		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
651	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
652		nmp->nm_acdirmin = nmp->nm_acdirmax;
653	if (nmp->nm_acregmin > nmp->nm_acregmax)
654		nmp->nm_acregmin = nmp->nm_acregmax;
655
656	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
657		if (argp->readahead <= NFS_MAXRAHEAD)
658			nmp->nm_readahead = argp->readahead;
659		else
660			nmp->nm_readahead = NFS_MAXRAHEAD;
661	}
662	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
663		if (argp->wcommitsize < nmp->nm_wsize)
664			nmp->nm_wcommitsize = nmp->nm_wsize;
665		else
666			nmp->nm_wcommitsize = argp->wcommitsize;
667	}
668
669	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
670		    (nmp->nm_soproto != argp->proto));
671
672	if (nmp->nm_client != NULL && adjsock) {
673		int haslock = 0, error = 0;
674
675		if (nmp->nm_sotype == SOCK_STREAM) {
676			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
677			if (!error)
678				haslock = 1;
679		}
680		if (!error) {
681		    newnfs_disconnect(&nmp->nm_sockreq);
682		    if (haslock)
683			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
684		    nmp->nm_sotype = argp->sotype;
685		    nmp->nm_soproto = argp->proto;
686		    if (nmp->nm_sotype == SOCK_DGRAM)
687			while (newnfs_connect(nmp, &nmp->nm_sockreq,
688			    cred, td, 0)) {
689				printf("newnfs_args: retrying connect\n");
690				(void) nfs_catnap(PSOCK, 0, "newnfscon");
691			}
692		}
693	} else {
694		nmp->nm_sotype = argp->sotype;
695		nmp->nm_soproto = argp->proto;
696	}
697
698	if (hostname != NULL) {
699		strlcpy(nmp->nm_hostname, hostname,
700		    sizeof(nmp->nm_hostname));
701		p = strchr(nmp->nm_hostname, ':');
702		if (p != NULL)
703			*p = '\0';
704	}
705}
706
/*
 * NULL-terminated list of the mount option names that nfs_mount() lets
 * through vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize",
	"soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath",
	"negnametimeo",
	NULL
};
717
718/*
719 * VFS Operations.
720 *
721 * mount system call
722 * It seems a bit dumb to copyinstr() the host and path here and then
723 * bcopy() them in mountnfs(), but I wanted to detect errors before
724 * doing the sockargs() call because sockargs() allocates an mbuf and
725 * an error after that means that I have to release the mbuf.
726 */
727/* ARGSUSED */
728static int
729nfs_mount(struct mount *mp)
730{
731	struct nfs_args args = {
732	    .version = NFS_ARGSVERSION,
733	    .addr = NULL,
734	    .addrlen = sizeof (struct sockaddr_in),
735	    .sotype = SOCK_STREAM,
736	    .proto = 0,
737	    .fh = NULL,
738	    .fhsize = 0,
739	    .flags = NFSMNT_RESVPORT,
740	    .wsize = NFS_WSIZE,
741	    .rsize = NFS_RSIZE,
742	    .readdirsize = NFS_READDIRSIZE,
743	    .timeo = 10,
744	    .retrans = NFS_RETRANS,
745	    .readahead = NFS_DEFRAHEAD,
746	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
747	    .hostname = NULL,
748	    .acregmin = NFS_MINATTRTIMO,
749	    .acregmax = NFS_MAXATTRTIMO,
750	    .acdirmin = NFS_MINDIRATTRTIMO,
751	    .acdirmax = NFS_MAXDIRATTRTIMO,
752	};
753	int error = 0, ret, len;
754	struct sockaddr *nam = NULL;
755	struct vnode *vp;
756	struct thread *td;
757	char hst[MNAMELEN];
758	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
759	char *opt, *name, *secname;
760	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
761	int dirlen, krbnamelen, srvkrbnamelen;
762
763	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
764		error = EINVAL;
765		goto out;
766	}
767
768	td = curthread;
769	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
770		error = nfs_mountroot(mp);
771		goto out;
772	}
773
774	nfscl_init();
775
776	/* Handle the new style options. */
777	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
778		args.flags |= NFSMNT_NOCONN;
779	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
780		args.flags |= NFSMNT_NOCONN;
781	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
782		args.flags |= NFSMNT_NOLOCKD;
783	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
784		args.flags &= ~NFSMNT_NOLOCKD;
785	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
786		args.flags |= NFSMNT_INT;
787	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
788		args.flags |= NFSMNT_RDIRPLUS;
789	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
790		args.flags |= NFSMNT_RESVPORT;
791	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
792		args.flags &= ~NFSMNT_RESVPORT;
793	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
794		args.flags |= NFSMNT_SOFT;
795	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
796		args.flags &= ~NFSMNT_SOFT;
797	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
798		args.sotype = SOCK_DGRAM;
799	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
800		args.sotype = SOCK_DGRAM;
801	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
802		args.sotype = SOCK_STREAM;
803	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
804		args.flags |= NFSMNT_NFSV3;
805	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
806		args.flags |= NFSMNT_NFSV4;
807		args.sotype = SOCK_STREAM;
808	}
809	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
810		args.flags |= NFSMNT_ALLGSSNAME;
811	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
812		if (opt == NULL) {
813			vfs_mount_error(mp, "illegal readdirsize");
814			error = EINVAL;
815			goto out;
816		}
817		ret = sscanf(opt, "%d", &args.readdirsize);
818		if (ret != 1 || args.readdirsize <= 0) {
819			vfs_mount_error(mp, "illegal readdirsize: %s",
820			    opt);
821			error = EINVAL;
822			goto out;
823		}
824		args.flags |= NFSMNT_READDIRSIZE;
825	}
826	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
827		if (opt == NULL) {
828			vfs_mount_error(mp, "illegal readahead");
829			error = EINVAL;
830			goto out;
831		}
832		ret = sscanf(opt, "%d", &args.readahead);
833		if (ret != 1 || args.readahead <= 0) {
834			vfs_mount_error(mp, "illegal readahead: %s",
835			    opt);
836			error = EINVAL;
837			goto out;
838		}
839		args.flags |= NFSMNT_READAHEAD;
840	}
841	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
842		if (opt == NULL) {
843			vfs_mount_error(mp, "illegal wsize");
844			error = EINVAL;
845			goto out;
846		}
847		ret = sscanf(opt, "%d", &args.wsize);
848		if (ret != 1 || args.wsize <= 0) {
849			vfs_mount_error(mp, "illegal wsize: %s",
850			    opt);
851			error = EINVAL;
852			goto out;
853		}
854		args.flags |= NFSMNT_WSIZE;
855	}
856	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
857		if (opt == NULL) {
858			vfs_mount_error(mp, "illegal rsize");
859			error = EINVAL;
860			goto out;
861		}
862		ret = sscanf(opt, "%d", &args.rsize);
863		if (ret != 1 || args.rsize <= 0) {
864			vfs_mount_error(mp, "illegal wsize: %s",
865			    opt);
866			error = EINVAL;
867			goto out;
868		}
869		args.flags |= NFSMNT_RSIZE;
870	}
871	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
872		if (opt == NULL) {
873			vfs_mount_error(mp, "illegal retrans");
874			error = EINVAL;
875			goto out;
876		}
877		ret = sscanf(opt, "%d", &args.retrans);
878		if (ret != 1 || args.retrans <= 0) {
879			vfs_mount_error(mp, "illegal retrans: %s",
880			    opt);
881			error = EINVAL;
882			goto out;
883		}
884		args.flags |= NFSMNT_RETRANS;
885	}
886	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
887		ret = sscanf(opt, "%d", &args.acregmin);
888		if (ret != 1 || args.acregmin < 0) {
889			vfs_mount_error(mp, "illegal acregmin: %s",
890			    opt);
891			error = EINVAL;
892			goto out;
893		}
894		args.flags |= NFSMNT_ACREGMIN;
895	}
896	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
897		ret = sscanf(opt, "%d", &args.acregmax);
898		if (ret != 1 || args.acregmax < 0) {
899			vfs_mount_error(mp, "illegal acregmax: %s",
900			    opt);
901			error = EINVAL;
902			goto out;
903		}
904		args.flags |= NFSMNT_ACREGMAX;
905	}
906	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
907		ret = sscanf(opt, "%d", &args.acdirmin);
908		if (ret != 1 || args.acdirmin < 0) {
909			vfs_mount_error(mp, "illegal acdirmin: %s",
910			    opt);
911			error = EINVAL;
912			goto out;
913		}
914		args.flags |= NFSMNT_ACDIRMIN;
915	}
916	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
917		ret = sscanf(opt, "%d", &args.acdirmax);
918		if (ret != 1 || args.acdirmax < 0) {
919			vfs_mount_error(mp, "illegal acdirmax: %s",
920			    opt);
921			error = EINVAL;
922			goto out;
923		}
924		args.flags |= NFSMNT_ACDIRMAX;
925	}
926	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
927		ret = sscanf(opt, "%d", &args.timeo);
928		if (ret != 1 || args.timeo <= 0) {
929			vfs_mount_error(mp, "illegal timeout: %s",
930			    opt);
931			error = EINVAL;
932			goto out;
933		}
934		args.flags |= NFSMNT_TIMEO;
935	}
936	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
937	    == 0) {
938		ret = sscanf(opt, "%d", &negnametimeo);
939		if (ret != 1 || negnametimeo < 0) {
940			vfs_mount_error(mp, "illegal negnametimeo: %s",
941			    opt);
942			error = EINVAL;
943			goto out;
944		}
945	}
946	if (vfs_getopt(mp->mnt_optnew, "sec",
947		(void **) &secname, NULL) == 0)
948		nfs_sec_name(secname, &args.flags);
949
950	if (mp->mnt_flag & MNT_UPDATE) {
951		struct nfsmount *nmp = VFSTONFS(mp);
952
953		if (nmp == NULL) {
954			error = EIO;
955			goto out;
956		}
957		/*
958		 * When doing an update, we can't change version,
959		 * security, switch lockd strategies or change cookie
960		 * translation
961		 */
962		args.flags = (args.flags &
963		    ~(NFSMNT_NFSV3 |
964		      NFSMNT_NFSV4 |
965		      NFSMNT_KERB |
966		      NFSMNT_INTEGRITY |
967		      NFSMNT_PRIVACY |
968		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
969		    (nmp->nm_flag &
970			(NFSMNT_NFSV3 |
971			 NFSMNT_NFSV4 |
972			 NFSMNT_KERB |
973			 NFSMNT_INTEGRITY |
974			 NFSMNT_PRIVACY |
975			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
976		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
977		goto out;
978	}
979
980	/*
981	 * Make the nfs_ip_paranoia sysctl serve as the default connection
982	 * or no-connection mode for those protocols that support
983	 * no-connection mode (the flag will be cleared later for protocols
984	 * that do not support no-connection mode).  This will allow a client
985	 * to receive replies from a different IP then the request was
986	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
987	 * not 0.
988	 */
989	if (nfs_ip_paranoia == 0)
990		args.flags |= NFSMNT_NOCONN;
991
992	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
993	    &args.fhsize) == 0) {
994		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
995			vfs_mount_error(mp, "Bad file handle");
996			error = EINVAL;
997			goto out;
998		}
999		bcopy(args.fh, nfh, args.fhsize);
1000	} else {
1001		args.fhsize = 0;
1002	}
1003
1004	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1005	    &len);
1006	if (args.hostname == NULL) {
1007		vfs_mount_error(mp, "Invalid hostname");
1008		error = EINVAL;
1009		goto out;
1010	}
1011	bcopy(args.hostname, hst, MNAMELEN);
1012	hst[MNAMELEN - 1] = '\0';
1013
1014	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1015		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1016	else
1017		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1018	srvkrbnamelen = strlen(srvkrbname);
1019
1020	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1021		strlcpy(krbname, name, sizeof (krbname));
1022	else
1023		krbname[0] = '\0';
1024	krbnamelen = strlen(krbname);
1025
1026	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1027		strlcpy(dirpath, name, sizeof (dirpath));
1028	else
1029		dirpath[0] = '\0';
1030	dirlen = strlen(dirpath);
1031
1032	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1033	    &args.addrlen) == 0) {
1034		if (args.addrlen > SOCK_MAXADDRLEN) {
1035			error = ENAMETOOLONG;
1036			goto out;
1037		}
1038		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1039		bcopy(args.addr, nam, args.addrlen);
1040		nam->sa_len = args.addrlen;
1041	}
1042
1043	args.fh = nfh;
1044	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1045	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1046	    negnametimeo);
1047out:
1048	if (!error) {
1049		MNT_ILOCK(mp);
1050		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1051		MNT_IUNLOCK(mp);
1052	}
1053	return (error);
1054}
1055
1056
1057/*
1058 * VFS Operations.
1059 *
1060 * mount system call
1061 * It seems a bit dumb to copyinstr() the host and path here and then
1062 * bcopy() them in mountnfs(), but I wanted to detect errors before
1063 * doing the sockargs() call because sockargs() allocates an mbuf and
1064 * an error after that means that I have to release the mbuf.
1065 */
1066/* ARGSUSED */
1067static int
1068nfs_cmount(struct mntarg *ma, void *data, int flags)
1069{
1070	int error;
1071	struct nfs_args args;
1072
1073	error = copyin(data, &args, sizeof (struct nfs_args));
1074	if (error)
1075		return error;
1076
1077	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1078
1079	error = kernel_mount(ma, flags);
1080	return (error);
1081}
1082
1083/*
1084 * Common code for mount and mountroot
1085 */
1086static int
1087mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1088    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1089    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1090    struct ucred *cred, struct thread *td, int negnametimeo)
1091{
1092	struct nfsmount *nmp;
1093	struct nfsnode *np;
1094	int error, trycnt, ret;
1095	struct nfsvattr nfsva;
1096	static u_int64_t clval = 0;
1097
1098	if (mp->mnt_flag & MNT_UPDATE) {
1099		nmp = VFSTONFS(mp);
1100		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1101		FREE(nam, M_SONAME);
1102		return (0);
1103	} else {
1104		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1105		    krbnamelen + dirlen + srvkrbnamelen + 2,
1106		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1107		TAILQ_INIT(&nmp->nm_bufq);
1108		if (clval == 0)
1109			clval = (u_int64_t)nfsboottime.tv_sec;
1110		nmp->nm_clval = clval++;
1111		nmp->nm_krbnamelen = krbnamelen;
1112		nmp->nm_dirpathlen = dirlen;
1113		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1114		if (td->td_ucred->cr_uid != (uid_t)0) {
1115			/*
1116			 * nm_uid is used to get KerberosV credentials for
1117			 * the nfsv4 state handling operations if there is
1118			 * no host based principal set. Use the uid of
1119			 * this user if not root, since they are doing the
1120			 * mount. I don't think setting this for root will
1121			 * work, since root normally does not have user
1122			 * credentials in a credentials cache.
1123			 */
1124			nmp->nm_uid = td->td_ucred->cr_uid;
1125		} else {
1126			/*
1127			 * Just set to -1, so it won't be used.
1128			 */
1129			nmp->nm_uid = (uid_t)-1;
1130		}
1131
1132		/* Copy and null terminate all the names */
1133		if (nmp->nm_krbnamelen > 0) {
1134			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1135			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1136		}
1137		if (nmp->nm_dirpathlen > 0) {
1138			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1139			    nmp->nm_dirpathlen);
1140			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1141			    + 1] = '\0';
1142		}
1143		if (nmp->nm_srvkrbnamelen > 0) {
1144			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1145			    nmp->nm_srvkrbnamelen);
1146			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1147			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1148		}
1149		nmp->nm_sockreq.nr_cred = crhold(cred);
1150		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1151		mp->mnt_data = nmp;
1152		nmp->nm_getinfo = nfs_getnlminfo;
1153		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1154	}
1155	vfs_getnewfsid(mp);
1156	nmp->nm_mountp = mp;
1157	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1158	nmp->nm_negnametimeo = negnametimeo;
1159
1160	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1161
1162	/*
1163	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1164	 * high, depending on whether we end up with negative offsets in
1165	 * the client or server somewhere.  2GB-1 may be safer.
1166	 *
1167	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1168	 * that we can handle until we find out otherwise.
1169	 * XXX Our "safe" limit on the client is what we can store in our
1170	 * buffer cache using signed(!) block numbers.
1171	 */
1172	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1173		nmp->nm_maxfilesize = 0xffffffffLL;
1174	else
1175		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1176
1177	nmp->nm_timeo = NFS_TIMEO;
1178	nmp->nm_retry = NFS_RETRANS;
1179	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1180		nmp->nm_wsize = NFS_WSIZE;
1181		nmp->nm_rsize = NFS_RSIZE;
1182		nmp->nm_readdirsize = NFS_READDIRSIZE;
1183	}
1184	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1185	nmp->nm_numgrps = NFS_MAXGRPS;
1186	nmp->nm_readahead = NFS_DEFRAHEAD;
1187	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1188	if (nmp->nm_tprintf_delay < 0)
1189		nmp->nm_tprintf_delay = 0;
1190	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1191	if (nmp->nm_tprintf_initial_delay < 0)
1192		nmp->nm_tprintf_initial_delay = 0;
1193	nmp->nm_fhsize = argp->fhsize;
1194	if (nmp->nm_fhsize > 0)
1195		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1196	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1197	nmp->nm_nam = nam;
1198	/* Set up the sockets and per-host congestion */
1199	nmp->nm_sotype = argp->sotype;
1200	nmp->nm_soproto = argp->proto;
1201	nmp->nm_sockreq.nr_prog = NFS_PROG;
1202	if ((argp->flags & NFSMNT_NFSV4))
1203		nmp->nm_sockreq.nr_vers = NFS_VER4;
1204	else if ((argp->flags & NFSMNT_NFSV3))
1205		nmp->nm_sockreq.nr_vers = NFS_VER3;
1206	else
1207		nmp->nm_sockreq.nr_vers = NFS_VER2;
1208
1209
1210	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1211		goto bad;
1212
1213	/*
1214	 * A reference count is needed on the nfsnode representing the
1215	 * remote root.  If this object is not persistent, then backward
1216	 * traversals of the mount point (i.e. "..") will not work if
1217	 * the nfsnode gets flushed out of the cache. Ufs does not have
1218	 * this problem, because one can identify root inodes by their
1219	 * number == ROOTINO (2).
1220	 */
1221	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1222	    nmp->nm_dirpathlen > 0) {
1223		/*
1224		 * If the fhsize on the mount point == 0 for V4, the mount
1225		 * path needs to be looked up.
1226		 */
1227		trycnt = 3;
1228		do {
1229			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1230			    cred, td);
1231			if (error)
1232				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1233		} while (error && --trycnt > 0);
1234		if (error) {
1235			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1236			goto bad;
1237		}
1238	}
1239	if (nmp->nm_fhsize > 0) {
1240		/*
1241		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1242		 * non-zero for the root vnode. f_iosize will be set correctly
1243		 * by nfs_statfs() before any I/O occurs.
1244		 */
1245		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1246		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1247		    LK_EXCLUSIVE);
1248		if (error)
1249			goto bad;
1250		*vpp = NFSTOV(np);
1251
1252		/*
1253		 * Get file attributes and transfer parameters for the
1254		 * mountpoint.  This has the side effect of filling in
1255		 * (*vpp)->v_type with the correct value.
1256		 */
1257		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1258		    cred, td, &nfsva, NULL);
1259		if (ret) {
1260			/*
1261			 * Just set default values to get things going.
1262			 */
1263			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1264			nfsva.na_vattr.va_type = VDIR;
1265			nfsva.na_vattr.va_mode = 0777;
1266			nfsva.na_vattr.va_nlink = 100;
1267			nfsva.na_vattr.va_uid = (uid_t)0;
1268			nfsva.na_vattr.va_gid = (gid_t)0;
1269			nfsva.na_vattr.va_fileid = 2;
1270			nfsva.na_vattr.va_gen = 1;
1271			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1272			nfsva.na_vattr.va_size = 512 * 1024;
1273		}
1274		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1275		if (argp->flags & NFSMNT_NFSV3)
1276			ncl_fsinfo(nmp, *vpp, cred, td);
1277
1278		/*
1279		 * Lose the lock but keep the ref.
1280		 */
1281		VOP_UNLOCK(*vpp, 0);
1282		return (0);
1283	}
1284	error = EIO;
1285
1286bad:
1287	newnfs_disconnect(&nmp->nm_sockreq);
1288	crfree(nmp->nm_sockreq.nr_cred);
1289	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1290	mtx_destroy(&nmp->nm_mtx);
1291	FREE(nmp, M_NEWNFSMNT);
1292	FREE(nam, M_SONAME);
1293	return (error);
1294}
1295
1296/*
1297 * unmount system call
1298 */
1299static int
1300nfs_unmount(struct mount *mp, int mntflags)
1301{
1302	struct thread *td;
1303	struct nfsmount *nmp;
1304	int error, flags = 0, trycnt = 0;
1305
1306	td = curthread;
1307
1308	if (mntflags & MNT_FORCE)
1309		flags |= FORCECLOSE;
1310	nmp = VFSTONFS(mp);
1311	/*
1312	 * Goes something like this..
1313	 * - Call vflush() to clear out vnodes for this filesystem
1314	 * - Close the socket
1315	 * - Free up the data structures
1316	 */
1317	/* In the forced case, cancel any outstanding requests. */
1318	if (mntflags & MNT_FORCE) {
1319		error = newnfs_nmcancelreqs(nmp);
1320		if (error)
1321			goto out;
1322		/* For a forced close, get rid of the renew thread now */
1323		nfscl_umount(nmp, td);
1324	}
1325	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1326	do {
1327		error = vflush(mp, 1, flags, td);
1328		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1329			(void) nfs_catnap(PSOCK, error, "newndm");
1330	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1331	if (error)
1332		goto out;
1333
1334	/*
1335	 * We are now committed to the unmount.
1336	 */
1337	if ((mntflags & MNT_FORCE) == 0)
1338		nfscl_umount(nmp, td);
1339	newnfs_disconnect(&nmp->nm_sockreq);
1340	crfree(nmp->nm_sockreq.nr_cred);
1341	FREE(nmp->nm_nam, M_SONAME);
1342
1343	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1344	mtx_destroy(&nmp->nm_mtx);
1345	FREE(nmp, M_NEWNFSMNT);
1346out:
1347	return (error);
1348}
1349
1350/*
1351 * Return root of a filesystem
1352 */
1353static int
1354nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1355{
1356	struct vnode *vp;
1357	struct nfsmount *nmp;
1358	struct nfsnode *np;
1359	int error;
1360
1361	nmp = VFSTONFS(mp);
1362	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1363	if (error)
1364		return error;
1365	vp = NFSTOV(np);
1366	/*
1367	 * Get transfer parameters and attributes for root vnode once.
1368	 */
1369	mtx_lock(&nmp->nm_mtx);
1370	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1371		mtx_unlock(&nmp->nm_mtx);
1372		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1373	} else
1374		mtx_unlock(&nmp->nm_mtx);
1375	if (vp->v_type == VNON)
1376	    vp->v_type = VDIR;
1377	vp->v_vflag |= VV_ROOT;
1378	*vpp = vp;
1379	return (0);
1380}
1381
/*
 * Flush out the buffer cache
 *
 * Walks the vnodes of the mount and calls VOP_FSYNC() on each one that
 * is not locked and has dirty buffers.  Nothing is flushed when waitfor
 * is MNT_LAZY.  Returns 0, or the error of the last VOP_FSYNC() call
 * that failed.
 */
/* ARGSUSED */
static int
nfs_sync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, allerror = 0;

	td = curthread;

	/*
	 * Force stale buffer cache information to be flushed.
	 * The mount interlock is held whenever the iterator advances and
	 * is dropped while an individual vnode is being worked on.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		/* XXX Racy bv_cnt check. */
		/* Skip vnodes that are locked, clean, or on a lazy sync. */
		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
		    waitfor == MNT_LAZY) {
			VI_UNLOCK(vp);
			MNT_ILOCK(mp);
			continue;
		}
		/*
		 * If the vnode cannot be acquired, abandon this pass and
		 * restart the scan from the beginning.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_ILOCK(mp);
			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
			goto loop;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		/* Remember the failure but keep going with the other vnodes. */
		if (error)
			allerror = error;
		VOP_UNLOCK(vp, 0);
		vrele(vp);

		/* Retake the mount interlock before moving to the next vnode. */
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}
1426
1427static int
1428nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1429{
1430	struct nfsmount *nmp = VFSTONFS(mp);
1431	struct vfsquery vq;
1432	int error;
1433
1434	bzero(&vq, sizeof(vq));
1435	switch (op) {
1436#if 0
1437	case VFS_CTL_NOLOCKS:
1438		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1439 		if (req->oldptr != NULL) {
1440 			error = SYSCTL_OUT(req, &val, sizeof(val));
1441 			if (error)
1442 				return (error);
1443 		}
1444 		if (req->newptr != NULL) {
1445 			error = SYSCTL_IN(req, &val, sizeof(val));
1446 			if (error)
1447 				return (error);
1448			if (val)
1449				nmp->nm_flag |= NFSMNT_NOLOCKS;
1450			else
1451				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1452 		}
1453		break;
1454#endif
1455	case VFS_CTL_QUERY:
1456		mtx_lock(&nmp->nm_mtx);
1457		if (nmp->nm_state & NFSSTA_TIMEO)
1458			vq.vq_flags |= VQ_NOTRESP;
1459		mtx_unlock(&nmp->nm_mtx);
1460#if 0
1461		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1462		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1463			vq.vq_flags |= VQ_NOTRESPLOCK;
1464#endif
1465		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1466		break;
1467 	case VFS_CTL_TIMEO:
1468 		if (req->oldptr != NULL) {
1469 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1470 			    sizeof(nmp->nm_tprintf_initial_delay));
1471 			if (error)
1472 				return (error);
1473 		}
1474 		if (req->newptr != NULL) {
1475			error = vfs_suser(mp, req->td);
1476			if (error)
1477				return (error);
1478 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1479 			    sizeof(nmp->nm_tprintf_initial_delay));
1480 			if (error)
1481 				return (error);
1482 			if (nmp->nm_tprintf_initial_delay < 0)
1483 				nmp->nm_tprintf_initial_delay = 0;
1484 		}
1485		break;
1486	default:
1487		return (ENOTSUP);
1488	}
1489	return (0);
1490}
1491
1492/*
1493 * Extract the information needed by the nlm from the nfs vnode.
1494 */
1495static void
1496nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1497    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1498    struct timeval *timeop)
1499{
1500	struct nfsmount *nmp;
1501	struct nfsnode *np = VTONFS(vp);
1502
1503	nmp = VFSTONFS(vp->v_mount);
1504	if (fhlenp != NULL)
1505		*fhlenp = (size_t)np->n_fhp->nfh_len;
1506	if (fhp != NULL)
1507		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1508	if (sp != NULL)
1509		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1510	if (is_v3p != NULL)
1511		*is_v3p = NFS_ISV3(vp);
1512	if (sizep != NULL)
1513		*sizep = np->n_size;
1514	if (timeop != NULL) {
1515		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1516		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1517	}
1518}
1519
1520