nfs_clvfsops.c revision 221139
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 221139 2011-04-27 20:42:30Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
/* Register the "nfscl" kernel feature string. */
FEATURE(nfscl, "NFSv4 client");

/* Globals defined outside this file. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;

/* Malloc types; the last argument is the human-readable description. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

/*
 * Sysctl nodes hung off the _vfs_newnfs parent.  All of the ones below
 * are read-write (CTLFLAG_RW).
 */
SYSCTL_DECL(_vfs_newnfs);
/* Client statistics structure. */
SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
	&newnfsstats, nfsstats, "S,nfsstats");
/*
 * Defaults to 1.  nfs_mount() turns NFSMNT_NOCONN on by default for new
 * mounts when this is set to 0.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): presumably the delay before the first "not responding"
 * console message; the variable is not used in the visible part of this
 * file -- confirm against its users.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
99
/*
 * Forward declarations of file-local helpers, followed by the VFS entry
 * points that are plugged into the nfs_vfsops table below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/* VFS operations implemented by this file. */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
119
/*
 * nfs vfs operations.
 *
 * The table maps each VFS operation to its handler.  VFS_SET() registers
 * it under the name "nfs" with the VFCF_NETWORK flag.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
135
/*
 * So that loader and kldload(2) can find us, wherever we are..
 *
 * Module "nfs" is declared at version 1 and depends on the nfscommon,
 * krpc, nfssvc and nfslock modules, each pinned to version 1.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
142
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 *
 * The zero-initialized fallback definitions below are only compiled when
 * neither NFS_ROOT nor NFSCLIENT is defined.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only (CTLFLAG_RD) sysctls exposing the diskless boot state. */
SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Exports nfsv3_diskless.root_hostnam as a string. */
SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Exports the raw bytes of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
165
166
/*
 * Prototypes for the diskless-root helpers defined below, plus
 * newnfsargs_ntoh(), which has external linkage and is not defined in
 * the visible part of this file.
 */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
174
175int
176newnfs_iosize(struct nfsmount *nmp)
177{
178	int iosize, maxio;
179
180	/* First, set the upper limit for iosize */
181	if (nmp->nm_flag & NFSMNT_NFSV4) {
182		maxio = NFS_MAXBSIZE;
183	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
184		if (nmp->nm_sotype == SOCK_DGRAM)
185			maxio = NFS_MAXDGRAMDATA;
186		else
187			maxio = NFS_MAXBSIZE;
188	} else {
189		maxio = NFS_V2MAXDATA;
190	}
191	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
192		nmp->nm_rsize = maxio;
193	if (nmp->nm_rsize > MAXBSIZE)
194		nmp->nm_rsize = MAXBSIZE;
195	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
196		nmp->nm_readdirsize = maxio;
197	if (nmp->nm_readdirsize > nmp->nm_rsize)
198		nmp->nm_readdirsize = nmp->nm_rsize;
199	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
200		nmp->nm_wsize = maxio;
201	if (nmp->nm_wsize > MAXBSIZE)
202		nmp->nm_wsize = MAXBSIZE;
203
204	/*
205	 * Calculate the size used for io buffers.  Use the larger
206	 * of the two sizes to minimise nfs requests but make sure
207	 * that it is at least one VM page to avoid wasting buffer
208	 * space.
209	 */
210	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
211	iosize = imax(iosize, PAGE_SIZE);
212	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
213	return (iosize);
214}
215
216static void
217nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
218{
219
220	args->version = NFS_ARGSVERSION;
221	args->addr = oargs->addr;
222	args->addrlen = oargs->addrlen;
223	args->sotype = oargs->sotype;
224	args->proto = oargs->proto;
225	args->fh = oargs->fh;
226	args->fhsize = oargs->fhsize;
227	args->flags = oargs->flags;
228	args->wsize = oargs->wsize;
229	args->rsize = oargs->rsize;
230	args->readdirsize = oargs->readdirsize;
231	args->timeo = oargs->timeo;
232	args->retrans = oargs->retrans;
233	args->readahead = oargs->readahead;
234	args->hostname = oargs->hostname;
235}
236
237static void
238nfs_convert_diskless(void)
239{
240
241	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
242		sizeof(struct ifaliasreq));
243	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
244		sizeof(struct sockaddr_in));
245	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
246	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
247		nfsv3_diskless.root_fhsize = NFSX_MYFH;
248		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
249	} else {
250		nfsv3_diskless.root_fhsize = NFSX_V2FH;
251		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
252	}
253	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
254		sizeof(struct sockaddr_in));
255	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
256	nfsv3_diskless.root_time = nfs_diskless.root_time;
257	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
258		MAXHOSTNAMELEN);
259	nfs_diskless_valid = 3;
260}
261
262/*
263 * nfs statfs call
264 */
265static int
266nfs_statfs(struct mount *mp, struct statfs *sbp)
267{
268	struct vnode *vp;
269	struct thread *td;
270	struct nfsmount *nmp = VFSTONFS(mp);
271	struct nfsvattr nfsva;
272	struct nfsfsinfo fs;
273	struct nfsstatfs sb;
274	int error = 0, attrflag, gotfsinfo = 0, ret;
275	struct nfsnode *np;
276
277	td = curthread;
278
279	error = vfs_busy(mp, MBF_NOWAIT);
280	if (error)
281		return (error);
282	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
283	if (error) {
284		vfs_unbusy(mp);
285		return (error);
286	}
287	vp = NFSTOV(np);
288	mtx_lock(&nmp->nm_mtx);
289	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
290		mtx_unlock(&nmp->nm_mtx);
291		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
292		    &attrflag, NULL);
293		if (!error)
294			gotfsinfo = 1;
295	} else
296		mtx_unlock(&nmp->nm_mtx);
297	if (!error)
298		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
299		    &attrflag, NULL);
300	if (attrflag == 0) {
301		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
302		    td->td_ucred, td, &nfsva, NULL);
303		if (ret) {
304			/*
305			 * Just set default values to get things going.
306			 */
307			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
308			nfsva.na_vattr.va_type = VDIR;
309			nfsva.na_vattr.va_mode = 0777;
310			nfsva.na_vattr.va_nlink = 100;
311			nfsva.na_vattr.va_uid = (uid_t)0;
312			nfsva.na_vattr.va_gid = (gid_t)0;
313			nfsva.na_vattr.va_fileid = 2;
314			nfsva.na_vattr.va_gen = 1;
315			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
316			nfsva.na_vattr.va_size = 512 * 1024;
317		}
318	}
319	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
320	if (!error) {
321	    mtx_lock(&nmp->nm_mtx);
322	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
323		nfscl_loadfsinfo(nmp, &fs);
324	    nfscl_loadsbinfo(nmp, &sb, sbp);
325	    sbp->f_iosize = newnfs_iosize(nmp);
326	    mtx_unlock(&nmp->nm_mtx);
327	    if (sbp != &mp->mnt_stat) {
328		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
329		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
330	    }
331	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
332	} else if (NFS_ISV4(vp)) {
333		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
334	}
335	vput(vp);
336	vfs_unbusy(mp);
337	return (error);
338}
339
340/*
341 * nfs version 3 fsinfo rpc call
342 */
343int
344ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
345    struct thread *td)
346{
347	struct nfsfsinfo fs;
348	struct nfsvattr nfsva;
349	int error, attrflag;
350
351	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
352	if (!error) {
353		if (attrflag)
354			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
355			    1);
356		mtx_lock(&nmp->nm_mtx);
357		nfscl_loadfsinfo(nmp, &fs);
358		mtx_unlock(&nmp->nm_mtx);
359	}
360	return (error);
361}
362
363/*
364 * Mount a remote root fs via. nfs. This depends on the info in the
365 * nfs_diskless structure that has been filled in properly by some primary
366 * bootstrap.
367 * It goes something like this:
368 * - do enough of "ifconfig" by calling ifioctl() so that the system
369 *   can talk to the server
370 * - If nfs_diskless.mygateway is filled in, use that address as
371 *   a default gateway.
372 * - build the rootfs mount point and call mountnfs() to do the rest.
373 *
374 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
375 * structure, as well as other global NFS client variables here, as
376 * nfs_mountroot() will be called once in the boot before any other NFS
377 * client activity occurs.
378 */
379static int
380nfs_mountroot(struct mount *mp)
381{
382	struct thread *td = curthread;
383	struct nfsv3_diskless *nd = &nfsv3_diskless;
384	struct socket *so;
385	struct vnode *vp;
386	struct ifreq ir;
387	int error;
388	u_long l;
389	char buf[128];
390	char *cp;
391
392#if defined(BOOTP_NFSROOT) && defined(BOOTP)
393	bootpc_init();		/* use bootp to get nfs_diskless filled in */
394#elif defined(NFS_ROOT)
395	nfs_setup_diskless();
396#endif
397
398	if (nfs_diskless_valid == 0)
399		return (-1);
400	if (nfs_diskless_valid == 1)
401		nfs_convert_diskless();
402
403	/*
404	 * XXX splnet, so networks will receive...
405	 */
406	splnet();
407
408	/*
409	 * Do enough of ifconfig(8) so that the critical net interface can
410	 * talk to the server.
411	 */
412	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
413	    td->td_ucred, td);
414	if (error)
415		panic("nfs_mountroot: socreate(%04x): %d",
416			nd->myif.ifra_addr.sa_family, error);
417
418#if 0 /* XXX Bad idea */
419	/*
420	 * We might not have been told the right interface, so we pass
421	 * over the first ten interfaces of the same kind, until we get
422	 * one of them configured.
423	 */
424
425	for (i = strlen(nd->myif.ifra_name) - 1;
426		nd->myif.ifra_name[i] >= '0' &&
427		nd->myif.ifra_name[i] <= '9';
428		nd->myif.ifra_name[i] ++) {
429		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
430		if(!error)
431			break;
432	}
433#endif
434	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
435	if (error)
436		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
437	if ((cp = getenv("boot.netif.mtu")) != NULL) {
438		ir.ifr_mtu = strtol(cp, NULL, 10);
439		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
440		freeenv(cp);
441		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
442		if (error)
443			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
444	}
445	soclose(so);
446
447	/*
448	 * If the gateway field is filled in, set it as the default route.
449	 * Note that pxeboot will set a default route of 0 if the route
450	 * is not set by the DHCP server.  Check also for a value of 0
451	 * to avoid panicking inappropriately in that situation.
452	 */
453	if (nd->mygateway.sin_len != 0 &&
454	    nd->mygateway.sin_addr.s_addr != 0) {
455		struct sockaddr_in mask, sin;
456
457		bzero((caddr_t)&mask, sizeof(mask));
458		sin = mask;
459		sin.sin_family = AF_INET;
460		sin.sin_len = sizeof(sin);
461                /* XXX MRT use table 0 for this sort of thing */
462		CURVNET_SET(TD_TO_VNET(td));
463		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
464		    (struct sockaddr *)&nd->mygateway,
465		    (struct sockaddr *)&mask,
466		    RTF_UP | RTF_GATEWAY, NULL);
467		CURVNET_RESTORE();
468		if (error)
469			panic("nfs_mountroot: RTM_ADD: %d", error);
470	}
471
472	/*
473	 * Create the rootfs mount point.
474	 */
475	nd->root_args.fh = nd->root_fh;
476	nd->root_args.fhsize = nd->root_fhsize;
477	l = ntohl(nd->root_saddr.sin_addr.s_addr);
478	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
479		(l >> 24) & 0xff, (l >> 16) & 0xff,
480		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
481	printf("NFS ROOT: %s\n", buf);
482	nd->root_args.hostname = buf;
483	if ((error = nfs_mountdiskless(buf,
484	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
485		return (error);
486	}
487
488	/*
489	 * This is not really an nfs issue, but it is much easier to
490	 * set hostname here and then let the "/etc/rc.xxx" files
491	 * mount the right /var based upon its preset value.
492	 */
493	mtx_lock(&prison0.pr_mtx);
494	strlcpy(prison0.pr_hostname, nd->my_hostnam,
495	    sizeof(prison0.pr_hostname));
496	mtx_unlock(&prison0.pr_mtx);
497	inittodr(ntohl(nd->root_time));
498	return (0);
499}
500
501/*
502 * Internal version of mount system call for diskless setup.
503 */
504static int
505nfs_mountdiskless(char *path,
506    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
507    struct vnode **vpp, struct mount *mp)
508{
509	struct sockaddr *nam;
510	int dirlen, error;
511	char *dirpath;
512
513	/*
514	 * Find the directory path in "path", which also has the server's
515	 * name/ip address in it.
516	 */
517	dirpath = strchr(path, ':');
518	if (dirpath != NULL)
519		dirlen = strlen(++dirpath);
520	else
521		dirlen = 0;
522	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
523	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
524	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
525		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
526		return (error);
527	}
528	return (0);
529}
530
531static void
532nfs_sec_name(char *sec, int *flagsp)
533{
534	if (!strcmp(sec, "krb5"))
535		*flagsp |= NFSMNT_KERB;
536	else if (!strcmp(sec, "krb5i"))
537		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
538	else if (!strcmp(sec, "krb5p"))
539		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
540}
541
542static void
543nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
544    const char *hostname, struct ucred *cred, struct thread *td)
545{
546	int s;
547	int adjsock;
548	char *p;
549
550	s = splnet();
551
552	/*
553	 * Set read-only flag if requested; otherwise, clear it if this is
554	 * an update.  If this is not an update, then either the read-only
555	 * flag is already clear, or this is a root mount and it was set
556	 * intentionally at some previous point.
557	 */
558	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
559		MNT_ILOCK(mp);
560		mp->mnt_flag |= MNT_RDONLY;
561		MNT_IUNLOCK(mp);
562	} else if (mp->mnt_flag & MNT_UPDATE) {
563		MNT_ILOCK(mp);
564		mp->mnt_flag &= ~MNT_RDONLY;
565		MNT_IUNLOCK(mp);
566	}
567
568	/*
569	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
570	 * no sense in that context.  Also, set up appropriate retransmit
571	 * and soft timeout behavior.
572	 */
573	if (argp->sotype == SOCK_STREAM) {
574		nmp->nm_flag &= ~NFSMNT_NOCONN;
575		nmp->nm_timeo = NFS_MAXTIMEO;
576		if ((argp->flags & NFSMNT_NFSV4) != 0)
577			nmp->nm_retry = INT_MAX;
578		else
579			nmp->nm_retry = NFS_RETRANS_TCP;
580	}
581
582	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
583	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
584		argp->flags &= ~NFSMNT_RDIRPLUS;
585		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
586	}
587
588	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
589	if ((argp->flags & NFSMNT_NFSV4) != 0) {
590		argp->flags &= ~NFSMNT_RESVPORT;
591		nmp->nm_flag &= ~NFSMNT_RESVPORT;
592	}
593
594	/* Re-bind if rsrvd port requested and wasn't on one */
595	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
596		  && (argp->flags & NFSMNT_RESVPORT);
597	/* Also re-bind if we're switching to/from a connected UDP socket */
598	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
599		    (argp->flags & NFSMNT_NOCONN));
600
601	/* Update flags atomically.  Don't change the lock bits. */
602	nmp->nm_flag = argp->flags | nmp->nm_flag;
603	splx(s);
604
605	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
606		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
607		if (nmp->nm_timeo < NFS_MINTIMEO)
608			nmp->nm_timeo = NFS_MINTIMEO;
609		else if (nmp->nm_timeo > NFS_MAXTIMEO)
610			nmp->nm_timeo = NFS_MAXTIMEO;
611	}
612
613	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
614		nmp->nm_retry = argp->retrans;
615		if (nmp->nm_retry > NFS_MAXREXMIT)
616			nmp->nm_retry = NFS_MAXREXMIT;
617	}
618
619	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
620		nmp->nm_wsize = argp->wsize;
621		/* Round down to multiple of blocksize */
622		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
623		if (nmp->nm_wsize <= 0)
624			nmp->nm_wsize = NFS_FABLKSIZE;
625	}
626
627	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
628		nmp->nm_rsize = argp->rsize;
629		/* Round down to multiple of blocksize */
630		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
631		if (nmp->nm_rsize <= 0)
632			nmp->nm_rsize = NFS_FABLKSIZE;
633	}
634
635	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
636		nmp->nm_readdirsize = argp->readdirsize;
637	}
638
639	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
640		nmp->nm_acregmin = argp->acregmin;
641	else
642		nmp->nm_acregmin = NFS_MINATTRTIMO;
643	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
644		nmp->nm_acregmax = argp->acregmax;
645	else
646		nmp->nm_acregmax = NFS_MAXATTRTIMO;
647	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
648		nmp->nm_acdirmin = argp->acdirmin;
649	else
650		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
651	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
652		nmp->nm_acdirmax = argp->acdirmax;
653	else
654		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
655	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
656		nmp->nm_acdirmin = nmp->nm_acdirmax;
657	if (nmp->nm_acregmin > nmp->nm_acregmax)
658		nmp->nm_acregmin = nmp->nm_acregmax;
659
660	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
661		if (argp->readahead <= NFS_MAXRAHEAD)
662			nmp->nm_readahead = argp->readahead;
663		else
664			nmp->nm_readahead = NFS_MAXRAHEAD;
665	}
666	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
667		if (argp->wcommitsize < nmp->nm_wsize)
668			nmp->nm_wcommitsize = nmp->nm_wsize;
669		else
670			nmp->nm_wcommitsize = argp->wcommitsize;
671	}
672
673	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
674		    (nmp->nm_soproto != argp->proto));
675
676	if (nmp->nm_client != NULL && adjsock) {
677		int haslock = 0, error = 0;
678
679		if (nmp->nm_sotype == SOCK_STREAM) {
680			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
681			if (!error)
682				haslock = 1;
683		}
684		if (!error) {
685		    newnfs_disconnect(&nmp->nm_sockreq);
686		    if (haslock)
687			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
688		    nmp->nm_sotype = argp->sotype;
689		    nmp->nm_soproto = argp->proto;
690		    if (nmp->nm_sotype == SOCK_DGRAM)
691			while (newnfs_connect(nmp, &nmp->nm_sockreq,
692			    cred, td, 0)) {
693				printf("newnfs_args: retrying connect\n");
694				(void) nfs_catnap(PSOCK, 0, "newnfscon");
695			}
696		}
697	} else {
698		nmp->nm_sotype = argp->sotype;
699		nmp->nm_soproto = argp->proto;
700	}
701
702	if (hostname != NULL) {
703		strlcpy(nmp->nm_hostname, hostname,
704		    sizeof(nmp->nm_hostname));
705		p = strchr(nmp->nm_hostname, ':');
706		if (p != NULL)
707			*p = '\0';
708	}
709}
710
/*
 * Mount option names accepted by nfs_mount(), which passes this
 * NULL-terminated list to vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
721
722/*
723 * VFS Operations.
724 *
725 * mount system call
726 * It seems a bit dumb to copyinstr() the host and path here and then
727 * bcopy() them in mountnfs(), but I wanted to detect errors before
728 * doing the sockargs() call because sockargs() allocates an mbuf and
729 * an error after that means that I have to release the mbuf.
730 */
731/* ARGSUSED */
732static int
733nfs_mount(struct mount *mp)
734{
735	struct nfs_args args = {
736	    .version = NFS_ARGSVERSION,
737	    .addr = NULL,
738	    .addrlen = sizeof (struct sockaddr_in),
739	    .sotype = SOCK_STREAM,
740	    .proto = 0,
741	    .fh = NULL,
742	    .fhsize = 0,
743	    .flags = NFSMNT_RESVPORT,
744	    .wsize = NFS_WSIZE,
745	    .rsize = NFS_RSIZE,
746	    .readdirsize = NFS_READDIRSIZE,
747	    .timeo = 10,
748	    .retrans = NFS_RETRANS,
749	    .readahead = NFS_DEFRAHEAD,
750	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
751	    .hostname = NULL,
752	    .acregmin = NFS_MINATTRTIMO,
753	    .acregmax = NFS_MAXATTRTIMO,
754	    .acdirmin = NFS_MINDIRATTRTIMO,
755	    .acdirmax = NFS_MAXDIRATTRTIMO,
756	};
757	int error = 0, ret, len;
758	struct sockaddr *nam = NULL;
759	struct vnode *vp;
760	struct thread *td;
761	char hst[MNAMELEN];
762	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
763	char *opt, *name, *secname;
764	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
765	int dirlen, krbnamelen, srvkrbnamelen;
766
767	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
768		error = EINVAL;
769		goto out;
770	}
771
772	td = curthread;
773	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
774		error = nfs_mountroot(mp);
775		goto out;
776	}
777
778	nfscl_init();
779
780	/* Handle the new style options. */
781	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
782		args.flags |= NFSMNT_NOCONN;
783	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
784		args.flags |= NFSMNT_NOCONN;
785	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
786		args.flags |= NFSMNT_NOLOCKD;
787	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
788		args.flags &= ~NFSMNT_NOLOCKD;
789	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
790		args.flags |= NFSMNT_INT;
791	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
792		args.flags |= NFSMNT_RDIRPLUS;
793	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
794		args.flags |= NFSMNT_RESVPORT;
795	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
796		args.flags &= ~NFSMNT_RESVPORT;
797	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
798		args.flags |= NFSMNT_SOFT;
799	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
800		args.flags &= ~NFSMNT_SOFT;
801	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
802		args.sotype = SOCK_DGRAM;
803	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
804		args.sotype = SOCK_DGRAM;
805	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
806		args.sotype = SOCK_STREAM;
807	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
808		args.flags |= NFSMNT_NFSV3;
809	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
810		args.flags |= NFSMNT_NFSV4;
811		args.sotype = SOCK_STREAM;
812	}
813	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
814		args.flags |= NFSMNT_ALLGSSNAME;
815	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
816		if (opt == NULL) {
817			vfs_mount_error(mp, "illegal readdirsize");
818			error = EINVAL;
819			goto out;
820		}
821		ret = sscanf(opt, "%d", &args.readdirsize);
822		if (ret != 1 || args.readdirsize <= 0) {
823			vfs_mount_error(mp, "illegal readdirsize: %s",
824			    opt);
825			error = EINVAL;
826			goto out;
827		}
828		args.flags |= NFSMNT_READDIRSIZE;
829	}
830	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
831		if (opt == NULL) {
832			vfs_mount_error(mp, "illegal readahead");
833			error = EINVAL;
834			goto out;
835		}
836		ret = sscanf(opt, "%d", &args.readahead);
837		if (ret != 1 || args.readahead <= 0) {
838			vfs_mount_error(mp, "illegal readahead: %s",
839			    opt);
840			error = EINVAL;
841			goto out;
842		}
843		args.flags |= NFSMNT_READAHEAD;
844	}
845	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
846		if (opt == NULL) {
847			vfs_mount_error(mp, "illegal wsize");
848			error = EINVAL;
849			goto out;
850		}
851		ret = sscanf(opt, "%d", &args.wsize);
852		if (ret != 1 || args.wsize <= 0) {
853			vfs_mount_error(mp, "illegal wsize: %s",
854			    opt);
855			error = EINVAL;
856			goto out;
857		}
858		args.flags |= NFSMNT_WSIZE;
859	}
860	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
861		if (opt == NULL) {
862			vfs_mount_error(mp, "illegal rsize");
863			error = EINVAL;
864			goto out;
865		}
866		ret = sscanf(opt, "%d", &args.rsize);
867		if (ret != 1 || args.rsize <= 0) {
868			vfs_mount_error(mp, "illegal wsize: %s",
869			    opt);
870			error = EINVAL;
871			goto out;
872		}
873		args.flags |= NFSMNT_RSIZE;
874	}
875	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
876		if (opt == NULL) {
877			vfs_mount_error(mp, "illegal retrans");
878			error = EINVAL;
879			goto out;
880		}
881		ret = sscanf(opt, "%d", &args.retrans);
882		if (ret != 1 || args.retrans <= 0) {
883			vfs_mount_error(mp, "illegal retrans: %s",
884			    opt);
885			error = EINVAL;
886			goto out;
887		}
888		args.flags |= NFSMNT_RETRANS;
889	}
890	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
891		ret = sscanf(opt, "%d", &args.acregmin);
892		if (ret != 1 || args.acregmin < 0) {
893			vfs_mount_error(mp, "illegal acregmin: %s",
894			    opt);
895			error = EINVAL;
896			goto out;
897		}
898		args.flags |= NFSMNT_ACREGMIN;
899	}
900	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
901		ret = sscanf(opt, "%d", &args.acregmax);
902		if (ret != 1 || args.acregmax < 0) {
903			vfs_mount_error(mp, "illegal acregmax: %s",
904			    opt);
905			error = EINVAL;
906			goto out;
907		}
908		args.flags |= NFSMNT_ACREGMAX;
909	}
910	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
911		ret = sscanf(opt, "%d", &args.acdirmin);
912		if (ret != 1 || args.acdirmin < 0) {
913			vfs_mount_error(mp, "illegal acdirmin: %s",
914			    opt);
915			error = EINVAL;
916			goto out;
917		}
918		args.flags |= NFSMNT_ACDIRMIN;
919	}
920	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
921		ret = sscanf(opt, "%d", &args.acdirmax);
922		if (ret != 1 || args.acdirmax < 0) {
923			vfs_mount_error(mp, "illegal acdirmax: %s",
924			    opt);
925			error = EINVAL;
926			goto out;
927		}
928		args.flags |= NFSMNT_ACDIRMAX;
929	}
930	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
931		ret = sscanf(opt, "%d", &args.timeo);
932		if (ret != 1 || args.timeo <= 0) {
933			vfs_mount_error(mp, "illegal timeout: %s",
934			    opt);
935			error = EINVAL;
936			goto out;
937		}
938		args.flags |= NFSMNT_TIMEO;
939	}
940	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
941	    == 0) {
942		ret = sscanf(opt, "%d", &negnametimeo);
943		if (ret != 1 || negnametimeo < 0) {
944			vfs_mount_error(mp, "illegal negnametimeo: %s",
945			    opt);
946			error = EINVAL;
947			goto out;
948		}
949	}
950	if (vfs_getopt(mp->mnt_optnew, "sec",
951		(void **) &secname, NULL) == 0)
952		nfs_sec_name(secname, &args.flags);
953
954	if (mp->mnt_flag & MNT_UPDATE) {
955		struct nfsmount *nmp = VFSTONFS(mp);
956
957		if (nmp == NULL) {
958			error = EIO;
959			goto out;
960		}
961		/*
962		 * When doing an update, we can't change version,
963		 * security, switch lockd strategies or change cookie
964		 * translation
965		 */
966		args.flags = (args.flags &
967		    ~(NFSMNT_NFSV3 |
968		      NFSMNT_NFSV4 |
969		      NFSMNT_KERB |
970		      NFSMNT_INTEGRITY |
971		      NFSMNT_PRIVACY |
972		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
973		    (nmp->nm_flag &
974			(NFSMNT_NFSV3 |
975			 NFSMNT_NFSV4 |
976			 NFSMNT_KERB |
977			 NFSMNT_INTEGRITY |
978			 NFSMNT_PRIVACY |
979			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
980		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
981		goto out;
982	}
983
984	/*
985	 * Make the nfs_ip_paranoia sysctl serve as the default connection
986	 * or no-connection mode for those protocols that support
987	 * no-connection mode (the flag will be cleared later for protocols
988	 * that do not support no-connection mode).  This will allow a client
989	 * to receive replies from a different IP than the request was
990	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
991	 * not 0.
992	 */
993	if (nfs_ip_paranoia == 0)
994		args.flags |= NFSMNT_NOCONN;
995
996	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
997	    &args.fhsize) == 0) {
998		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
999			vfs_mount_error(mp, "Bad file handle");
1000			error = EINVAL;
1001			goto out;
1002		}
1003		bcopy(args.fh, nfh, args.fhsize);
1004	} else {
1005		args.fhsize = 0;
1006	}
1007
1008	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1009	    &len);
1010	if (args.hostname == NULL) {
1011		vfs_mount_error(mp, "Invalid hostname");
1012		error = EINVAL;
1013		goto out;
1014	}
1015	bcopy(args.hostname, hst, MNAMELEN);
1016	hst[MNAMELEN - 1] = '\0';
1017
1018	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1019		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1020	else
1021		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1022	srvkrbnamelen = strlen(srvkrbname);
1023
1024	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1025		strlcpy(krbname, name, sizeof (krbname));
1026	else
1027		krbname[0] = '\0';
1028	krbnamelen = strlen(krbname);
1029
1030	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1031		strlcpy(dirpath, name, sizeof (dirpath));
1032	else
1033		dirpath[0] = '\0';
1034	dirlen = strlen(dirpath);
1035
1036	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1037	    &args.addrlen) == 0) {
1038		if (args.addrlen > SOCK_MAXADDRLEN) {
1039			error = ENAMETOOLONG;
1040			goto out;
1041		}
1042		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1043		bcopy(args.addr, nam, args.addrlen);
1044		nam->sa_len = args.addrlen;
1045	}
1046
1047	args.fh = nfh;
1048	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1049	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1050	    negnametimeo);
1051out:
1052	if (!error) {
1053		MNT_ILOCK(mp);
1054		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1055		MNT_IUNLOCK(mp);
1056	}
1057	return (error);
1058}
1059
1060
1061/*
1062 * VFS Operations.
1063 *
1064 * mount system call
1065 * It seems a bit dumb to copyinstr() the host and path here and then
1066 * bcopy() them in mountnfs(), but I wanted to detect errors before
1067 * doing the sockargs() call because sockargs() allocates an mbuf and
1068 * an error after that means that I have to release the mbuf.
1069 */
1070/* ARGSUSED */
1071static int
1072nfs_cmount(struct mntarg *ma, void *data, int flags)
1073{
1074	int error;
1075	struct nfs_args args;
1076
1077	error = copyin(data, &args, sizeof (struct nfs_args));
1078	if (error)
1079		return error;
1080
1081	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1082
1083	error = kernel_mount(ma, flags);
1084	return (error);
1085}
1086
1087/*
1088 * Common code for mount and mountroot
1089 */
1090static int
1091mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1092    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1093    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1094    struct ucred *cred, struct thread *td, int negnametimeo)
1095{
1096	struct nfsmount *nmp;
1097	struct nfsnode *np;
1098	int error, trycnt, ret;
1099	struct nfsvattr nfsva;
1100	static u_int64_t clval = 0;
1101
1102	if (mp->mnt_flag & MNT_UPDATE) {
1103		nmp = VFSTONFS(mp);
1104		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1105		FREE(nam, M_SONAME);
1106		return (0);
1107	} else {
1108		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1109		    krbnamelen + dirlen + srvkrbnamelen + 2,
1110		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1111		TAILQ_INIT(&nmp->nm_bufq);
1112		if (clval == 0)
1113			clval = (u_int64_t)nfsboottime.tv_sec;
1114		nmp->nm_clval = clval++;
1115		nmp->nm_krbnamelen = krbnamelen;
1116		nmp->nm_dirpathlen = dirlen;
1117		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1118		if (td->td_ucred->cr_uid != (uid_t)0) {
1119			/*
1120			 * nm_uid is used to get KerberosV credentials for
1121			 * the nfsv4 state handling operations if there is
1122			 * no host based principal set. Use the uid of
1123			 * this user if not root, since they are doing the
1124			 * mount. I don't think setting this for root will
1125			 * work, since root normally does not have user
1126			 * credentials in a credentials cache.
1127			 */
1128			nmp->nm_uid = td->td_ucred->cr_uid;
1129		} else {
1130			/*
1131			 * Just set to -1, so it won't be used.
1132			 */
1133			nmp->nm_uid = (uid_t)-1;
1134		}
1135
1136		/* Copy and null terminate all the names */
1137		if (nmp->nm_krbnamelen > 0) {
1138			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1139			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1140		}
1141		if (nmp->nm_dirpathlen > 0) {
1142			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1143			    nmp->nm_dirpathlen);
1144			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1145			    + 1] = '\0';
1146		}
1147		if (nmp->nm_srvkrbnamelen > 0) {
1148			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1149			    nmp->nm_srvkrbnamelen);
1150			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1151			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1152		}
1153		nmp->nm_sockreq.nr_cred = crhold(cred);
1154		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1155		mp->mnt_data = nmp;
1156		nmp->nm_getinfo = nfs_getnlminfo;
1157		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1158	}
1159	vfs_getnewfsid(mp);
1160	nmp->nm_mountp = mp;
1161	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1162	nmp->nm_negnametimeo = negnametimeo;
1163
1164	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1165
1166	/*
1167	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1168	 * high, depending on whether we end up with negative offsets in
1169	 * the client or server somewhere.  2GB-1 may be safer.
1170	 *
1171	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1172	 * that we can handle until we find out otherwise.
1173	 * XXX Our "safe" limit on the client is what we can store in our
1174	 * buffer cache using signed(!) block numbers.
1175	 */
1176	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1177		nmp->nm_maxfilesize = 0xffffffffLL;
1178	else
1179		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1180
1181	nmp->nm_timeo = NFS_TIMEO;
1182	nmp->nm_retry = NFS_RETRANS;
1183	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1184		nmp->nm_wsize = NFS_WSIZE;
1185		nmp->nm_rsize = NFS_RSIZE;
1186		nmp->nm_readdirsize = NFS_READDIRSIZE;
1187	}
1188	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1189	nmp->nm_numgrps = NFS_MAXGRPS;
1190	nmp->nm_readahead = NFS_DEFRAHEAD;
1191	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1192	if (nmp->nm_tprintf_delay < 0)
1193		nmp->nm_tprintf_delay = 0;
1194	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1195	if (nmp->nm_tprintf_initial_delay < 0)
1196		nmp->nm_tprintf_initial_delay = 0;
1197	nmp->nm_fhsize = argp->fhsize;
1198	if (nmp->nm_fhsize > 0)
1199		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1200	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1201	nmp->nm_nam = nam;
1202	/* Set up the sockets and per-host congestion */
1203	nmp->nm_sotype = argp->sotype;
1204	nmp->nm_soproto = argp->proto;
1205	nmp->nm_sockreq.nr_prog = NFS_PROG;
1206	if ((argp->flags & NFSMNT_NFSV4))
1207		nmp->nm_sockreq.nr_vers = NFS_VER4;
1208	else if ((argp->flags & NFSMNT_NFSV3))
1209		nmp->nm_sockreq.nr_vers = NFS_VER3;
1210	else
1211		nmp->nm_sockreq.nr_vers = NFS_VER2;
1212
1213
1214	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1215		goto bad;
1216
1217	/*
1218	 * A reference count is needed on the nfsnode representing the
1219	 * remote root.  If this object is not persistent, then backward
1220	 * traversals of the mount point (i.e. "..") will not work if
1221	 * the nfsnode gets flushed out of the cache. Ufs does not have
1222	 * this problem, because one can identify root inodes by their
1223	 * number == ROOTINO (2).
1224	 */
1225	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1226	    nmp->nm_dirpathlen > 0) {
1227		/*
1228		 * If the fhsize on the mount point == 0 for V4, the mount
1229		 * path needs to be looked up.
1230		 */
1231		trycnt = 3;
1232		do {
1233			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1234			    cred, td);
1235			if (error)
1236				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1237		} while (error && --trycnt > 0);
1238		if (error) {
1239			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1240			goto bad;
1241		}
1242	}
1243	if (nmp->nm_fhsize > 0) {
1244		/*
1245		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1246		 * non-zero for the root vnode. f_iosize will be set correctly
1247		 * by nfs_statfs() before any I/O occurs.
1248		 */
1249		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1250		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1251		    LK_EXCLUSIVE);
1252		if (error)
1253			goto bad;
1254		*vpp = NFSTOV(np);
1255
1256		/*
1257		 * Get file attributes and transfer parameters for the
1258		 * mountpoint.  This has the side effect of filling in
1259		 * (*vpp)->v_type with the correct value.
1260		 */
1261		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1262		    cred, td, &nfsva, NULL);
1263		if (ret) {
1264			/*
1265			 * Just set default values to get things going.
1266			 */
1267			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1268			nfsva.na_vattr.va_type = VDIR;
1269			nfsva.na_vattr.va_mode = 0777;
1270			nfsva.na_vattr.va_nlink = 100;
1271			nfsva.na_vattr.va_uid = (uid_t)0;
1272			nfsva.na_vattr.va_gid = (gid_t)0;
1273			nfsva.na_vattr.va_fileid = 2;
1274			nfsva.na_vattr.va_gen = 1;
1275			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1276			nfsva.na_vattr.va_size = 512 * 1024;
1277		}
1278		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1279		if (argp->flags & NFSMNT_NFSV3)
1280			ncl_fsinfo(nmp, *vpp, cred, td);
1281
1282		/*
1283		 * Lose the lock but keep the ref.
1284		 */
1285		VOP_UNLOCK(*vpp, 0);
1286		return (0);
1287	}
1288	error = EIO;
1289
1290bad:
1291	newnfs_disconnect(&nmp->nm_sockreq);
1292	crfree(nmp->nm_sockreq.nr_cred);
1293	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1294	mtx_destroy(&nmp->nm_mtx);
1295	FREE(nmp, M_NEWNFSMNT);
1296	FREE(nam, M_SONAME);
1297	return (error);
1298}
1299
1300/*
1301 * unmount system call
1302 */
1303static int
1304nfs_unmount(struct mount *mp, int mntflags)
1305{
1306	struct thread *td;
1307	struct nfsmount *nmp;
1308	int error, flags = 0, trycnt = 0;
1309
1310	td = curthread;
1311
1312	if (mntflags & MNT_FORCE)
1313		flags |= FORCECLOSE;
1314	nmp = VFSTONFS(mp);
1315	/*
1316	 * Goes something like this..
1317	 * - Call vflush() to clear out vnodes for this filesystem
1318	 * - Close the socket
1319	 * - Free up the data structures
1320	 */
1321	/* In the forced case, cancel any outstanding requests. */
1322	if (mntflags & MNT_FORCE) {
1323		error = newnfs_nmcancelreqs(nmp);
1324		if (error)
1325			goto out;
1326		/* For a forced close, get rid of the renew thread now */
1327		nfscl_umount(nmp, td);
1328	}
1329	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1330	do {
1331		error = vflush(mp, 1, flags, td);
1332		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1333			(void) nfs_catnap(PSOCK, error, "newndm");
1334	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1335	if (error)
1336		goto out;
1337
1338	/*
1339	 * We are now committed to the unmount.
1340	 */
1341	if ((mntflags & MNT_FORCE) == 0)
1342		nfscl_umount(nmp, td);
1343	newnfs_disconnect(&nmp->nm_sockreq);
1344	crfree(nmp->nm_sockreq.nr_cred);
1345	FREE(nmp->nm_nam, M_SONAME);
1346
1347	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1348	mtx_destroy(&nmp->nm_mtx);
1349	FREE(nmp, M_NEWNFSMNT);
1350out:
1351	return (error);
1352}
1353
1354/*
1355 * Return root of a filesystem
1356 */
1357static int
1358nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1359{
1360	struct vnode *vp;
1361	struct nfsmount *nmp;
1362	struct nfsnode *np;
1363	int error;
1364
1365	nmp = VFSTONFS(mp);
1366	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1367	if (error)
1368		return error;
1369	vp = NFSTOV(np);
1370	/*
1371	 * Get transfer parameters and attributes for root vnode once.
1372	 */
1373	mtx_lock(&nmp->nm_mtx);
1374	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1375		mtx_unlock(&nmp->nm_mtx);
1376		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1377	} else
1378		mtx_unlock(&nmp->nm_mtx);
1379	if (vp->v_type == VNON)
1380	    vp->v_type = VDIR;
1381	vp->v_vflag |= VV_ROOT;
1382	*vpp = vp;
1383	return (0);
1384}
1385
1386/*
1387 * Flush out the buffer cache
1388 */
1389/* ARGSUSED */
1390static int
1391nfs_sync(struct mount *mp, int waitfor)
1392{
1393	struct vnode *vp, *mvp;
1394	struct thread *td;
1395	int error, allerror = 0;
1396
1397	td = curthread;
1398
1399	/*
1400	 * Force stale buffer cache information to be flushed.
1401	 */
1402	MNT_ILOCK(mp);
1403loop:
1404	MNT_VNODE_FOREACH(vp, mp, mvp) {
1405		VI_LOCK(vp);
1406		MNT_IUNLOCK(mp);
1407		/* XXX Racy bv_cnt check. */
1408		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1409		    waitfor == MNT_LAZY) {
1410			VI_UNLOCK(vp);
1411			MNT_ILOCK(mp);
1412			continue;
1413		}
1414		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1415			MNT_ILOCK(mp);
1416			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1417			goto loop;
1418		}
1419		error = VOP_FSYNC(vp, waitfor, td);
1420		if (error)
1421			allerror = error;
1422		VOP_UNLOCK(vp, 0);
1423		vrele(vp);
1424
1425		MNT_ILOCK(mp);
1426	}
1427	MNT_IUNLOCK(mp);
1428	return (allerror);
1429}
1430
1431static int
1432nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1433{
1434	struct nfsmount *nmp = VFSTONFS(mp);
1435	struct vfsquery vq;
1436	int error;
1437
1438	bzero(&vq, sizeof(vq));
1439	switch (op) {
1440#if 0
1441	case VFS_CTL_NOLOCKS:
1442		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1443 		if (req->oldptr != NULL) {
1444 			error = SYSCTL_OUT(req, &val, sizeof(val));
1445 			if (error)
1446 				return (error);
1447 		}
1448 		if (req->newptr != NULL) {
1449 			error = SYSCTL_IN(req, &val, sizeof(val));
1450 			if (error)
1451 				return (error);
1452			if (val)
1453				nmp->nm_flag |= NFSMNT_NOLOCKS;
1454			else
1455				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1456 		}
1457		break;
1458#endif
1459	case VFS_CTL_QUERY:
1460		mtx_lock(&nmp->nm_mtx);
1461		if (nmp->nm_state & NFSSTA_TIMEO)
1462			vq.vq_flags |= VQ_NOTRESP;
1463		mtx_unlock(&nmp->nm_mtx);
1464#if 0
1465		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1466		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1467			vq.vq_flags |= VQ_NOTRESPLOCK;
1468#endif
1469		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1470		break;
1471 	case VFS_CTL_TIMEO:
1472 		if (req->oldptr != NULL) {
1473 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1474 			    sizeof(nmp->nm_tprintf_initial_delay));
1475 			if (error)
1476 				return (error);
1477 		}
1478 		if (req->newptr != NULL) {
1479			error = vfs_suser(mp, req->td);
1480			if (error)
1481				return (error);
1482 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1483 			    sizeof(nmp->nm_tprintf_initial_delay));
1484 			if (error)
1485 				return (error);
1486 			if (nmp->nm_tprintf_initial_delay < 0)
1487 				nmp->nm_tprintf_initial_delay = 0;
1488 		}
1489		break;
1490	default:
1491		return (ENOTSUP);
1492	}
1493	return (0);
1494}
1495
1496/*
1497 * Extract the information needed by the nlm from the nfs vnode.
1498 */
1499static void
1500nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1501    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1502    struct timeval *timeop)
1503{
1504	struct nfsmount *nmp;
1505	struct nfsnode *np = VTONFS(vp);
1506
1507	nmp = VFSTONFS(vp->v_mount);
1508	if (fhlenp != NULL)
1509		*fhlenp = (size_t)np->n_fhp->nfh_len;
1510	if (fhp != NULL)
1511		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1512	if (sp != NULL)
1513		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1514	if (is_v3p != NULL)
1515		*is_v3p = NFS_ISV3(vp);
1516	if (sizep != NULL)
1517		*sizep = np->n_size;
1518	if (timeop != NULL) {
1519		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1520		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1521	}
1522}
1523
1524