nfs_clvfsops.c revision 195762
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 195762 2009-07-19 16:44:26Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/lock.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/mount.h>
54#include <sys/proc.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/vnode.h>
60#include <sys/signalvar.h>
61
62#include <vm/vm.h>
63#include <vm/vm_extern.h>
64#include <vm/uma.h>
65
66#include <net/if.h>
67#include <net/route.h>
68#include <netinet/in.h>
69
70#include <fs/nfs/nfsport.h>
71#include <fs/nfsclient/nfsnode.h>
72#include <fs/nfsclient/nfsmount.h>
73#include <fs/nfsclient/nfs.h>
74#include <fs/nfsclient/nfsdiskless.h>
75
76extern int nfscl_ticks;
77extern struct timeval nfsboottime;
78extern struct nfsstats	newnfsstats;
79
80MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
81MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
82
83SYSCTL_DECL(_vfs_newnfs);
84SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
85	&newnfsstats, nfsstats, "S,nfsstats");
86static int nfs_ip_paranoia = 1;
87SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
88    &nfs_ip_paranoia, 0, "");
89static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
90SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
91        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
92/* how long between console messages "nfs server foo not responding" */
93static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
94SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
95        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96
97static void	nfs_sec_name(char *, int *);
98static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
99		    struct nfs_args *argp, struct ucred *, struct thread *);
100static int	mountnfs(struct nfs_args *, struct mount *,
101		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
102		    struct vnode **, struct ucred *, struct thread *);
103static vfs_mount_t nfs_mount;
104static vfs_cmount_t nfs_cmount;
105static vfs_unmount_t nfs_unmount;
106static vfs_root_t nfs_root;
107static vfs_statfs_t nfs_statfs;
108static vfs_sync_t nfs_sync;
109static vfs_sysctl_t nfs_sysctl;
110
111/*
112 * nfs vfs operations.
113 */
114static struct vfsops nfs_vfsops = {
115	.vfs_init =		ncl_init,
116	.vfs_mount =		nfs_mount,
117	.vfs_cmount =		nfs_cmount,
118	.vfs_root =		nfs_root,
119	.vfs_statfs =		nfs_statfs,
120	.vfs_sync =		nfs_sync,
121	.vfs_uninit =		ncl_uninit,
122	.vfs_unmount =		nfs_unmount,
123	.vfs_sysctl =		nfs_sysctl,
124};
125VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
126
127/* So that loader and kldload(2) can find us, wherever we are.. */
128MODULE_VERSION(newnfs, 1);
129
130/*
131 * This structure must be filled in by a primary bootstrap or bootstrap
132 * server for a diskless/dataless machine. It is initialized below just
133 * to ensure that it is allocated to initialized data (.data not .bss).
134 */
135struct nfs_diskless newnfs_diskless = { { { 0 } } };
136struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
137int newnfs_diskless_valid = 0;
138
139SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
140    &newnfs_diskless_valid, 0,
141    "Has the diskless struct been filled correctly");
142
143SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
144    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
145
146SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
147    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
148    "%Ssockaddr_in", "Diskless root nfs address");
149
150
/* Diskless-boot helpers; newnfsargs_ntoh() is not defined in this part. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
158
159int
160newnfs_iosize(struct nfsmount *nmp)
161{
162	int iosize, maxio;
163
164	/* First, set the upper limit for iosize */
165	if (nmp->nm_flag & NFSMNT_NFSV4) {
166		maxio = NFS_MAXBSIZE;
167	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
168		if (nmp->nm_sotype == SOCK_DGRAM)
169			maxio = NFS_MAXDGRAMDATA;
170		else
171			maxio = NFS_MAXBSIZE;
172	} else {
173		maxio = NFS_V2MAXDATA;
174	}
175	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
176		nmp->nm_rsize = maxio;
177	if (nmp->nm_rsize > MAXBSIZE)
178		nmp->nm_rsize = MAXBSIZE;
179	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
180		nmp->nm_readdirsize = maxio;
181	if (nmp->nm_readdirsize > nmp->nm_rsize)
182		nmp->nm_readdirsize = nmp->nm_rsize;
183	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
184		nmp->nm_wsize = maxio;
185	if (nmp->nm_wsize > MAXBSIZE)
186		nmp->nm_wsize = MAXBSIZE;
187
188	/*
189	 * Calculate the size used for io buffers.  Use the larger
190	 * of the two sizes to minimise nfs requests but make sure
191	 * that it is at least one VM page to avoid wasting buffer
192	 * space.
193	 */
194	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
195	iosize = imax(iosize, PAGE_SIZE);
196	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
197	return (iosize);
198}
199
200static void
201nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
202{
203
204	args->version = NFS_ARGSVERSION;
205	args->addr = oargs->addr;
206	args->addrlen = oargs->addrlen;
207	args->sotype = oargs->sotype;
208	args->proto = oargs->proto;
209	args->fh = oargs->fh;
210	args->fhsize = oargs->fhsize;
211	args->flags = oargs->flags;
212	args->wsize = oargs->wsize;
213	args->rsize = oargs->rsize;
214	args->readdirsize = oargs->readdirsize;
215	args->timeo = oargs->timeo;
216	args->retrans = oargs->retrans;
217	args->readahead = oargs->readahead;
218	args->hostname = oargs->hostname;
219}
220
221static void
222nfs_convert_diskless(void)
223{
224
225	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
226	    sizeof (struct ifaliasreq));
227	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
228	    sizeof (struct sockaddr_in));
229	nfs_convert_oargs(&newnfsv3_diskless.root_args,
230	    &newnfs_diskless.root_args);
231	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
232		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
233		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
234		    NFSX_MYFH);
235	} else {
236		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
237		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238		    NFSX_V2FH);
239	}
240	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
241	    sizeof(struct sockaddr_in));
242	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
243	    MNAMELEN);
244	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
245	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
246	    MAXHOSTNAMELEN);
247	newnfs_diskless_valid = 3;
248}
249
250/*
251 * nfs statfs call
252 */
253static int
254nfs_statfs(struct mount *mp, struct statfs *sbp)
255{
256	struct vnode *vp;
257	struct thread *td;
258	struct nfsmount *nmp = VFSTONFS(mp);
259	struct nfsvattr nfsva;
260	struct nfsfsinfo fs;
261	struct nfsstatfs sb;
262	int error = 0, attrflag, gotfsinfo = 0, ret;
263	struct nfsnode *np;
264
265	td = curthread;
266
267	error = vfs_busy(mp, MBF_NOWAIT);
268	if (error)
269		return (error);
270	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
271	if (error) {
272		vfs_unbusy(mp);
273		return (error);
274	}
275	vp = NFSTOV(np);
276	mtx_lock(&nmp->nm_mtx);
277	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
278		mtx_unlock(&nmp->nm_mtx);
279		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
280		    &attrflag, NULL);
281		if (!error)
282			gotfsinfo = 1;
283	} else
284		mtx_unlock(&nmp->nm_mtx);
285	if (!error)
286		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
287		    &attrflag, NULL);
288	if (attrflag == 0) {
289		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
290		    td->td_ucred, td, &nfsva, NULL);
291		if (ret) {
292			/*
293			 * Just set default values to get things going.
294			 */
295			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
296			nfsva.na_vattr.va_type = VDIR;
297			nfsva.na_vattr.va_mode = 0777;
298			nfsva.na_vattr.va_nlink = 100;
299			nfsva.na_vattr.va_uid = (uid_t)0;
300			nfsva.na_vattr.va_gid = (gid_t)0;
301			nfsva.na_vattr.va_fileid = 2;
302			nfsva.na_vattr.va_gen = 1;
303			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
304			nfsva.na_vattr.va_size = 512 * 1024;
305		}
306	}
307	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
308	if (!error) {
309	    mtx_lock(&nmp->nm_mtx);
310	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
311		nfscl_loadfsinfo(nmp, &fs);
312	    nfscl_loadsbinfo(nmp, &sb, sbp);
313	    sbp->f_flags = nmp->nm_flag;
314	    sbp->f_iosize = newnfs_iosize(nmp);
315	    mtx_unlock(&nmp->nm_mtx);
316	    if (sbp != &mp->mnt_stat) {
317		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
318		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
319	    }
320	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
321	} else if (NFS_ISV4(vp)) {
322		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
323	}
324	vput(vp);
325	vfs_unbusy(mp);
326	return (error);
327}
328
329/*
330 * nfs version 3 fsinfo rpc call
331 */
332int
333ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
334    struct thread *td)
335{
336	struct nfsfsinfo fs;
337	struct nfsvattr nfsva;
338	int error, attrflag;
339
340	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
341	if (!error) {
342		if (attrflag)
343			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
344			    1);
345		mtx_lock(&nmp->nm_mtx);
346		nfscl_loadfsinfo(nmp, &fs);
347		mtx_unlock(&nmp->nm_mtx);
348	}
349	return (error);
350}
351
352/*
353 * Mount a remote root fs via. nfs. This depends on the info in the
354 * newnfs_diskless structure that has been filled in properly by some primary
355 * bootstrap.
356 * It goes something like this:
357 * - do enough of "ifconfig" by calling ifioctl() so that the system
358 *   can talk to the server
359 * - If newnfs_diskless.mygateway is filled in, use that address as
360 *   a default gateway.
361 * - build the rootfs mount point and call mountnfs() to do the rest.
362 *
363 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
364 * structure, as well as other global NFS client variables here, as
365 * nfs_mountroot() will be called once in the boot before any other NFS
366 * client activity occurs.
367 */
368int
369ncl_mountroot(struct mount *mp)
370{
371	struct thread *td = curthread;
372	struct nfsv3_diskless *nd = &newnfsv3_diskless;
373	struct socket *so;
374	struct vnode *vp;
375	struct ifreq ir;
376	int error;
377	u_long l;
378	char buf[128];
379	char *cp;
380
381#if defined(BOOTP_NFSROOT) && defined(BOOTP)
382	bootpc_init();		/* use bootp to get nfs_diskless filled in */
383#elif defined(NFS_ROOT)
384	nfs_setup_diskless();
385#endif
386
387	if (newnfs_diskless_valid == 0)
388		return (-1);
389	if (newnfs_diskless_valid == 1)
390		nfs_convert_diskless();
391
392	/*
393	 * XXX splnet, so networks will receive...
394	 */
395	splnet();
396
397	/*
398	 * Do enough of ifconfig(8) so that the critical net interface can
399	 * talk to the server.
400	 */
401	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
402	    td->td_ucred, td);
403	if (error)
404		panic("nfs_mountroot: socreate(%04x): %d",
405			nd->myif.ifra_addr.sa_family, error);
406
407#if 0 /* XXX Bad idea */
408	/*
409	 * We might not have been told the right interface, so we pass
410	 * over the first ten interfaces of the same kind, until we get
411	 * one of them configured.
412	 */
413
414	for (i = strlen(nd->myif.ifra_name) - 1;
415		nd->myif.ifra_name[i] >= '0' &&
416		nd->myif.ifra_name[i] <= '9';
417		nd->myif.ifra_name[i] ++) {
418		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
419		if(!error)
420			break;
421	}
422#endif
423	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
424	if (error)
425		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
426	if ((cp = getenv("boot.netif.mtu")) != NULL) {
427		ir.ifr_mtu = strtol(cp, NULL, 10);
428		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
429		freeenv(cp);
430		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
431		if (error)
432			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
433	}
434	soclose(so);
435
436	/*
437	 * If the gateway field is filled in, set it as the default route.
438	 * Note that pxeboot will set a default route of 0 if the route
439	 * is not set by the DHCP server.  Check also for a value of 0
440	 * to avoid panicking inappropriately in that situation.
441	 */
442	if (nd->mygateway.sin_len != 0 &&
443	    nd->mygateway.sin_addr.s_addr != 0) {
444		struct sockaddr_in mask, sin;
445
446		bzero((caddr_t)&mask, sizeof(mask));
447		sin = mask;
448		sin.sin_family = AF_INET;
449		sin.sin_len = sizeof(sin);
450                /* XXX MRT use table 0 for this sort of thing */
451		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
452		    (struct sockaddr *)&nd->mygateway,
453		    (struct sockaddr *)&mask,
454		    RTF_UP | RTF_GATEWAY, NULL);
455		if (error)
456			panic("nfs_mountroot: RTM_ADD: %d", error);
457	}
458
459	/*
460	 * Create the rootfs mount point.
461	 */
462	nd->root_args.fh = nd->root_fh;
463	nd->root_args.fhsize = nd->root_fhsize;
464	l = ntohl(nd->root_saddr.sin_addr.s_addr);
465	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
466		(l >> 24) & 0xff, (l >> 16) & 0xff,
467		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
468	printf("NFS ROOT: %s\n", buf);
469	nd->root_args.hostname = buf;
470	if ((error = nfs_mountdiskless(buf,
471	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
472		return (error);
473	}
474
475	/*
476	 * This is not really an nfs issue, but it is much easier to
477	 * set hostname here and then let the "/etc/rc.xxx" files
478	 * mount the right /var based upon its preset value.
479	 */
480	mtx_lock(&prison0.pr_mtx);
481	strlcpy(prison0.pr_hostname, nd->my_hostnam,
482	    sizeof(prison0.pr_hostname));
483	mtx_unlock(&prison0.pr_mtx);
484	inittodr(ntohl(nd->root_time));
485	return (0);
486}
487
488/*
489 * Internal version of mount system call for diskless setup.
490 */
491static int
492nfs_mountdiskless(char *path,
493    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
494    struct vnode **vpp, struct mount *mp)
495{
496	struct sockaddr *nam;
497	int error;
498
499	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
500	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
501	    td->td_ucred, td)) != 0) {
502		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
503		return (error);
504	}
505	return (0);
506}
507
508static void
509nfs_sec_name(char *sec, int *flagsp)
510{
511	if (!strcmp(sec, "krb5"))
512		*flagsp |= NFSMNT_KERB;
513	else if (!strcmp(sec, "krb5i"))
514		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
515	else if (!strcmp(sec, "krb5p"))
516		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
517}
518
519static void
520nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
521    struct ucred *cred, struct thread *td)
522{
523	int s;
524	int adjsock;
525
526	s = splnet();
527
528	/*
529	 * Set read-only flag if requested; otherwise, clear it if this is
530	 * an update.  If this is not an update, then either the read-only
531	 * flag is already clear, or this is a root mount and it was set
532	 * intentionally at some previous point.
533	 */
534	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
535		MNT_ILOCK(mp);
536		mp->mnt_flag |= MNT_RDONLY;
537		MNT_IUNLOCK(mp);
538	} else if (mp->mnt_flag & MNT_UPDATE) {
539		MNT_ILOCK(mp);
540		mp->mnt_flag &= ~MNT_RDONLY;
541		MNT_IUNLOCK(mp);
542	}
543
544	/*
545	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
546	 * no sense in that context.  Also, set up appropriate retransmit
547	 * and soft timeout behavior.
548	 */
549	if (argp->sotype == SOCK_STREAM) {
550		nmp->nm_flag &= ~NFSMNT_NOCONN;
551		nmp->nm_timeo = NFS_MAXTIMEO;
552	}
553
554	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
555	if ((argp->flags & NFSMNT_NFSV3) == 0)
556		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
557
558	/* Also re-bind if we're switching to/from a connected UDP socket */
559	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
560		    (argp->flags & NFSMNT_NOCONN));
561
562	/* Update flags atomically.  Don't change the lock bits. */
563	nmp->nm_flag = argp->flags | nmp->nm_flag;
564	splx(s);
565
566	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
567		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
568		if (nmp->nm_timeo < NFS_MINTIMEO)
569			nmp->nm_timeo = NFS_MINTIMEO;
570		else if (nmp->nm_timeo > NFS_MAXTIMEO)
571			nmp->nm_timeo = NFS_MAXTIMEO;
572	}
573
574	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
575		nmp->nm_retry = argp->retrans;
576		if (nmp->nm_retry > NFS_MAXREXMIT)
577			nmp->nm_retry = NFS_MAXREXMIT;
578	}
579
580	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
581		nmp->nm_wsize = argp->wsize;
582		/* Round down to multiple of blocksize */
583		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
584		if (nmp->nm_wsize <= 0)
585			nmp->nm_wsize = NFS_FABLKSIZE;
586	}
587
588	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
589		nmp->nm_rsize = argp->rsize;
590		/* Round down to multiple of blocksize */
591		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
592		if (nmp->nm_rsize <= 0)
593			nmp->nm_rsize = NFS_FABLKSIZE;
594	}
595
596	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
597		nmp->nm_readdirsize = argp->readdirsize;
598	}
599
600	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
601		nmp->nm_acregmin = argp->acregmin;
602	else
603		nmp->nm_acregmin = NFS_MINATTRTIMO;
604	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
605		nmp->nm_acregmax = argp->acregmax;
606	else
607		nmp->nm_acregmax = NFS_MAXATTRTIMO;
608	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
609		nmp->nm_acdirmin = argp->acdirmin;
610	else
611		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
612	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
613		nmp->nm_acdirmax = argp->acdirmax;
614	else
615		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
616	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
617		nmp->nm_acdirmin = nmp->nm_acdirmax;
618	if (nmp->nm_acregmin > nmp->nm_acregmax)
619		nmp->nm_acregmin = nmp->nm_acregmax;
620
621	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
622		if (argp->readahead <= NFS_MAXRAHEAD)
623			nmp->nm_readahead = argp->readahead;
624		else
625			nmp->nm_readahead = NFS_MAXRAHEAD;
626	}
627	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
628		if (argp->wcommitsize < nmp->nm_wsize)
629			nmp->nm_wcommitsize = nmp->nm_wsize;
630		else
631			nmp->nm_wcommitsize = argp->wcommitsize;
632	}
633
634	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
635		    (nmp->nm_soproto != argp->proto));
636
637	if (nmp->nm_client != NULL && adjsock) {
638		int haslock = 0, error = 0;
639
640		if (nmp->nm_sotype == SOCK_STREAM) {
641			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
642			if (!error)
643				haslock = 1;
644		}
645		if (!error) {
646		    newnfs_disconnect(&nmp->nm_sockreq);
647		    if (haslock)
648			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
649		    nmp->nm_sotype = argp->sotype;
650		    nmp->nm_soproto = argp->proto;
651		    if (nmp->nm_sotype == SOCK_DGRAM)
652			while (newnfs_connect(nmp, &nmp->nm_sockreq,
653			    cred, td, 0)) {
654				printf("newnfs_args: retrying connect\n");
655				(void) nfs_catnap(PSOCK, "newnfscon");
656			}
657		}
658	} else {
659		nmp->nm_sotype = argp->sotype;
660		nmp->nm_soproto = argp->proto;
661	}
662}
663
/*
 * NULL-terminated list of option names that nfs_mount() passes to
 * vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize",
	"soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath",
	NULL
};
673
674/*
675 * VFS Operations.
676 *
677 * mount system call
678 * It seems a bit dumb to copyinstr() the host and path here and then
679 * bcopy() them in mountnfs(), but I wanted to detect errors before
680 * doing the sockargs() call because sockargs() allocates an mbuf and
681 * an error after that means that I have to release the mbuf.
682 */
683/* ARGSUSED */
684static int
685nfs_mount(struct mount *mp)
686{
687	struct nfs_args args = {
688	    .version = NFS_ARGSVERSION,
689	    .addr = NULL,
690	    .addrlen = sizeof (struct sockaddr_in),
691	    .sotype = SOCK_STREAM,
692	    .proto = 0,
693	    .fh = NULL,
694	    .fhsize = 0,
695	    .flags = 0,
696	    .wsize = NFS_WSIZE,
697	    .rsize = NFS_RSIZE,
698	    .readdirsize = NFS_READDIRSIZE,
699	    .timeo = 10,
700	    .retrans = NFS_RETRANS,
701	    .readahead = NFS_DEFRAHEAD,
702	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
703	    .hostname = NULL,
704	    /* args version 4 */
705	    .acregmin = NFS_MINATTRTIMO,
706	    .acregmax = NFS_MAXATTRTIMO,
707	    .acdirmin = NFS_MINDIRATTRTIMO,
708	    .acdirmax = NFS_MAXDIRATTRTIMO,
709	    .dirlen = 0,
710	    .krbnamelen = 0,
711	    .srvkrbnamelen = 0,
712	};
713	int error = 0, ret, len;
714	struct sockaddr *nam = NULL;
715	struct vnode *vp;
716	struct thread *td;
717	char hst[MNAMELEN];
718	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
719	char *opt, *name, *secname;
720
721	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
722		error = EINVAL;
723		goto out;
724	}
725
726	td = curthread;
727	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
728		error = ncl_mountroot(mp);
729		goto out;
730	}
731
732	nfscl_init();
733
734	/* Handle the new style options. */
735	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
736		args.flags |= NFSMNT_NOCONN;
737	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
738		args.flags |= NFSMNT_NOCONN;
739	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
740		args.flags |= NFSMNT_NOLOCKD;
741	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
742		args.flags &= ~NFSMNT_NOLOCKD;
743	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
744		args.flags |= NFSMNT_INT;
745	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
746		args.flags |= NFSMNT_RDIRPLUS;
747	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
748		args.flags |= NFSMNT_RESVPORT;
749	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
750		args.flags &= ~NFSMNT_RESVPORT;
751	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
752		args.flags |= NFSMNT_SOFT;
753	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
754		args.flags &= ~NFSMNT_SOFT;
755	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
756		args.sotype = SOCK_DGRAM;
757	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
758		args.sotype = SOCK_DGRAM;
759	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
760		args.sotype = SOCK_STREAM;
761	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
762		args.flags |= NFSMNT_NFSV3;
763	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
764		args.flags |= NFSMNT_NFSV4;
765		args.sotype = SOCK_STREAM;
766	}
767	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
768		args.flags |= NFSMNT_ALLGSSNAME;
769	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
770		if (opt == NULL) {
771			vfs_mount_error(mp, "illegal readdirsize");
772			error = EINVAL;
773			goto out;
774		}
775		ret = sscanf(opt, "%d", &args.readdirsize);
776		if (ret != 1 || args.readdirsize <= 0) {
777			vfs_mount_error(mp, "illegal readdirsize: %s",
778			    opt);
779			error = EINVAL;
780			goto out;
781		}
782		args.flags |= NFSMNT_READDIRSIZE;
783	}
784	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
785		if (opt == NULL) {
786			vfs_mount_error(mp, "illegal readahead");
787			error = EINVAL;
788			goto out;
789		}
790		ret = sscanf(opt, "%d", &args.readahead);
791		if (ret != 1 || args.readahead <= 0) {
792			vfs_mount_error(mp, "illegal readahead: %s",
793			    opt);
794			error = EINVAL;
795			goto out;
796		}
797		args.flags |= NFSMNT_READAHEAD;
798	}
799	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
800		if (opt == NULL) {
801			vfs_mount_error(mp, "illegal wsize");
802			error = EINVAL;
803			goto out;
804		}
805		ret = sscanf(opt, "%d", &args.wsize);
806		if (ret != 1 || args.wsize <= 0) {
807			vfs_mount_error(mp, "illegal wsize: %s",
808			    opt);
809			error = EINVAL;
810			goto out;
811		}
812		args.flags |= NFSMNT_WSIZE;
813	}
814	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
815		if (opt == NULL) {
816			vfs_mount_error(mp, "illegal rsize");
817			error = EINVAL;
818			goto out;
819		}
820		ret = sscanf(opt, "%d", &args.rsize);
821		if (ret != 1 || args.rsize <= 0) {
822			vfs_mount_error(mp, "illegal wsize: %s",
823			    opt);
824			error = EINVAL;
825			goto out;
826		}
827		args.flags |= NFSMNT_RSIZE;
828	}
829	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
830		if (opt == NULL) {
831			vfs_mount_error(mp, "illegal retrans");
832			error = EINVAL;
833			goto out;
834		}
835		ret = sscanf(opt, "%d", &args.retrans);
836		if (ret != 1 || args.retrans <= 0) {
837			vfs_mount_error(mp, "illegal retrans: %s",
838			    opt);
839			error = EINVAL;
840			goto out;
841		}
842		args.flags |= NFSMNT_RETRANS;
843	}
844	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
845		ret = sscanf(opt, "%d", &args.acregmin);
846		if (ret != 1 || args.acregmin < 0) {
847			vfs_mount_error(mp, "illegal acregmin: %s",
848			    opt);
849			error = EINVAL;
850			goto out;
851		}
852		args.flags |= NFSMNT_ACREGMIN;
853	}
854	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
855		ret = sscanf(opt, "%d", &args.acregmax);
856		if (ret != 1 || args.acregmax < 0) {
857			vfs_mount_error(mp, "illegal acregmax: %s",
858			    opt);
859			error = EINVAL;
860			goto out;
861		}
862		args.flags |= NFSMNT_ACREGMAX;
863	}
864	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
865		ret = sscanf(opt, "%d", &args.acdirmin);
866		if (ret != 1 || args.acdirmin < 0) {
867			vfs_mount_error(mp, "illegal acdirmin: %s",
868			    opt);
869			error = EINVAL;
870			goto out;
871		}
872		args.flags |= NFSMNT_ACDIRMIN;
873	}
874	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
875		ret = sscanf(opt, "%d", &args.acdirmax);
876		if (ret != 1 || args.acdirmax < 0) {
877			vfs_mount_error(mp, "illegal acdirmax: %s",
878			    opt);
879			error = EINVAL;
880			goto out;
881		}
882		args.flags |= NFSMNT_ACDIRMAX;
883	}
884	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
885		ret = sscanf(opt, "%d", &args.timeo);
886		if (ret != 1 || args.timeo <= 0) {
887			vfs_mount_error(mp, "illegal timeout: %s",
888			    opt);
889			error = EINVAL;
890			goto out;
891		}
892		args.flags |= NFSMNT_TIMEO;
893	}
894	if (vfs_getopt(mp->mnt_optnew, "sec",
895		(void **) &secname, NULL) == 0)
896		nfs_sec_name(secname, &args.flags);
897
898	if (mp->mnt_flag & MNT_UPDATE) {
899		struct nfsmount *nmp = VFSTONFS(mp);
900
901		if (nmp == NULL) {
902			error = EIO;
903			goto out;
904		}
905		/*
906		 * When doing an update, we can't change version,
907		 * security, switch lockd strategies or change cookie
908		 * translation
909		 */
910		args.flags = (args.flags &
911		    ~(NFSMNT_NFSV3 |
912		      NFSMNT_NFSV4 |
913		      NFSMNT_KERB |
914		      NFSMNT_INTEGRITY |
915		      NFSMNT_PRIVACY |
916		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
917		    (nmp->nm_flag &
918			(NFSMNT_NFSV3 |
919			 NFSMNT_NFSV4 |
920			 NFSMNT_KERB |
921			 NFSMNT_INTEGRITY |
922			 NFSMNT_PRIVACY |
923			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
924		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
925		goto out;
926	}
927
928	/*
929	 * Make the nfs_ip_paranoia sysctl serve as the default connection
930	 * or no-connection mode for those protocols that support
931	 * no-connection mode (the flag will be cleared later for protocols
932	 * that do not support no-connection mode).  This will allow a client
933	 * to receive replies from a different IP then the request was
934	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
935	 * not 0.
936	 */
937	if (nfs_ip_paranoia == 0)
938		args.flags |= NFSMNT_NOCONN;
939
940	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
941	    &args.fhsize) == 0) {
942		if (args.fhsize > NFSX_FHMAX) {
943			vfs_mount_error(mp, "Bad file handle");
944			error = EINVAL;
945			goto out;
946		}
947		bcopy(args.fh, nfh, args.fhsize);
948	} else {
949		args.fhsize = 0;
950	}
951
952	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
953	    &len);
954	if (args.hostname == NULL) {
955		vfs_mount_error(mp, "Invalid hostname");
956		error = EINVAL;
957		goto out;
958	}
959	bcopy(args.hostname, hst, MNAMELEN);
960	hst[MNAMELEN - 1] = '\0';
961
962	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
963		strlcpy(srvkrbname, name, sizeof (srvkrbname));
964	else
965		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
966	args.srvkrbnamelen = strlen(srvkrbname);
967
968	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
969		strlcpy(krbname, name, sizeof (krbname));
970	else
971		krbname[0] = '\0';
972	args.krbnamelen = strlen(krbname);
973
974	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
975		strlcpy(dirpath, name, sizeof (dirpath));
976	else
977		dirpath[0] = '\0';
978	args.dirlen = strlen(dirpath);
979
980	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
981	    &args.addrlen) == 0) {
982		if (args.addrlen > SOCK_MAXADDRLEN) {
983			error = ENAMETOOLONG;
984			goto out;
985		}
986		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
987		bcopy(args.addr, nam, args.addrlen);
988		nam->sa_len = args.addrlen;
989	}
990
991	args.fh = nfh;
992	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
993	    &vp, td->td_ucred, td);
994out:
995	if (!error) {
996		MNT_ILOCK(mp);
997		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
998		MNT_IUNLOCK(mp);
999	}
1000	return (error);
1001}
1002
1003
1004/*
1005 * VFS Operations.
1006 *
1007 * mount system call
1008 * It seems a bit dumb to copyinstr() the host and path here and then
1009 * bcopy() them in mountnfs(), but I wanted to detect errors before
1010 * doing the sockargs() call because sockargs() allocates an mbuf and
1011 * an error after that means that I have to release the mbuf.
1012 */
1013/* ARGSUSED */
1014static int
1015nfs_cmount(struct mntarg *ma, void *data, int flags)
1016{
1017	int error;
1018	struct nfs_args args;
1019
1020	error = copyin(data, &args, sizeof (struct nfs_args));
1021	if (error)
1022		return error;
1023
1024	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1025
1026	error = kernel_mount(ma, flags);
1027	return (error);
1028}
1029
1030/*
1031 * Common code for mount and mountroot
1032 */
1033static int
1034mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1035    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1036    struct vnode **vpp, struct ucred *cred, struct thread *td)
1037{
1038	struct nfsmount *nmp;
1039	struct nfsnode *np;
1040	int error, trycnt, ret;
1041	struct nfsvattr nfsva;
1042	static u_int64_t clval = 0;
1043
1044	if (mp->mnt_flag & MNT_UPDATE) {
1045		nmp = VFSTONFS(mp);
1046		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1047		FREE(nam, M_SONAME);
1048		return (0);
1049	} else {
1050		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1051		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1052		    M_NEWNFSMNT, M_WAITOK);
1053		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1054		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1055		TAILQ_INIT(&nmp->nm_bufq);
1056		if (clval == 0)
1057			clval = (u_int64_t)nfsboottime.tv_sec;
1058		nmp->nm_clval = clval++;
1059		nmp->nm_krbnamelen = argp->krbnamelen;
1060		nmp->nm_dirpathlen = argp->dirlen;
1061		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1062		if (td->td_ucred->cr_uid != (uid_t)0) {
1063			/*
1064			 * nm_uid is used to get KerberosV credentials for
1065			 * the nfsv4 state handling operations if there is
1066			 * no host based principal set. Use the uid of
1067			 * this user if not root, since they are doing the
1068			 * mount. I don't think setting this for root will
1069			 * work, since root normally does not have user
1070			 * credentials in a credentials cache.
1071			 */
1072			nmp->nm_uid = td->td_ucred->cr_uid;
1073		} else {
1074			/*
1075			 * Just set to -1, so it won't be used.
1076			 */
1077			nmp->nm_uid = (uid_t)-1;
1078		}
1079
1080		/* Copy and null terminate all the names */
1081		if (nmp->nm_krbnamelen > 0) {
1082			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1083			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1084		}
1085		if (nmp->nm_dirpathlen > 0) {
1086			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1087			    nmp->nm_dirpathlen);
1088			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1089			    + 1] = '\0';
1090		}
1091		if (nmp->nm_srvkrbnamelen > 0) {
1092			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1093			    nmp->nm_srvkrbnamelen);
1094			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1095			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1096		}
1097		nmp->nm_sockreq.nr_cred = crhold(cred);
1098		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1099		mp->mnt_data = nmp;
1100	}
1101	vfs_getnewfsid(mp);
1102	nmp->nm_mountp = mp;
1103	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1104
1105	nfs_decode_args(mp, nmp, argp, cred, td);
1106
1107	/*
1108	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1109	 * high, depending on whether we end up with negative offsets in
1110	 * the client or server somewhere.  2GB-1 may be safer.
1111	 *
1112	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1113	 * that we can handle until we find out otherwise.
1114	 * XXX Our "safe" limit on the client is what we can store in our
1115	 * buffer cache using signed(!) block numbers.
1116	 */
1117	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1118		nmp->nm_maxfilesize = 0xffffffffLL;
1119	else
1120		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1121
1122	nmp->nm_timeo = NFS_TIMEO;
1123	nmp->nm_retry = NFS_RETRANS;
1124	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1125		nmp->nm_wsize = NFS_WSIZE;
1126		nmp->nm_rsize = NFS_RSIZE;
1127		nmp->nm_readdirsize = NFS_READDIRSIZE;
1128	}
1129	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1130	nmp->nm_numgrps = NFS_MAXGRPS;
1131	nmp->nm_readahead = NFS_DEFRAHEAD;
1132	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1133	if (nmp->nm_tprintf_delay < 0)
1134		nmp->nm_tprintf_delay = 0;
1135	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1136	if (nmp->nm_tprintf_initial_delay < 0)
1137		nmp->nm_tprintf_initial_delay = 0;
1138	nmp->nm_fhsize = argp->fhsize;
1139	if (nmp->nm_fhsize > 0)
1140		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1141	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1142	nmp->nm_nam = nam;
1143	/* Set up the sockets and per-host congestion */
1144	nmp->nm_sotype = argp->sotype;
1145	nmp->nm_soproto = argp->proto;
1146	nmp->nm_sockreq.nr_prog = NFS_PROG;
1147	if ((argp->flags & NFSMNT_NFSV4))
1148		nmp->nm_sockreq.nr_vers = NFS_VER4;
1149	else if ((argp->flags & NFSMNT_NFSV3))
1150		nmp->nm_sockreq.nr_vers = NFS_VER3;
1151	else
1152		nmp->nm_sockreq.nr_vers = NFS_VER2;
1153
1154
1155	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1156		goto bad;
1157
1158	/*
1159	 * A reference count is needed on the nfsnode representing the
1160	 * remote root.  If this object is not persistent, then backward
1161	 * traversals of the mount point (i.e. "..") will not work if
1162	 * the nfsnode gets flushed out of the cache. Ufs does not have
1163	 * this problem, because one can identify root inodes by their
1164	 * number == ROOTINO (2).
1165	 */
1166	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1167	    nmp->nm_dirpathlen > 0) {
1168		/*
1169		 * If the fhsize on the mount point == 0 for V4, the mount
1170		 * path needs to be looked up.
1171		 */
1172		trycnt = 3;
1173		do {
1174			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1175			    cred, td);
1176			if (error)
1177				(void) nfs_catnap(PZERO, "nfsgetdirp");
1178		} while (error && --trycnt > 0);
1179		if (error) {
1180			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1181			goto bad;
1182		}
1183	}
1184	if (nmp->nm_fhsize > 0) {
1185		/*
1186		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1187		 * non-zero for the root vnode. f_iosize will be set correctly
1188		 * by nfs_statfs() before any I/O occurs.
1189		 */
1190		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1191		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1192		if (error)
1193			goto bad;
1194		*vpp = NFSTOV(np);
1195
1196		/*
1197		 * Get file attributes and transfer parameters for the
1198		 * mountpoint.  This has the side effect of filling in
1199		 * (*vpp)->v_type with the correct value.
1200		 */
1201		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1202		    cred, td, &nfsva, NULL);
1203		if (ret) {
1204			/*
1205			 * Just set default values to get things going.
1206			 */
1207			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1208			nfsva.na_vattr.va_type = VDIR;
1209			nfsva.na_vattr.va_mode = 0777;
1210			nfsva.na_vattr.va_nlink = 100;
1211			nfsva.na_vattr.va_uid = (uid_t)0;
1212			nfsva.na_vattr.va_gid = (gid_t)0;
1213			nfsva.na_vattr.va_fileid = 2;
1214			nfsva.na_vattr.va_gen = 1;
1215			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1216			nfsva.na_vattr.va_size = 512 * 1024;
1217		}
1218		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1219		if (argp->flags & NFSMNT_NFSV3)
1220			ncl_fsinfo(nmp, *vpp, cred, td);
1221
1222		/*
1223		 * Lose the lock but keep the ref.
1224		 */
1225		VOP_UNLOCK(*vpp, 0);
1226		return (0);
1227	}
1228	error = EIO;
1229
1230bad:
1231	newnfs_disconnect(&nmp->nm_sockreq);
1232	crfree(nmp->nm_sockreq.nr_cred);
1233	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1234	mtx_destroy(&nmp->nm_mtx);
1235	FREE(nmp, M_NEWNFSMNT);
1236	FREE(nam, M_SONAME);
1237	return (error);
1238}
1239
1240/*
1241 * unmount system call
1242 */
1243static int
1244nfs_unmount(struct mount *mp, int mntflags)
1245{
1246	struct thread *td;
1247	struct nfsmount *nmp;
1248	int error, flags = 0, trycnt = 0;
1249
1250	td = curthread;
1251
1252	if (mntflags & MNT_FORCE)
1253		flags |= FORCECLOSE;
1254	nmp = VFSTONFS(mp);
1255	/*
1256	 * Goes something like this..
1257	 * - Call vflush() to clear out vnodes for this filesystem
1258	 * - Close the socket
1259	 * - Free up the data structures
1260	 */
1261	/* In the forced case, cancel any outstanding requests. */
1262	if (mntflags & MNT_FORCE) {
1263		error = newnfs_nmcancelreqs(nmp);
1264		if (error)
1265			goto out;
1266		/* For a forced close, get rid of the renew thread now */
1267		nfscl_umount(nmp, td);
1268	}
1269	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1270	do {
1271		error = vflush(mp, 1, flags, td);
1272		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1273			(void) nfs_catnap(PSOCK, "newndm");
1274	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1275	if (error)
1276		goto out;
1277
1278	/*
1279	 * We are now committed to the unmount.
1280	 */
1281	if ((mntflags & MNT_FORCE) == 0)
1282		nfscl_umount(nmp, td);
1283	newnfs_disconnect(&nmp->nm_sockreq);
1284	crfree(nmp->nm_sockreq.nr_cred);
1285	FREE(nmp->nm_nam, M_SONAME);
1286
1287	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1288	mtx_destroy(&nmp->nm_mtx);
1289	FREE(nmp, M_NEWNFSMNT);
1290out:
1291	return (error);
1292}
1293
1294/*
1295 * Return root of a filesystem
1296 */
1297static int
1298nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1299{
1300	struct vnode *vp;
1301	struct nfsmount *nmp;
1302	struct nfsnode *np;
1303	int error;
1304
1305	nmp = VFSTONFS(mp);
1306	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1307	if (error)
1308		return error;
1309	vp = NFSTOV(np);
1310	/*
1311	 * Get transfer parameters and attributes for root vnode once.
1312	 */
1313	mtx_lock(&nmp->nm_mtx);
1314	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1315		mtx_unlock(&nmp->nm_mtx);
1316		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1317	} else
1318		mtx_unlock(&nmp->nm_mtx);
1319	if (vp->v_type == VNON)
1320	    vp->v_type = VDIR;
1321	vp->v_vflag |= VV_ROOT;
1322	*vpp = vp;
1323	return (0);
1324}
1325
1326/*
1327 * Flush out the buffer cache
1328 */
1329/* ARGSUSED */
1330static int
1331nfs_sync(struct mount *mp, int waitfor)
1332{
1333	struct vnode *vp, *mvp;
1334	struct thread *td;
1335	int error, allerror = 0;
1336
1337	td = curthread;
1338
1339	/*
1340	 * Force stale buffer cache information to be flushed.
1341	 */
1342	MNT_ILOCK(mp);
1343loop:
1344	MNT_VNODE_FOREACH(vp, mp, mvp) {
1345		VI_LOCK(vp);
1346		MNT_IUNLOCK(mp);
1347		/* XXX Racy bv_cnt check. */
1348		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1349		    waitfor == MNT_LAZY) {
1350			VI_UNLOCK(vp);
1351			MNT_ILOCK(mp);
1352			continue;
1353		}
1354		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1355			MNT_ILOCK(mp);
1356			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1357			goto loop;
1358		}
1359		error = VOP_FSYNC(vp, waitfor, td);
1360		if (error)
1361			allerror = error;
1362		VOP_UNLOCK(vp, 0);
1363		vrele(vp);
1364
1365		MNT_ILOCK(mp);
1366	}
1367	MNT_IUNLOCK(mp);
1368	return (allerror);
1369}
1370
1371static int
1372nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1373{
1374	struct nfsmount *nmp = VFSTONFS(mp);
1375	struct vfsquery vq;
1376	int error;
1377
1378	bzero(&vq, sizeof(vq));
1379	switch (op) {
1380#if 0
1381	case VFS_CTL_NOLOCKS:
1382		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1383 		if (req->oldptr != NULL) {
1384 			error = SYSCTL_OUT(req, &val, sizeof(val));
1385 			if (error)
1386 				return (error);
1387 		}
1388 		if (req->newptr != NULL) {
1389 			error = SYSCTL_IN(req, &val, sizeof(val));
1390 			if (error)
1391 				return (error);
1392			if (val)
1393				nmp->nm_flag |= NFSMNT_NOLOCKS;
1394			else
1395				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1396 		}
1397		break;
1398#endif
1399	case VFS_CTL_QUERY:
1400		mtx_lock(&nmp->nm_mtx);
1401		if (nmp->nm_state & NFSSTA_TIMEO)
1402			vq.vq_flags |= VQ_NOTRESP;
1403		mtx_unlock(&nmp->nm_mtx);
1404#if 0
1405		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1406		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1407			vq.vq_flags |= VQ_NOTRESPLOCK;
1408#endif
1409		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1410		break;
1411 	case VFS_CTL_TIMEO:
1412 		if (req->oldptr != NULL) {
1413 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1414 			    sizeof(nmp->nm_tprintf_initial_delay));
1415 			if (error)
1416 				return (error);
1417 		}
1418 		if (req->newptr != NULL) {
1419			error = vfs_suser(mp, req->td);
1420			if (error)
1421				return (error);
1422 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1423 			    sizeof(nmp->nm_tprintf_initial_delay));
1424 			if (error)
1425 				return (error);
1426 			if (nmp->nm_tprintf_initial_delay < 0)
1427 				nmp->nm_tprintf_initial_delay = 0;
1428 		}
1429		break;
1430	default:
1431		return (ENOTSUP);
1432	}
1433	return (0);
1434}
1435
1436