nfs_clvfsops.c revision 194118
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 194118 2009-06-13 15:39:12Z jamie $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/lock.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/mount.h>
54#include <sys/proc.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/vnode.h>
60#include <sys/signalvar.h>
61
62#include <vm/vm.h>
63#include <vm/vm_extern.h>
64#include <vm/uma.h>
65
66#include <net/if.h>
67#include <net/route.h>
68#include <netinet/in.h>
69
70#include <fs/nfs/nfsport.h>
71#include <fs/nfsclient/nfsnode.h>
72#include <fs/nfsclient/nfsmount.h>
73#include <fs/nfsclient/nfs.h>
74#include <fs/nfsclient/nfsdiskless.h>
75
/*
 * Globals declared extern here; their definitions are not in this part of
 * the file.
 */
76extern int nfscl_ticks;
77extern struct timeval nfsboottime;
78extern struct nfsstats	newnfsstats;
79
/* malloc(9) types declared by this file. */
80MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
81MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
82
/* Tunables and statistics exported under the _vfs_newnfs sysctl node. */
83SYSCTL_DECL(_vfs_newnfs);
84SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
85	&newnfsstats, nfsstats, "S,nfsstats");
/*
 * When nfs_ip_paranoia is 0, nfs_mount() turns NFSMNT_NOCONN on by default
 * for new mounts; the default of 1 leaves it off.
 */
86static int nfs_ip_paranoia = 1;
87SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
88    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): presumably the delay before the first "not responding"
 * console message; the consumer is not in this part of the file.
 */
89static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
90SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
91        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
92/* How long between console messages "nfs server foo not responding". */
93static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
94SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
95        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96
/*
 * Forward declarations: file-local helpers first, then the VFS operations
 * that are installed in nfs_vfsops below.
 */
97static void	nfs_sec_name(char *, int *);
98static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
99		    struct nfs_args *argp, struct ucred *, struct thread *);
100static int	mountnfs(struct nfs_args *, struct mount *,
101		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
102		    struct vnode **, struct ucred *, struct thread *);
103static vfs_mount_t nfs_mount;
104static vfs_cmount_t nfs_cmount;
105static vfs_unmount_t nfs_unmount;
106static vfs_root_t nfs_root;
107static vfs_statfs_t nfs_statfs;
108static vfs_sync_t nfs_sync;
109static vfs_sysctl_t nfs_sysctl;
110
111/*
112 * nfs vfs operations.
 *
 * The operations table for this file system.  VFS_SET() below registers
 * it under the name "newnfs" with the VFCF_NETWORK flag.
113 */
114static struct vfsops nfs_vfsops = {
115	.vfs_init =		ncl_init,
116	.vfs_mount =		nfs_mount,
117	.vfs_cmount =		nfs_cmount,
118	.vfs_root =		nfs_root,
119	.vfs_statfs =		nfs_statfs,
120	.vfs_sync =		nfs_sync,
121	.vfs_uninit =		ncl_uninit,
122	.vfs_unmount =		nfs_unmount,
123	.vfs_sysctl =		nfs_sysctl,
124};
125VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
126
127/* So that loader and kldload(2) can find us, wherever we are. */
128MODULE_VERSION(newnfs, 1);
129
130/*
131 * This structure must be filled in by a primary bootstrap or bootstrap
132 * server for a diskless/dataless machine. It is initialized below just
133 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * newnfs_diskless is the older layout; nfs_convert_diskless() copies it
 * into newnfsv3_diskless, which is what ncl_mountroot() consumes.
 *
 * newnfs_diskless_valid, as used in this file: 0 makes ncl_mountroot()
 * fail, 1 makes it call nfs_convert_diskless() first, and
 * nfs_convert_diskless() itself stores 3.
134 */
135struct nfs_diskless newnfs_diskless = { { { 0 } } };
136struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
137int newnfs_diskless_valid = 0;
138
/* Read-only sysctl views of the diskless boot state. */
139SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
140    &newnfs_diskless_valid, 0,
141    "Has the diskless struct been filled correctly");
142
143SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
144    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
145
146SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
147    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
148    "%Ssockaddr_in", "Diskless root nfs address");
149
150
/*
 * Prototypes for the diskless helpers.  newnfsargs_ntoh() is only declared
 * here; its definition is not in this part of the file.
 */
151void		newnfsargs_ntoh(struct nfs_args *);
152static int	nfs_mountdiskless(char *,
153		    struct sockaddr_in *, struct nfs_args *,
154		    struct thread *, struct vnode **, struct mount *);
155static void	nfs_convert_diskless(void);
156static void	nfs_convert_oargs(struct nfs_args *args,
157		    struct onfs_args *oargs);
158
159int
160newnfs_iosize(struct nfsmount *nmp)
161{
162	int iosize, maxio;
163
164	/* First, set the upper limit for iosize */
165	if (nmp->nm_flag & NFSMNT_NFSV4) {
166		maxio = NFS_MAXBSIZE;
167	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
168		if (nmp->nm_sotype == SOCK_DGRAM)
169			maxio = NFS_MAXDGRAMDATA;
170		else
171			maxio = NFS_MAXBSIZE;
172	} else {
173		maxio = NFS_V2MAXDATA;
174	}
175	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
176		nmp->nm_rsize = maxio;
177	if (nmp->nm_rsize > MAXBSIZE)
178		nmp->nm_rsize = MAXBSIZE;
179	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
180		nmp->nm_readdirsize = maxio;
181	if (nmp->nm_readdirsize > nmp->nm_rsize)
182		nmp->nm_readdirsize = nmp->nm_rsize;
183	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
184		nmp->nm_wsize = maxio;
185	if (nmp->nm_wsize > MAXBSIZE)
186		nmp->nm_wsize = MAXBSIZE;
187
188	/*
189	 * Calculate the size used for io buffers.  Use the larger
190	 * of the two sizes to minimise nfs requests but make sure
191	 * that it is at least one VM page to avoid wasting buffer
192	 * space.
193	 */
194	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
195	iosize = imax(iosize, PAGE_SIZE);
196	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
197	return (iosize);
198}
199
200static void
201nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
202{
203
204	args->version = NFS_ARGSVERSION;
205	args->addr = oargs->addr;
206	args->addrlen = oargs->addrlen;
207	args->sotype = oargs->sotype;
208	args->proto = oargs->proto;
209	args->fh = oargs->fh;
210	args->fhsize = oargs->fhsize;
211	args->flags = oargs->flags;
212	args->wsize = oargs->wsize;
213	args->rsize = oargs->rsize;
214	args->readdirsize = oargs->readdirsize;
215	args->timeo = oargs->timeo;
216	args->retrans = oargs->retrans;
217	args->readahead = oargs->readahead;
218	args->hostname = oargs->hostname;
219}
220
221static void
222nfs_convert_diskless(void)
223{
224
225	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
226	    sizeof (struct ifaliasreq));
227	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
228	    sizeof (struct sockaddr_in));
229	nfs_convert_oargs(&newnfsv3_diskless.root_args,
230	    &newnfs_diskless.root_args);
231	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
232		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
233		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
234		    NFSX_MYFH);
235	} else {
236		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
237		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238		    NFSX_V2FH);
239	}
240	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
241	    sizeof(struct sockaddr_in));
242	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
243	    MNAMELEN);
244	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
245	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
246	    MAXHOSTNAMELEN);
247	newnfs_diskless_valid = 3;
248}
249
250/*
251 * nfs statfs call
252 */
253static int
254nfs_statfs(struct mount *mp, struct statfs *sbp)
255{
256	struct vnode *vp;
257	struct thread *td;
258	struct nfsmount *nmp = VFSTONFS(mp);
259	struct nfsvattr nfsva;
260	struct nfsfsinfo fs;
261	struct nfsstatfs sb;
262	int error = 0, attrflag, gotfsinfo = 0, ret;
263	struct nfsnode *np;
264
265	td = curthread;
266
267	error = vfs_busy(mp, MBF_NOWAIT);
268	if (error)
269		return (error);
270	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
271	if (error) {
272		vfs_unbusy(mp);
273		return (error);
274	}
275	vp = NFSTOV(np);
276	mtx_lock(&nmp->nm_mtx);
277	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
278		mtx_unlock(&nmp->nm_mtx);
279		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
280		    &attrflag, NULL);
281		if (!error)
282			gotfsinfo = 1;
283	} else
284		mtx_unlock(&nmp->nm_mtx);
285	if (!error)
286		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
287		    &attrflag, NULL);
288	if (attrflag == 0) {
289		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
290		    td->td_ucred, td, &nfsva, NULL);
291		if (ret) {
292			/*
293			 * Just set default values to get things going.
294			 */
295			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
296			nfsva.na_vattr.va_type = VDIR;
297			nfsva.na_vattr.va_mode = 0777;
298			nfsva.na_vattr.va_nlink = 100;
299			nfsva.na_vattr.va_uid = (uid_t)0;
300			nfsva.na_vattr.va_gid = (gid_t)0;
301			nfsva.na_vattr.va_fileid = 2;
302			nfsva.na_vattr.va_gen = 1;
303			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
304			nfsva.na_vattr.va_size = 512 * 1024;
305		}
306	}
307	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
308	if (!error) {
309	    mtx_lock(&nmp->nm_mtx);
310	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
311		nfscl_loadfsinfo(nmp, &fs);
312	    nfscl_loadsbinfo(nmp, &sb, sbp);
313	    sbp->f_flags = nmp->nm_flag;
314	    sbp->f_iosize = newnfs_iosize(nmp);
315	    mtx_unlock(&nmp->nm_mtx);
316	    if (sbp != &mp->mnt_stat) {
317		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
318		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
319	    }
320	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
321	} else if (NFS_ISV4(vp)) {
322		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
323	}
324	vput(vp);
325	vfs_unbusy(mp);
326	return (error);
327}
328
329/*
330 * nfs version 3 fsinfo rpc call
331 */
332int
333ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
334    struct thread *td)
335{
336	struct nfsfsinfo fs;
337	struct nfsvattr nfsva;
338	int error, attrflag;
339
340	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
341	if (!error) {
342		if (attrflag)
343			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
344			    1);
345		mtx_lock(&nmp->nm_mtx);
346		nfscl_loadfsinfo(nmp, &fs);
347		mtx_unlock(&nmp->nm_mtx);
348	}
349	return (error);
350}
351
352/*
353 * Mount a remote root fs via. nfs. This depends on the info in the
354 * newnfs_diskless structure that has been filled in properly by some primary
355 * bootstrap.
356 * It goes something like this:
357 * - do enough of "ifconfig" by calling ifioctl() so that the system
358 *   can talk to the server
359 * - If newnfs_diskless.mygateway is filled in, use that address as
360 *   a default gateway.
361 * - build the rootfs mount point and call mountnfs() to do the rest.
362 *
363 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
364 * structure, as well as other global NFS client variables here, as
365 * nfs_mountroot() will be called once in the boot before any other NFS
366 * client activity occurs.
367 */
368int
369ncl_mountroot(struct mount *mp)
370{
371	struct thread *td = curthread;
372	struct nfsv3_diskless *nd = &newnfsv3_diskless;
373	struct socket *so;
374	struct vnode *vp;
375	struct ifreq ir;
376	int error;
377	u_long l;
378	char buf[128];
379	char *cp;
380
381#if defined(BOOTP_NFSROOT) && defined(BOOTP)
382	bootpc_init();		/* use bootp to get nfs_diskless filled in */
383#elif defined(NFS_ROOT)
384	nfs_setup_diskless();
385#endif
386
387	if (newnfs_diskless_valid == 0)
388		return (-1);
389	if (newnfs_diskless_valid == 1)
390		nfs_convert_diskless();
391
392	/*
393	 * XXX splnet, so networks will receive...
394	 */
395	splnet();
396
397	/*
398	 * Do enough of ifconfig(8) so that the critical net interface can
399	 * talk to the server.
400	 */
401	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
402	    td->td_ucred, td);
403	if (error)
404		panic("nfs_mountroot: socreate(%04x): %d",
405			nd->myif.ifra_addr.sa_family, error);
406
407#if 0 /* XXX Bad idea */
408	/*
409	 * We might not have been told the right interface, so we pass
410	 * over the first ten interfaces of the same kind, until we get
411	 * one of them configured.
412	 */
413
414	for (i = strlen(nd->myif.ifra_name) - 1;
415		nd->myif.ifra_name[i] >= '0' &&
416		nd->myif.ifra_name[i] <= '9';
417		nd->myif.ifra_name[i] ++) {
418		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
419		if(!error)
420			break;
421	}
422#endif
423	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
424	if (error)
425		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
426	if ((cp = getenv("boot.netif.mtu")) != NULL) {
427		ir.ifr_mtu = strtol(cp, NULL, 10);
428		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
429		freeenv(cp);
430		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
431		if (error)
432			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
433	}
434	soclose(so);
435
436	/*
437	 * If the gateway field is filled in, set it as the default route.
438	 * Note that pxeboot will set a default route of 0 if the route
439	 * is not set by the DHCP server.  Check also for a value of 0
440	 * to avoid panicking inappropriately in that situation.
441	 */
442	if (nd->mygateway.sin_len != 0 &&
443	    nd->mygateway.sin_addr.s_addr != 0) {
444		struct sockaddr_in mask, sin;
445
446		bzero((caddr_t)&mask, sizeof(mask));
447		sin = mask;
448		sin.sin_family = AF_INET;
449		sin.sin_len = sizeof(sin);
450                /* XXX MRT use table 0 for this sort of thing */
451		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
452		    (struct sockaddr *)&nd->mygateway,
453		    (struct sockaddr *)&mask,
454		    RTF_UP | RTF_GATEWAY, NULL);
455		if (error)
456			panic("nfs_mountroot: RTM_ADD: %d", error);
457	}
458
459	/*
460	 * Create the rootfs mount point.
461	 */
462	nd->root_args.fh = nd->root_fh;
463	nd->root_args.fhsize = nd->root_fhsize;
464	l = ntohl(nd->root_saddr.sin_addr.s_addr);
465	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
466		(l >> 24) & 0xff, (l >> 16) & 0xff,
467		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
468	printf("NFS ROOT: %s\n", buf);
469	nd->root_args.hostname = buf;
470	if ((error = nfs_mountdiskless(buf,
471	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
472		return (error);
473	}
474
475	/*
476	 * This is not really an nfs issue, but it is much easier to
477	 * set hostname here and then let the "/etc/rc.xxx" files
478	 * mount the right /var based upon its preset value.
479	 */
480	mtx_lock(&prison0.pr_mtx);
481	strlcpy(prison0.pr_hostname, nd->my_hostnam,
482	    sizeof(prison0.pr_hostname));
483	mtx_unlock(&prison0.pr_mtx);
484	inittodr(ntohl(nd->root_time));
485	return (0);
486}
487
488/*
489 * Internal version of mount system call for diskless setup.
490 */
491static int
492nfs_mountdiskless(char *path,
493    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
494    struct vnode **vpp, struct mount *mp)
495{
496	struct sockaddr *nam;
497	int error;
498
499	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
500	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
501	    td->td_ucred, td)) != 0) {
502		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
503		return (error);
504	}
505	return (0);
506}
507
508static void
509nfs_sec_name(char *sec, int *flagsp)
510{
511	if (!strcmp(sec, "krb5"))
512		*flagsp |= NFSMNT_KERB;
513	else if (!strcmp(sec, "krb5i"))
514		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
515	else if (!strcmp(sec, "krb5p"))
516		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
517}
518
519static void
520nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
521    struct ucred *cred, struct thread *td)
522{
523	int s;
524	int adjsock;
525
526	s = splnet();
527
528	/*
529	 * Set read-only flag if requested; otherwise, clear it if this is
530	 * an update.  If this is not an update, then either the read-only
531	 * flag is already clear, or this is a root mount and it was set
532	 * intentionally at some previous point.
533	 */
534	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
535		MNT_ILOCK(mp);
536		mp->mnt_flag |= MNT_RDONLY;
537		MNT_IUNLOCK(mp);
538	} else if (mp->mnt_flag & MNT_UPDATE) {
539		MNT_ILOCK(mp);
540		mp->mnt_flag &= ~MNT_RDONLY;
541		MNT_IUNLOCK(mp);
542	}
543
544	/*
545	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
546	 * no sense in that context.  Also, set up appropriate retransmit
547	 * and soft timeout behavior.
548	 */
549	if (argp->sotype == SOCK_STREAM) {
550		nmp->nm_flag &= ~NFSMNT_NOCONN;
551		nmp->nm_timeo = NFS_MAXTIMEO;
552	}
553
554	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
555	if ((argp->flags & NFSMNT_NFSV3) == 0)
556		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
557
558	/* Also re-bind if we're switching to/from a connected UDP socket */
559	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
560		    (argp->flags & NFSMNT_NOCONN));
561
562	/* Update flags atomically.  Don't change the lock bits. */
563	nmp->nm_flag = argp->flags | nmp->nm_flag;
564	splx(s);
565
566	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
567		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
568		if (nmp->nm_timeo < NFS_MINTIMEO)
569			nmp->nm_timeo = NFS_MINTIMEO;
570		else if (nmp->nm_timeo > NFS_MAXTIMEO)
571			nmp->nm_timeo = NFS_MAXTIMEO;
572	}
573
574	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
575		nmp->nm_retry = argp->retrans;
576		if (nmp->nm_retry > NFS_MAXREXMIT)
577			nmp->nm_retry = NFS_MAXREXMIT;
578	}
579
580	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
581		nmp->nm_wsize = argp->wsize;
582		/* Round down to multiple of blocksize */
583		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
584		if (nmp->nm_wsize <= 0)
585			nmp->nm_wsize = NFS_FABLKSIZE;
586	}
587
588	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
589		nmp->nm_rsize = argp->rsize;
590		/* Round down to multiple of blocksize */
591		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
592		if (nmp->nm_rsize <= 0)
593			nmp->nm_rsize = NFS_FABLKSIZE;
594	}
595
596	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
597		nmp->nm_readdirsize = argp->readdirsize;
598	}
599
600	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
601		nmp->nm_acregmin = argp->acregmin;
602	else
603		nmp->nm_acregmin = NFS_MINATTRTIMO;
604	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
605		nmp->nm_acregmax = argp->acregmax;
606	else
607		nmp->nm_acregmax = NFS_MAXATTRTIMO;
608	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
609		nmp->nm_acdirmin = argp->acdirmin;
610	else
611		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
612	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
613		nmp->nm_acdirmax = argp->acdirmax;
614	else
615		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
616	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
617		nmp->nm_acdirmin = nmp->nm_acdirmax;
618	if (nmp->nm_acregmin > nmp->nm_acregmax)
619		nmp->nm_acregmin = nmp->nm_acregmax;
620
621	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
622		if (argp->readahead <= NFS_MAXRAHEAD)
623			nmp->nm_readahead = argp->readahead;
624		else
625			nmp->nm_readahead = NFS_MAXRAHEAD;
626	}
627	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
628		if (argp->wcommitsize < nmp->nm_wsize)
629			nmp->nm_wcommitsize = nmp->nm_wsize;
630		else
631			nmp->nm_wcommitsize = argp->wcommitsize;
632	}
633
634	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
635		    (nmp->nm_soproto != argp->proto));
636
637	if (nmp->nm_client != NULL && adjsock) {
638		int haslock = 0, error = 0;
639
640		if (nmp->nm_sotype == SOCK_STREAM) {
641			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
642			if (!error)
643				haslock = 1;
644		}
645		if (!error) {
646		    newnfs_disconnect(&nmp->nm_sockreq);
647		    if (haslock)
648			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
649		    nmp->nm_sotype = argp->sotype;
650		    nmp->nm_soproto = argp->proto;
651		    if (nmp->nm_sotype == SOCK_DGRAM)
652			while (newnfs_connect(nmp, &nmp->nm_sockreq,
653			    cred, td, 0)) {
654				printf("newnfs_args: retrying connect\n");
655				(void) nfs_catnap(PSOCK, "newnfscon");
656			}
657		}
658	} else {
659		nmp->nm_sotype = argp->sotype;
660		nmp->nm_soproto = argp->proto;
661	}
662}
663
/*
 * NULL-terminated list of the mount option names that nfs_mount() passes
 * to vfs_filteropt() as the accepted set.
 */
static const char *nfs_opts[] = {
	"from",
	"noatime",
	"noexec",
	"suiddir",
	"nosuid",
	"nosymfollow",
	"union",
	"noclusterr",
	"noclusterw",
	"multilabel",
	"acls",
	"force",
	"update",
	"async",
	"noconn",
	"nolockd",
	"conn",
	"lockd",
	"intr",
	"rdirplus",
	"readdirsize",
	"soft",
	"hard",
	"mntudp",
	"tcp",
	"udp",
	"wsize",
	"rsize",
	"retrans",
	"acregmin",
	"acregmax",
	"acdirmin",
	"acdirmax",
	"resvport",
	"readahead",
	"hostname",
	"timeout",
	"addr",
	"fh",
	"nfsv3",
	"sec",
	"principal",
	"nfsv4",
	"gssname",
	"allgssname",
	"dirpath",
	NULL
};
673
674/*
675 * VFS Operations.
676 *
677 * mount system call
678 * It seems a bit dumb to copyinstr() the host and path here and then
679 * bcopy() them in mountnfs(), but I wanted to detect errors before
680 * doing the sockargs() call because sockargs() allocates an mbuf and
681 * an error after that means that I have to release the mbuf.
682 */
683/* ARGSUSED */
684static int
685nfs_mount(struct mount *mp)
686{
687	struct nfs_args args = {
688	    .version = NFS_ARGSVERSION,
689	    .addr = NULL,
690	    .addrlen = sizeof (struct sockaddr_in),
691	    .sotype = SOCK_STREAM,
692	    .proto = 0,
693	    .fh = NULL,
694	    .fhsize = 0,
695	    .flags = 0,
696	    .wsize = NFS_WSIZE,
697	    .rsize = NFS_RSIZE,
698	    .readdirsize = NFS_READDIRSIZE,
699	    .timeo = 10,
700	    .retrans = NFS_RETRANS,
701	    .readahead = NFS_DEFRAHEAD,
702	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
703	    .hostname = NULL,
704	    /* args version 4 */
705	    .acregmin = NFS_MINATTRTIMO,
706	    .acregmax = NFS_MAXATTRTIMO,
707	    .acdirmin = NFS_MINDIRATTRTIMO,
708	    .acdirmax = NFS_MAXDIRATTRTIMO,
709	    .dirlen = 0,
710	    .krbnamelen = 0,
711	    .srvkrbnamelen = 0,
712	};
713	int error = 0, ret, len;
714	struct sockaddr *nam = NULL;
715	struct vnode *vp;
716	struct thread *td;
717	char hst[MNAMELEN];
718	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
719	char *opt, *name, *secname;
720
721	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
722		error = EINVAL;
723		goto out;
724	}
725
726	td = curthread;
727	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
728		error = ncl_mountroot(mp);
729		goto out;
730	}
731
732	nfscl_init();
733
734	/* Handle the new style options. */
735	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
736		args.flags |= NFSMNT_NOCONN;
737	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
738		args.flags |= NFSMNT_NOCONN;
739	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
740		args.flags |= NFSMNT_NOLOCKD;
741	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
742		args.flags &= ~NFSMNT_NOLOCKD;
743	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
744		args.flags |= NFSMNT_INT;
745	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
746		args.flags |= NFSMNT_RDIRPLUS;
747	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
748		args.flags |= NFSMNT_RESVPORT;
749	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
750		args.flags &= ~NFSMNT_RESVPORT;
751	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
752		args.flags |= NFSMNT_SOFT;
753	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
754		args.flags &= ~NFSMNT_SOFT;
755	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
756		args.sotype = SOCK_DGRAM;
757	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
758		args.sotype = SOCK_DGRAM;
759	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
760		args.sotype = SOCK_STREAM;
761	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
762		args.flags |= NFSMNT_NFSV3;
763	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
764		args.flags |= NFSMNT_NFSV4;
765		args.sotype = SOCK_STREAM;
766	}
767	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
768		args.flags |= NFSMNT_ALLGSSNAME;
769	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
770		if (opt == NULL) {
771			vfs_mount_error(mp, "illegal readdirsize");
772			error = EINVAL;
773			goto out;
774		}
775		ret = sscanf(opt, "%d", &args.readdirsize);
776		if (ret != 1 || args.readdirsize <= 0) {
777			vfs_mount_error(mp, "illegal readdirsize: %s",
778			    opt);
779			error = EINVAL;
780			goto out;
781		}
782		args.flags |= NFSMNT_READDIRSIZE;
783	}
784	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
785		if (opt == NULL) {
786			vfs_mount_error(mp, "illegal readahead");
787			error = EINVAL;
788			goto out;
789		}
790		ret = sscanf(opt, "%d", &args.readahead);
791		if (ret != 1 || args.readahead <= 0) {
792			vfs_mount_error(mp, "illegal readahead: %s",
793			    opt);
794			error = EINVAL;
795			goto out;
796		}
797		args.flags |= NFSMNT_READAHEAD;
798	}
799	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
800		if (opt == NULL) {
801			vfs_mount_error(mp, "illegal wsize");
802			error = EINVAL;
803			goto out;
804		}
805		ret = sscanf(opt, "%d", &args.wsize);
806		if (ret != 1 || args.wsize <= 0) {
807			vfs_mount_error(mp, "illegal wsize: %s",
808			    opt);
809			error = EINVAL;
810			goto out;
811		}
812		args.flags |= NFSMNT_WSIZE;
813	}
814	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
815		if (opt == NULL) {
816			vfs_mount_error(mp, "illegal rsize");
817			error = EINVAL;
818			goto out;
819		}
820		ret = sscanf(opt, "%d", &args.rsize);
821		if (ret != 1 || args.rsize <= 0) {
822			vfs_mount_error(mp, "illegal wsize: %s",
823			    opt);
824			error = EINVAL;
825			goto out;
826		}
827		args.flags |= NFSMNT_RSIZE;
828	}
829	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
830		if (opt == NULL) {
831			vfs_mount_error(mp, "illegal retrans");
832			error = EINVAL;
833			goto out;
834		}
835		ret = sscanf(opt, "%d", &args.retrans);
836		if (ret != 1 || args.retrans <= 0) {
837			vfs_mount_error(mp, "illegal retrans: %s",
838			    opt);
839			error = EINVAL;
840			goto out;
841		}
842		args.flags |= NFSMNT_RETRANS;
843	}
844	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
845		ret = sscanf(opt, "%d", &args.acregmin);
846		if (ret != 1 || args.acregmin < 0) {
847			vfs_mount_error(mp, "illegal acregmin: %s",
848			    opt);
849			error = EINVAL;
850			goto out;
851		}
852		args.flags |= NFSMNT_ACREGMIN;
853	}
854	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
855		ret = sscanf(opt, "%d", &args.acregmax);
856		if (ret != 1 || args.acregmax < 0) {
857			vfs_mount_error(mp, "illegal acregmax: %s",
858			    opt);
859			error = EINVAL;
860			goto out;
861		}
862		args.flags |= NFSMNT_ACREGMAX;
863	}
864	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
865		ret = sscanf(opt, "%d", &args.acdirmin);
866		if (ret != 1 || args.acdirmin < 0) {
867			vfs_mount_error(mp, "illegal acdirmin: %s",
868			    opt);
869			error = EINVAL;
870			goto out;
871		}
872		args.flags |= NFSMNT_ACDIRMIN;
873	}
874	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
875		ret = sscanf(opt, "%d", &args.acdirmax);
876		if (ret != 1 || args.acdirmax < 0) {
877			vfs_mount_error(mp, "illegal acdirmax: %s",
878			    opt);
879			error = EINVAL;
880			goto out;
881		}
882		args.flags |= NFSMNT_ACDIRMAX;
883	}
884	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
885		ret = sscanf(opt, "%d", &args.timeo);
886		if (ret != 1 || args.timeo <= 0) {
887			vfs_mount_error(mp, "illegal timeout: %s",
888			    opt);
889			error = EINVAL;
890			goto out;
891		}
892		args.flags |= NFSMNT_TIMEO;
893	}
894	if (vfs_getopt(mp->mnt_optnew, "sec",
895		(void **) &secname, NULL) == 0)
896		nfs_sec_name(secname, &args.flags);
897
898	if (mp->mnt_flag & MNT_UPDATE) {
899		struct nfsmount *nmp = VFSTONFS(mp);
900
901		if (nmp == NULL) {
902			error = EIO;
903			goto out;
904		}
905		/*
906		 * When doing an update, we can't change version,
907		 * security, switch lockd strategies or change cookie
908		 * translation
909		 */
910		args.flags = (args.flags &
911		    ~(NFSMNT_NFSV3 |
912		      NFSMNT_NFSV4 |
913		      NFSMNT_KERB |
914		      NFSMNT_INTEGRITY |
915		      NFSMNT_PRIVACY |
916		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
917		    (nmp->nm_flag &
918			(NFSMNT_NFSV3 |
919			 NFSMNT_NFSV4 |
920			 NFSMNT_KERB |
921			 NFSMNT_INTEGRITY |
922			 NFSMNT_PRIVACY |
923			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
924		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
925		goto out;
926	}
927
928	/*
929	 * Make the nfs_ip_paranoia sysctl serve as the default connection
930	 * or no-connection mode for those protocols that support
931	 * no-connection mode (the flag will be cleared later for protocols
932	 * that do not support no-connection mode).  This will allow a client
933	 * to receive replies from a different IP then the request was
934	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
935	 * not 0.
936	 */
937	if (nfs_ip_paranoia == 0)
938		args.flags |= NFSMNT_NOCONN;
939
940	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
941	    &args.fhsize) == 0) {
942		if (args.fhsize > NFSX_FHMAX) {
943			vfs_mount_error(mp, "Bad file handle");
944			error = EINVAL;
945			goto out;
946		}
947		bcopy(args.fh, nfh, args.fhsize);
948	} else {
949		args.fhsize = 0;
950	}
951
952	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
953	    &len);
954	if (args.hostname == NULL) {
955		vfs_mount_error(mp, "Invalid hostname");
956		error = EINVAL;
957		goto out;
958	}
959	bcopy(args.hostname, hst, MNAMELEN);
960	hst[MNAMELEN - 1] = '\0';
961
962	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
963		strlcpy(srvkrbname, name, sizeof (srvkrbname));
964	else
965		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
966	args.srvkrbnamelen = strlen(srvkrbname);
967
968	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
969		strlcpy(krbname, name, sizeof (krbname));
970	else
971		krbname[0] = '\0';
972	args.krbnamelen = strlen(krbname);
973
974	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
975		strlcpy(dirpath, name, sizeof (dirpath));
976	else
977		dirpath[0] = '\0';
978	args.dirlen = strlen(dirpath);
979
980	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
981	    &args.addrlen) == 0) {
982		if (args.addrlen > SOCK_MAXADDRLEN) {
983			error = ENAMETOOLONG;
984			goto out;
985		}
986		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
987		bcopy(args.addr, nam, args.addrlen);
988		nam->sa_len = args.addrlen;
989	}
990
991	args.fh = nfh;
992	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
993	    &vp, td->td_ucred, td);
994out:
995	if (!error) {
996		MNT_ILOCK(mp);
997		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
998		MNT_IUNLOCK(mp);
999	}
1000	return (error);
1001}
1002
1003
1004/*
1005 * VFS Operations.
1006 *
1007 * mount system call
1008 * It seems a bit dumb to copyinstr() the host and path here and then
1009 * bcopy() them in mountnfs(), but I wanted to detect errors before
1010 * doing the sockargs() call because sockargs() allocates an mbuf and
1011 * an error after that means that I have to release the mbuf.
1012 */
1013/* ARGSUSED */
1014static int
1015nfs_cmount(struct mntarg *ma, void *data, int flags)
1016{
1017	int error;
1018	struct nfs_args args;
1019
1020	error = copyin(data, &args, sizeof (struct nfs_args));
1021	if (error)
1022		return error;
1023
1024	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1025
1026	error = kernel_mount(ma, flags);
1027	return (error);
1028}
1029
1030/*
1031 * Common code for mount and mountroot
1032 */
1033static int
1034mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1035    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1036    struct vnode **vpp, struct ucred *cred, struct thread *td)
1037{
1038	struct nfsmount *nmp;
1039	struct nfsnode *np;
1040	int error, trycnt, ret, clearintr;
1041	struct nfsvattr nfsva;
1042	static u_int64_t clval = 0;
1043
1044	if (mp->mnt_flag & MNT_UPDATE) {
1045		nmp = VFSTONFS(mp);
1046		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1047		FREE(nam, M_SONAME);
1048		return (0);
1049	} else {
1050		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1051		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1052		    M_NEWNFSMNT, M_WAITOK);
1053		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1054		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1055		TAILQ_INIT(&nmp->nm_bufq);
1056		if (clval == 0)
1057			clval = (u_int64_t)nfsboottime.tv_sec;
1058		nmp->nm_clval = clval++;
1059		nmp->nm_krbnamelen = argp->krbnamelen;
1060		nmp->nm_dirpathlen = argp->dirlen;
1061		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1062		if (td->td_ucred->cr_uid != (uid_t)0) {
1063			/*
1064			 * nm_uid is used to get KerberosV credentials for
1065			 * the nfsv4 state handling operations if there is
1066			 * no host based principal set. Use the uid of
1067			 * this user if not root, since they are doing the
1068			 * mount. I don't think setting this for root will
1069			 * work, since root normally does not have user
1070			 * credentials in a credentials cache.
1071			 */
1072			nmp->nm_uid = td->td_ucred->cr_uid;
1073		} else {
1074			/*
1075			 * Just set to -1, so it won't be used.
1076			 */
1077			nmp->nm_uid = (uid_t)-1;
1078		}
1079
1080		/* Copy and null terminate all the names */
1081		if (nmp->nm_krbnamelen > 0) {
1082			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1083			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1084		}
1085		if (nmp->nm_dirpathlen > 0) {
1086			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1087			    nmp->nm_dirpathlen);
1088			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1089			    + 1] = '\0';
1090		}
1091		if (nmp->nm_srvkrbnamelen > 0) {
1092			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1093			    nmp->nm_srvkrbnamelen);
1094			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1095			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1096		}
1097		nmp->nm_sockreq.nr_cred = crhold(cred);
1098		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1099		mp->mnt_data = nmp;
1100	}
1101	vfs_getnewfsid(mp);
1102	nmp->nm_mountp = mp;
1103	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1104
1105	nfs_decode_args(mp, nmp, argp, cred, td);
1106
1107	/*
1108	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1109	 * high, depending on whether we end up with negative offsets in
1110	 * the client or server somewhere.  2GB-1 may be safer.
1111	 *
1112	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1113	 * that we can handle until we find out otherwise.
1114	 * XXX Our "safe" limit on the client is what we can store in our
1115	 * buffer cache using signed(!) block numbers.
1116	 */
1117	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1118		nmp->nm_maxfilesize = 0xffffffffLL;
1119	else
1120		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1121
1122	nmp->nm_timeo = NFS_TIMEO;
1123	nmp->nm_retry = NFS_RETRANS;
1124	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1125		nmp->nm_wsize = NFS_WSIZE;
1126		nmp->nm_rsize = NFS_RSIZE;
1127		nmp->nm_readdirsize = NFS_READDIRSIZE;
1128	}
1129	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1130	nmp->nm_numgrps = NFS_MAXGRPS;
1131	nmp->nm_readahead = NFS_DEFRAHEAD;
1132	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1133	if (nmp->nm_tprintf_delay < 0)
1134		nmp->nm_tprintf_delay = 0;
1135	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1136	if (nmp->nm_tprintf_initial_delay < 0)
1137		nmp->nm_tprintf_initial_delay = 0;
1138	nmp->nm_fhsize = argp->fhsize;
1139	if (nmp->nm_fhsize > 0)
1140		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1141	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1142	nmp->nm_nam = nam;
1143	/* Set up the sockets and per-host congestion */
1144	nmp->nm_sotype = argp->sotype;
1145	nmp->nm_soproto = argp->proto;
1146	nmp->nm_sockreq.nr_prog = NFS_PROG;
1147	if ((argp->flags & NFSMNT_NFSV4))
1148		nmp->nm_sockreq.nr_vers = NFS_VER4;
1149	else if ((argp->flags & NFSMNT_NFSV3))
1150		nmp->nm_sockreq.nr_vers = NFS_VER3;
1151	else
1152		nmp->nm_sockreq.nr_vers = NFS_VER2;
1153
1154
1155	/*
1156	 * For Connection based sockets (TCP,...) do the connect here,
1157	 * but make it interruptible, even for non-interuptible mounts.
1158	 */
1159	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
1160		nmp->nm_flag |= NFSMNT_INT;
1161		clearintr = 1;
1162	} else {
1163		clearintr = 0;
1164	}
1165	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1166		goto bad;
1167	if (clearintr)
1168		nmp->nm_flag &= ~NFSMNT_INT;
1169
1170	/*
1171	 * A reference count is needed on the nfsnode representing the
1172	 * remote root.  If this object is not persistent, then backward
1173	 * traversals of the mount point (i.e. "..") will not work if
1174	 * the nfsnode gets flushed out of the cache. Ufs does not have
1175	 * this problem, because one can identify root inodes by their
1176	 * number == ROOTINO (2).
1177	 */
1178	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1179	    nmp->nm_dirpathlen > 0) {
1180		/*
1181		 * If the fhsize on the mount point == 0 for V4, the mount
1182		 * path needs to be looked up.
1183		 */
1184		trycnt = 3;
1185		do {
1186			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1187			    cred, td);
1188			if (error)
1189				(void) nfs_catnap(PZERO, "nfsgetdirp");
1190		} while (error && --trycnt > 0);
1191		if (error) {
1192			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1193			goto bad;
1194		}
1195	}
1196	if (nmp->nm_fhsize > 0) {
1197		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1198		if (error)
1199			goto bad;
1200		*vpp = NFSTOV(np);
1201
1202		/*
1203		 * Get file attributes and transfer parameters for the
1204		 * mountpoint.  This has the side effect of filling in
1205		 * (*vpp)->v_type with the correct value.
1206		 */
1207		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1208		    cred, td, &nfsva, NULL);
1209		if (ret) {
1210			/*
1211			 * Just set default values to get things going.
1212			 */
1213			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1214			nfsva.na_vattr.va_type = VDIR;
1215			nfsva.na_vattr.va_mode = 0777;
1216			nfsva.na_vattr.va_nlink = 100;
1217			nfsva.na_vattr.va_uid = (uid_t)0;
1218			nfsva.na_vattr.va_gid = (gid_t)0;
1219			nfsva.na_vattr.va_fileid = 2;
1220			nfsva.na_vattr.va_gen = 1;
1221			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1222			nfsva.na_vattr.va_size = 512 * 1024;
1223		}
1224		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1225		if (argp->flags & NFSMNT_NFSV3)
1226			ncl_fsinfo(nmp, *vpp, cred, td);
1227
1228		/*
1229		 * Lose the lock but keep the ref.
1230		 */
1231		VOP_UNLOCK(*vpp, 0);
1232		return (0);
1233	}
1234	error = EIO;
1235
1236bad:
1237	newnfs_disconnect(&nmp->nm_sockreq);
1238	crfree(nmp->nm_sockreq.nr_cred);
1239	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1240	mtx_destroy(&nmp->nm_mtx);
1241	FREE(nmp, M_NEWNFSMNT);
1242	FREE(nam, M_SONAME);
1243	return (error);
1244}
1245
1246/*
1247 * unmount system call
1248 */
1249static int
1250nfs_unmount(struct mount *mp, int mntflags)
1251{
1252	struct thread *td;
1253	struct nfsmount *nmp;
1254	int error, flags = 0, trycnt = 0;
1255
1256	td = curthread;
1257
1258	if (mntflags & MNT_FORCE)
1259		flags |= FORCECLOSE;
1260	nmp = VFSTONFS(mp);
1261	/*
1262	 * Goes something like this..
1263	 * - Call vflush() to clear out vnodes for this filesystem
1264	 * - Close the socket
1265	 * - Free up the data structures
1266	 */
1267	/* In the forced case, cancel any outstanding requests. */
1268	if (mntflags & MNT_FORCE) {
1269		error = newnfs_nmcancelreqs(nmp);
1270		if (error)
1271			goto out;
1272		/* For a forced close, get rid of the renew thread now */
1273		nfscl_umount(nmp, td);
1274	}
1275	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1276	do {
1277		error = vflush(mp, 1, flags, td);
1278		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1279			(void) nfs_catnap(PSOCK, "newndm");
1280	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1281	if (error)
1282		goto out;
1283
1284	/*
1285	 * We are now committed to the unmount.
1286	 */
1287	if ((mntflags & MNT_FORCE) == 0)
1288		nfscl_umount(nmp, td);
1289	newnfs_disconnect(&nmp->nm_sockreq);
1290	crfree(nmp->nm_sockreq.nr_cred);
1291	FREE(nmp->nm_nam, M_SONAME);
1292
1293	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1294	mtx_destroy(&nmp->nm_mtx);
1295	FREE(nmp, M_NEWNFSMNT);
1296out:
1297	return (error);
1298}
1299
1300/*
1301 * Return root of a filesystem
1302 */
1303static int
1304nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1305{
1306	struct vnode *vp;
1307	struct nfsmount *nmp;
1308	struct nfsnode *np;
1309	int error;
1310
1311	nmp = VFSTONFS(mp);
1312	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1313	if (error)
1314		return error;
1315	vp = NFSTOV(np);
1316	/*
1317	 * Get transfer parameters and attributes for root vnode once.
1318	 */
1319	mtx_lock(&nmp->nm_mtx);
1320	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1321		mtx_unlock(&nmp->nm_mtx);
1322		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1323	} else
1324		mtx_unlock(&nmp->nm_mtx);
1325	if (vp->v_type == VNON)
1326	    vp->v_type = VDIR;
1327	vp->v_vflag |= VV_ROOT;
1328	*vpp = vp;
1329	return (0);
1330}
1331
1332/*
1333 * Flush out the buffer cache
1334 */
1335/* ARGSUSED */
1336static int
1337nfs_sync(struct mount *mp, int waitfor)
1338{
1339	struct vnode *vp, *mvp;
1340	struct thread *td;
1341	int error, allerror = 0;
1342
1343	td = curthread;
1344
1345	/*
1346	 * Force stale buffer cache information to be flushed.
1347	 */
1348	MNT_ILOCK(mp);
1349loop:
1350	MNT_VNODE_FOREACH(vp, mp, mvp) {
1351		VI_LOCK(vp);
1352		MNT_IUNLOCK(mp);
1353		/* XXX Racy bv_cnt check. */
1354		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1355		    waitfor == MNT_LAZY) {
1356			VI_UNLOCK(vp);
1357			MNT_ILOCK(mp);
1358			continue;
1359		}
1360		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1361			MNT_ILOCK(mp);
1362			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1363			goto loop;
1364		}
1365		error = VOP_FSYNC(vp, waitfor, td);
1366		if (error)
1367			allerror = error;
1368		VOP_UNLOCK(vp, 0);
1369		vrele(vp);
1370
1371		MNT_ILOCK(mp);
1372	}
1373	MNT_IUNLOCK(mp);
1374	return (allerror);
1375}
1376
1377static int
1378nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1379{
1380	struct nfsmount *nmp = VFSTONFS(mp);
1381	struct vfsquery vq;
1382	int error;
1383
1384	bzero(&vq, sizeof(vq));
1385	switch (op) {
1386#if 0
1387	case VFS_CTL_NOLOCKS:
1388		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1389 		if (req->oldptr != NULL) {
1390 			error = SYSCTL_OUT(req, &val, sizeof(val));
1391 			if (error)
1392 				return (error);
1393 		}
1394 		if (req->newptr != NULL) {
1395 			error = SYSCTL_IN(req, &val, sizeof(val));
1396 			if (error)
1397 				return (error);
1398			if (val)
1399				nmp->nm_flag |= NFSMNT_NOLOCKS;
1400			else
1401				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1402 		}
1403		break;
1404#endif
1405	case VFS_CTL_QUERY:
1406		mtx_lock(&nmp->nm_mtx);
1407		if (nmp->nm_state & NFSSTA_TIMEO)
1408			vq.vq_flags |= VQ_NOTRESP;
1409		mtx_unlock(&nmp->nm_mtx);
1410#if 0
1411		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1412		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1413			vq.vq_flags |= VQ_NOTRESPLOCK;
1414#endif
1415		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1416		break;
1417 	case VFS_CTL_TIMEO:
1418 		if (req->oldptr != NULL) {
1419 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1420 			    sizeof(nmp->nm_tprintf_initial_delay));
1421 			if (error)
1422 				return (error);
1423 		}
1424 		if (req->newptr != NULL) {
1425			error = vfs_suser(mp, req->td);
1426			if (error)
1427				return (error);
1428 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1429 			    sizeof(nmp->nm_tprintf_initial_delay));
1430 			if (error)
1431 				return (error);
1432 			if (nmp->nm_tprintf_initial_delay < 0)
1433 				nmp->nm_tprintf_initial_delay = 0;
1434 		}
1435		break;
1436	default:
1437		return (ENOTSUP);
1438	}
1439	return (0);
1440}
1441
1442