nfs_clvfsops.c revision 221436
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 221436 2011-05-04 13:27:45Z ru $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82
83MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
84MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
85
86SYSCTL_DECL(_vfs_newnfs);
87SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
88	&newnfsstats, nfsstats, "S,nfsstats");
89static int nfs_ip_paranoia = 1;
90SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
91    &nfs_ip_paranoia, 0, "");
92static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
93SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
94        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
95/* how long between console messages "nfs server foo not responding" */
96static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
97SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
98        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
99
100static int	nfs_mountroot(struct mount *);
101static void	nfs_sec_name(char *, int *);
102static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
103		    struct nfs_args *argp, const char *, struct ucred *,
104		    struct thread *);
105static int	mountnfs(struct nfs_args *, struct mount *,
106		    struct sockaddr *, char *, u_char *, int, u_char *, int,
107		    u_char *, int, struct vnode **, struct ucred *,
108		    struct thread *, int);
109static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
110		    struct sockaddr_storage *, int *, off_t *,
111		    struct timeval *);
112static vfs_mount_t nfs_mount;
113static vfs_cmount_t nfs_cmount;
114static vfs_unmount_t nfs_unmount;
115static vfs_root_t nfs_root;
116static vfs_statfs_t nfs_statfs;
117static vfs_sync_t nfs_sync;
118static vfs_sysctl_t nfs_sysctl;
119
120/*
121 * nfs vfs operations.
122 */
123static struct vfsops nfs_vfsops = {
124	.vfs_init =		ncl_init,
125	.vfs_mount =		nfs_mount,
126	.vfs_cmount =		nfs_cmount,
127	.vfs_root =		nfs_root,
128	.vfs_statfs =		nfs_statfs,
129	.vfs_sync =		nfs_sync,
130	.vfs_uninit =		ncl_uninit,
131	.vfs_unmount =		nfs_unmount,
132	.vfs_sysctl =		nfs_sysctl,
133};
134VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
135
136/* So that loader and kldload(2) can find us, wherever we are.. */
137MODULE_VERSION(nfs, 1);
138MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
139MODULE_DEPEND(nfs, krpc, 1, 1, 1);
140MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
141MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
142
143/*
144 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
145 * can be shared by both NFS clients. It is declared here so that it
146 * will be defined for kernels built without NFS_ROOT, although it
147 * isn't used in that case.
148 */
149#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
150struct nfs_diskless	nfs_diskless = { { { 0 } } };
151struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
152int			nfs_diskless_valid = 0;
153#endif
154
155SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
156    &nfs_diskless_valid, 0,
157    "Has the diskless struct been filled correctly");
158
159SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
160    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
161
162SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
163    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
164    "%Ssockaddr_in", "Diskless root nfs address");
165
166
167void		newnfsargs_ntoh(struct nfs_args *);
168static int	nfs_mountdiskless(char *,
169		    struct sockaddr_in *, struct nfs_args *,
170		    struct thread *, struct vnode **, struct mount *);
171static void	nfs_convert_diskless(void);
172static void	nfs_convert_oargs(struct nfs_args *args,
173		    struct onfs_args *oargs);
174
175int
176newnfs_iosize(struct nfsmount *nmp)
177{
178	int iosize, maxio;
179
180	/* First, set the upper limit for iosize */
181	if (nmp->nm_flag & NFSMNT_NFSV4) {
182		maxio = NFS_MAXBSIZE;
183	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
184		if (nmp->nm_sotype == SOCK_DGRAM)
185			maxio = NFS_MAXDGRAMDATA;
186		else
187			maxio = NFS_MAXBSIZE;
188	} else {
189		maxio = NFS_V2MAXDATA;
190	}
191	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
192		nmp->nm_rsize = maxio;
193	if (nmp->nm_rsize > MAXBSIZE)
194		nmp->nm_rsize = MAXBSIZE;
195	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
196		nmp->nm_readdirsize = maxio;
197	if (nmp->nm_readdirsize > nmp->nm_rsize)
198		nmp->nm_readdirsize = nmp->nm_rsize;
199	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
200		nmp->nm_wsize = maxio;
201	if (nmp->nm_wsize > MAXBSIZE)
202		nmp->nm_wsize = MAXBSIZE;
203
204	/*
205	 * Calculate the size used for io buffers.  Use the larger
206	 * of the two sizes to minimise nfs requests but make sure
207	 * that it is at least one VM page to avoid wasting buffer
208	 * space.
209	 */
210	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
211	iosize = imax(iosize, PAGE_SIZE);
212	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
213	return (iosize);
214}
215
216static void
217nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
218{
219
220	args->version = NFS_ARGSVERSION;
221	args->addr = oargs->addr;
222	args->addrlen = oargs->addrlen;
223	args->sotype = oargs->sotype;
224	args->proto = oargs->proto;
225	args->fh = oargs->fh;
226	args->fhsize = oargs->fhsize;
227	args->flags = oargs->flags;
228	args->wsize = oargs->wsize;
229	args->rsize = oargs->rsize;
230	args->readdirsize = oargs->readdirsize;
231	args->timeo = oargs->timeo;
232	args->retrans = oargs->retrans;
233	args->readahead = oargs->readahead;
234	args->hostname = oargs->hostname;
235}
236
237static void
238nfs_convert_diskless(void)
239{
240
241	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
242		sizeof(struct ifaliasreq));
243	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
244		sizeof(struct sockaddr_in));
245	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
246	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
247		nfsv3_diskless.root_fhsize = NFSX_MYFH;
248		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
249	} else {
250		nfsv3_diskless.root_fhsize = NFSX_V2FH;
251		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
252	}
253	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
254		sizeof(struct sockaddr_in));
255	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
256	nfsv3_diskless.root_time = nfs_diskless.root_time;
257	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
258		MAXHOSTNAMELEN);
259	nfs_diskless_valid = 3;
260}
261
262/*
263 * nfs statfs call
264 */
265static int
266nfs_statfs(struct mount *mp, struct statfs *sbp)
267{
268	struct vnode *vp;
269	struct thread *td;
270	struct nfsmount *nmp = VFSTONFS(mp);
271	struct nfsvattr nfsva;
272	struct nfsfsinfo fs;
273	struct nfsstatfs sb;
274	int error = 0, attrflag, gotfsinfo = 0, ret;
275	struct nfsnode *np;
276
277	td = curthread;
278
279	error = vfs_busy(mp, MBF_NOWAIT);
280	if (error)
281		return (error);
282	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
283	if (error) {
284		vfs_unbusy(mp);
285		return (error);
286	}
287	vp = NFSTOV(np);
288	mtx_lock(&nmp->nm_mtx);
289	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
290		mtx_unlock(&nmp->nm_mtx);
291		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
292		    &attrflag, NULL);
293		if (!error)
294			gotfsinfo = 1;
295	} else
296		mtx_unlock(&nmp->nm_mtx);
297	if (!error)
298		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
299		    &attrflag, NULL);
300	if (attrflag == 0) {
301		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
302		    td->td_ucred, td, &nfsva, NULL);
303		if (ret) {
304			/*
305			 * Just set default values to get things going.
306			 */
307			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
308			nfsva.na_vattr.va_type = VDIR;
309			nfsva.na_vattr.va_mode = 0777;
310			nfsva.na_vattr.va_nlink = 100;
311			nfsva.na_vattr.va_uid = (uid_t)0;
312			nfsva.na_vattr.va_gid = (gid_t)0;
313			nfsva.na_vattr.va_fileid = 2;
314			nfsva.na_vattr.va_gen = 1;
315			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
316			nfsva.na_vattr.va_size = 512 * 1024;
317		}
318	}
319	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
320	if (!error) {
321	    mtx_lock(&nmp->nm_mtx);
322	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
323		nfscl_loadfsinfo(nmp, &fs);
324	    nfscl_loadsbinfo(nmp, &sb, sbp);
325	    sbp->f_iosize = newnfs_iosize(nmp);
326	    mtx_unlock(&nmp->nm_mtx);
327	    if (sbp != &mp->mnt_stat) {
328		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
329		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
330	    }
331	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
332	} else if (NFS_ISV4(vp)) {
333		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
334	}
335	vput(vp);
336	vfs_unbusy(mp);
337	return (error);
338}
339
340/*
341 * nfs version 3 fsinfo rpc call
342 */
343int
344ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
345    struct thread *td)
346{
347	struct nfsfsinfo fs;
348	struct nfsvattr nfsva;
349	int error, attrflag;
350
351	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
352	if (!error) {
353		if (attrflag)
354			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
355			    1);
356		mtx_lock(&nmp->nm_mtx);
357		nfscl_loadfsinfo(nmp, &fs);
358		mtx_unlock(&nmp->nm_mtx);
359	}
360	return (error);
361}
362
363/*
364 * Mount a remote root fs via. nfs. This depends on the info in the
365 * nfs_diskless structure that has been filled in properly by some primary
366 * bootstrap.
367 * It goes something like this:
368 * - do enough of "ifconfig" by calling ifioctl() so that the system
369 *   can talk to the server
370 * - If nfs_diskless.mygateway is filled in, use that address as
371 *   a default gateway.
372 * - build the rootfs mount point and call mountnfs() to do the rest.
373 *
374 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
375 * structure, as well as other global NFS client variables here, as
376 * nfs_mountroot() will be called once in the boot before any other NFS
377 * client activity occurs.
378 */
379static int
380nfs_mountroot(struct mount *mp)
381{
382	struct thread *td = curthread;
383	struct nfsv3_diskless *nd = &nfsv3_diskless;
384	struct socket *so;
385	struct vnode *vp;
386	struct ifreq ir;
387	int error;
388	u_long l;
389	char buf[128];
390	char *cp;
391
392#if defined(BOOTP_NFSROOT) && defined(BOOTP)
393	bootpc_init();		/* use bootp to get nfs_diskless filled in */
394#elif defined(NFS_ROOT)
395	nfs_setup_diskless();
396#endif
397
398	if (nfs_diskless_valid == 0)
399		return (-1);
400	if (nfs_diskless_valid == 1)
401		nfs_convert_diskless();
402
403	/*
404	 * XXX splnet, so networks will receive...
405	 */
406	splnet();
407
408	/*
409	 * Do enough of ifconfig(8) so that the critical net interface can
410	 * talk to the server.
411	 */
412	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
413	    td->td_ucred, td);
414	if (error)
415		panic("nfs_mountroot: socreate(%04x): %d",
416			nd->myif.ifra_addr.sa_family, error);
417
418#if 0 /* XXX Bad idea */
419	/*
420	 * We might not have been told the right interface, so we pass
421	 * over the first ten interfaces of the same kind, until we get
422	 * one of them configured.
423	 */
424
425	for (i = strlen(nd->myif.ifra_name) - 1;
426		nd->myif.ifra_name[i] >= '0' &&
427		nd->myif.ifra_name[i] <= '9';
428		nd->myif.ifra_name[i] ++) {
429		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
430		if(!error)
431			break;
432	}
433#endif
434	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
435	if (error)
436		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
437	if ((cp = getenv("boot.netif.mtu")) != NULL) {
438		ir.ifr_mtu = strtol(cp, NULL, 10);
439		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
440		freeenv(cp);
441		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
442		if (error)
443			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
444	}
445	soclose(so);
446
447	/*
448	 * If the gateway field is filled in, set it as the default route.
449	 * Note that pxeboot will set a default route of 0 if the route
450	 * is not set by the DHCP server.  Check also for a value of 0
451	 * to avoid panicking inappropriately in that situation.
452	 */
453	if (nd->mygateway.sin_len != 0 &&
454	    nd->mygateway.sin_addr.s_addr != 0) {
455		struct sockaddr_in mask, sin;
456
457		bzero((caddr_t)&mask, sizeof(mask));
458		sin = mask;
459		sin.sin_family = AF_INET;
460		sin.sin_len = sizeof(sin);
461                /* XXX MRT use table 0 for this sort of thing */
462		CURVNET_SET(TD_TO_VNET(td));
463		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
464		    (struct sockaddr *)&nd->mygateway,
465		    (struct sockaddr *)&mask,
466		    RTF_UP | RTF_GATEWAY, NULL);
467		CURVNET_RESTORE();
468		if (error)
469			panic("nfs_mountroot: RTM_ADD: %d", error);
470	}
471
472	/*
473	 * Create the rootfs mount point.
474	 */
475	nd->root_args.fh = nd->root_fh;
476	nd->root_args.fhsize = nd->root_fhsize;
477	l = ntohl(nd->root_saddr.sin_addr.s_addr);
478	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
479		(l >> 24) & 0xff, (l >> 16) & 0xff,
480		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
481	printf("NFS ROOT: %s\n", buf);
482	nd->root_args.hostname = buf;
483	if ((error = nfs_mountdiskless(buf,
484	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
485		return (error);
486	}
487
488	/*
489	 * This is not really an nfs issue, but it is much easier to
490	 * set hostname here and then let the "/etc/rc.xxx" files
491	 * mount the right /var based upon its preset value.
492	 */
493	mtx_lock(&prison0.pr_mtx);
494	strlcpy(prison0.pr_hostname, nd->my_hostnam,
495	    sizeof(prison0.pr_hostname));
496	mtx_unlock(&prison0.pr_mtx);
497	inittodr(ntohl(nd->root_time));
498	return (0);
499}
500
501/*
502 * Internal version of mount system call for diskless setup.
503 */
504static int
505nfs_mountdiskless(char *path,
506    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
507    struct vnode **vpp, struct mount *mp)
508{
509	struct sockaddr *nam;
510	int dirlen, error;
511	char *dirpath;
512
513	/*
514	 * Find the directory path in "path", which also has the server's
515	 * name/ip address in it.
516	 */
517	dirpath = strchr(path, ':');
518	if (dirpath != NULL)
519		dirlen = strlen(++dirpath);
520	else
521		dirlen = 0;
522	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
523	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
524	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
525		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
526		return (error);
527	}
528	return (0);
529}
530
531static void
532nfs_sec_name(char *sec, int *flagsp)
533{
534	if (!strcmp(sec, "krb5"))
535		*flagsp |= NFSMNT_KERB;
536	else if (!strcmp(sec, "krb5i"))
537		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
538	else if (!strcmp(sec, "krb5p"))
539		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
540}
541
542static void
543nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
544    const char *hostname, struct ucred *cred, struct thread *td)
545{
546	int s;
547	int adjsock;
548	char *p;
549
550	s = splnet();
551
552	/*
553	 * Set read-only flag if requested; otherwise, clear it if this is
554	 * an update.  If this is not an update, then either the read-only
555	 * flag is already clear, or this is a root mount and it was set
556	 * intentionally at some previous point.
557	 */
558	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
559		MNT_ILOCK(mp);
560		mp->mnt_flag |= MNT_RDONLY;
561		MNT_IUNLOCK(mp);
562	} else if (mp->mnt_flag & MNT_UPDATE) {
563		MNT_ILOCK(mp);
564		mp->mnt_flag &= ~MNT_RDONLY;
565		MNT_IUNLOCK(mp);
566	}
567
568	/*
569	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
570	 * no sense in that context.  Also, set up appropriate retransmit
571	 * and soft timeout behavior.
572	 */
573	if (argp->sotype == SOCK_STREAM) {
574		nmp->nm_flag &= ~NFSMNT_NOCONN;
575		nmp->nm_timeo = NFS_MAXTIMEO;
576		if ((argp->flags & NFSMNT_NFSV4) != 0)
577			nmp->nm_retry = INT_MAX;
578		else
579			nmp->nm_retry = NFS_RETRANS_TCP;
580	}
581
582	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
583	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
584		argp->flags &= ~NFSMNT_RDIRPLUS;
585		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
586	}
587
588	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
589	if ((argp->flags & NFSMNT_NFSV4) != 0) {
590		argp->flags &= ~NFSMNT_RESVPORT;
591		nmp->nm_flag &= ~NFSMNT_RESVPORT;
592	}
593
594	/* Re-bind if rsrvd port requested and wasn't on one */
595	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
596		  && (argp->flags & NFSMNT_RESVPORT);
597	/* Also re-bind if we're switching to/from a connected UDP socket */
598	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
599		    (argp->flags & NFSMNT_NOCONN));
600
601	/* Update flags atomically.  Don't change the lock bits. */
602	nmp->nm_flag = argp->flags | nmp->nm_flag;
603	splx(s);
604
605	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
606		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
607		if (nmp->nm_timeo < NFS_MINTIMEO)
608			nmp->nm_timeo = NFS_MINTIMEO;
609		else if (nmp->nm_timeo > NFS_MAXTIMEO)
610			nmp->nm_timeo = NFS_MAXTIMEO;
611	}
612
613	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
614		nmp->nm_retry = argp->retrans;
615		if (nmp->nm_retry > NFS_MAXREXMIT)
616			nmp->nm_retry = NFS_MAXREXMIT;
617	}
618
619	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
620		nmp->nm_wsize = argp->wsize;
621		/* Round down to multiple of blocksize */
622		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
623		if (nmp->nm_wsize <= 0)
624			nmp->nm_wsize = NFS_FABLKSIZE;
625	}
626
627	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
628		nmp->nm_rsize = argp->rsize;
629		/* Round down to multiple of blocksize */
630		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
631		if (nmp->nm_rsize <= 0)
632			nmp->nm_rsize = NFS_FABLKSIZE;
633	}
634
635	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
636		nmp->nm_readdirsize = argp->readdirsize;
637	}
638
639	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
640		nmp->nm_acregmin = argp->acregmin;
641	else
642		nmp->nm_acregmin = NFS_MINATTRTIMO;
643	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
644		nmp->nm_acregmax = argp->acregmax;
645	else
646		nmp->nm_acregmax = NFS_MAXATTRTIMO;
647	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
648		nmp->nm_acdirmin = argp->acdirmin;
649	else
650		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
651	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
652		nmp->nm_acdirmax = argp->acdirmax;
653	else
654		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
655	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
656		nmp->nm_acdirmin = nmp->nm_acdirmax;
657	if (nmp->nm_acregmin > nmp->nm_acregmax)
658		nmp->nm_acregmin = nmp->nm_acregmax;
659
660	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
661		if (argp->readahead <= NFS_MAXRAHEAD)
662			nmp->nm_readahead = argp->readahead;
663		else
664			nmp->nm_readahead = NFS_MAXRAHEAD;
665	}
666	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
667		if (argp->wcommitsize < nmp->nm_wsize)
668			nmp->nm_wcommitsize = nmp->nm_wsize;
669		else
670			nmp->nm_wcommitsize = argp->wcommitsize;
671	}
672
673	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
674		    (nmp->nm_soproto != argp->proto));
675
676	if (nmp->nm_client != NULL && adjsock) {
677		int haslock = 0, error = 0;
678
679		if (nmp->nm_sotype == SOCK_STREAM) {
680			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
681			if (!error)
682				haslock = 1;
683		}
684		if (!error) {
685		    newnfs_disconnect(&nmp->nm_sockreq);
686		    if (haslock)
687			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
688		    nmp->nm_sotype = argp->sotype;
689		    nmp->nm_soproto = argp->proto;
690		    if (nmp->nm_sotype == SOCK_DGRAM)
691			while (newnfs_connect(nmp, &nmp->nm_sockreq,
692			    cred, td, 0)) {
693				printf("newnfs_args: retrying connect\n");
694				(void) nfs_catnap(PSOCK, 0, "newnfscon");
695			}
696		}
697	} else {
698		nmp->nm_sotype = argp->sotype;
699		nmp->nm_soproto = argp->proto;
700	}
701
702	if (hostname != NULL) {
703		strlcpy(nmp->nm_hostname, hostname,
704		    sizeof(nmp->nm_hostname));
705		p = strchr(nmp->nm_hostname, ':');
706		if (p != NULL)
707			*p = '\0';
708	}
709}
710
/*
 * NULL-terminated list of mount option names handed to vfs_filteropt()
 * by nfs_mount(); a mount request for which vfs_filteropt() returns
 * non-zero is failed with EINVAL.
 */
static const char *nfs_opts[] = {
	"from",
	"nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize",
	"soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo", "nocto",
	NULL
};
721
722/*
723 * VFS Operations.
724 *
725 * mount system call
726 * It seems a bit dumb to copyinstr() the host and path here and then
727 * bcopy() them in mountnfs(), but I wanted to detect errors before
728 * doing the sockargs() call because sockargs() allocates an mbuf and
729 * an error after that means that I have to release the mbuf.
730 */
731/* ARGSUSED */
732static int
733nfs_mount(struct mount *mp)
734{
735	struct nfs_args args = {
736	    .version = NFS_ARGSVERSION,
737	    .addr = NULL,
738	    .addrlen = sizeof (struct sockaddr_in),
739	    .sotype = SOCK_STREAM,
740	    .proto = 0,
741	    .fh = NULL,
742	    .fhsize = 0,
743	    .flags = NFSMNT_RESVPORT,
744	    .wsize = NFS_WSIZE,
745	    .rsize = NFS_RSIZE,
746	    .readdirsize = NFS_READDIRSIZE,
747	    .timeo = 10,
748	    .retrans = NFS_RETRANS,
749	    .readahead = NFS_DEFRAHEAD,
750	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
751	    .hostname = NULL,
752	    .acregmin = NFS_MINATTRTIMO,
753	    .acregmax = NFS_MAXATTRTIMO,
754	    .acdirmin = NFS_MINDIRATTRTIMO,
755	    .acdirmax = NFS_MAXDIRATTRTIMO,
756	};
757	int error = 0, ret, len;
758	struct sockaddr *nam = NULL;
759	struct vnode *vp;
760	struct thread *td;
761	char hst[MNAMELEN];
762	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
763	char *opt, *name, *secname;
764	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
765	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
766	size_t hstlen;
767
768	has_nfs_args_opt = 0;
769	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
770		error = EINVAL;
771		goto out;
772	}
773
774	td = curthread;
775	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
776		error = nfs_mountroot(mp);
777		goto out;
778	}
779
780	nfscl_init();
781
782	/*
783	 * The old mount_nfs program passed the struct nfs_args
784	 * from userspace to kernel.  The new mount_nfs program
785	 * passes string options via nmount() from userspace to kernel
786	 * and we populate the struct nfs_args in the kernel.
787	 */
788	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
789		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
790		    sizeof(args));
791		if (error != 0)
792			goto out;
793
794		if (args.version != NFS_ARGSVERSION) {
795			error = EPROGMISMATCH;
796			goto out;
797		}
798		has_nfs_args_opt = 1;
799	}
800
801	/* Handle the new style options. */
802	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
803		args.flags |= NFSMNT_NOCONN;
804	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
805		args.flags |= NFSMNT_NOCONN;
806	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
807		args.flags |= NFSMNT_NOLOCKD;
808	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
809		args.flags &= ~NFSMNT_NOLOCKD;
810	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
811		args.flags |= NFSMNT_INT;
812	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
813		args.flags |= NFSMNT_RDIRPLUS;
814	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
815		args.flags |= NFSMNT_RESVPORT;
816	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
817		args.flags &= ~NFSMNT_RESVPORT;
818	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
819		args.flags |= NFSMNT_SOFT;
820	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
821		args.flags &= ~NFSMNT_SOFT;
822	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
823		args.sotype = SOCK_DGRAM;
824	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
825		args.sotype = SOCK_DGRAM;
826	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
827		args.sotype = SOCK_STREAM;
828	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
829		args.flags |= NFSMNT_NFSV3;
830	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
831		args.flags |= NFSMNT_NFSV4;
832		args.sotype = SOCK_STREAM;
833	}
834	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
835		args.flags |= NFSMNT_ALLGSSNAME;
836	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
837		args.flags |= NFSMNT_NOCTO;
838	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
839		if (opt == NULL) {
840			vfs_mount_error(mp, "illegal readdirsize");
841			error = EINVAL;
842			goto out;
843		}
844		ret = sscanf(opt, "%d", &args.readdirsize);
845		if (ret != 1 || args.readdirsize <= 0) {
846			vfs_mount_error(mp, "illegal readdirsize: %s",
847			    opt);
848			error = EINVAL;
849			goto out;
850		}
851		args.flags |= NFSMNT_READDIRSIZE;
852	}
853	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
854		if (opt == NULL) {
855			vfs_mount_error(mp, "illegal readahead");
856			error = EINVAL;
857			goto out;
858		}
859		ret = sscanf(opt, "%d", &args.readahead);
860		if (ret != 1 || args.readahead <= 0) {
861			vfs_mount_error(mp, "illegal readahead: %s",
862			    opt);
863			error = EINVAL;
864			goto out;
865		}
866		args.flags |= NFSMNT_READAHEAD;
867	}
868	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
869		if (opt == NULL) {
870			vfs_mount_error(mp, "illegal wsize");
871			error = EINVAL;
872			goto out;
873		}
874		ret = sscanf(opt, "%d", &args.wsize);
875		if (ret != 1 || args.wsize <= 0) {
876			vfs_mount_error(mp, "illegal wsize: %s",
877			    opt);
878			error = EINVAL;
879			goto out;
880		}
881		args.flags |= NFSMNT_WSIZE;
882	}
883	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
884		if (opt == NULL) {
885			vfs_mount_error(mp, "illegal rsize");
886			error = EINVAL;
887			goto out;
888		}
889		ret = sscanf(opt, "%d", &args.rsize);
890		if (ret != 1 || args.rsize <= 0) {
891			vfs_mount_error(mp, "illegal wsize: %s",
892			    opt);
893			error = EINVAL;
894			goto out;
895		}
896		args.flags |= NFSMNT_RSIZE;
897	}
898	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
899		if (opt == NULL) {
900			vfs_mount_error(mp, "illegal retrans");
901			error = EINVAL;
902			goto out;
903		}
904		ret = sscanf(opt, "%d", &args.retrans);
905		if (ret != 1 || args.retrans <= 0) {
906			vfs_mount_error(mp, "illegal retrans: %s",
907			    opt);
908			error = EINVAL;
909			goto out;
910		}
911		args.flags |= NFSMNT_RETRANS;
912	}
913	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
914		ret = sscanf(opt, "%d", &args.acregmin);
915		if (ret != 1 || args.acregmin < 0) {
916			vfs_mount_error(mp, "illegal acregmin: %s",
917			    opt);
918			error = EINVAL;
919			goto out;
920		}
921		args.flags |= NFSMNT_ACREGMIN;
922	}
923	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
924		ret = sscanf(opt, "%d", &args.acregmax);
925		if (ret != 1 || args.acregmax < 0) {
926			vfs_mount_error(mp, "illegal acregmax: %s",
927			    opt);
928			error = EINVAL;
929			goto out;
930		}
931		args.flags |= NFSMNT_ACREGMAX;
932	}
933	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
934		ret = sscanf(opt, "%d", &args.acdirmin);
935		if (ret != 1 || args.acdirmin < 0) {
936			vfs_mount_error(mp, "illegal acdirmin: %s",
937			    opt);
938			error = EINVAL;
939			goto out;
940		}
941		args.flags |= NFSMNT_ACDIRMIN;
942	}
943	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
944		ret = sscanf(opt, "%d", &args.acdirmax);
945		if (ret != 1 || args.acdirmax < 0) {
946			vfs_mount_error(mp, "illegal acdirmax: %s",
947			    opt);
948			error = EINVAL;
949			goto out;
950		}
951		args.flags |= NFSMNT_ACDIRMAX;
952	}
953	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
954		ret = sscanf(opt, "%d", &args.timeo);
955		if (ret != 1 || args.timeo <= 0) {
956			vfs_mount_error(mp, "illegal timeout: %s",
957			    opt);
958			error = EINVAL;
959			goto out;
960		}
961		args.flags |= NFSMNT_TIMEO;
962	}
963	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
964	    == 0) {
965		ret = sscanf(opt, "%d", &negnametimeo);
966		if (ret != 1 || negnametimeo < 0) {
967			vfs_mount_error(mp, "illegal negnametimeo: %s",
968			    opt);
969			error = EINVAL;
970			goto out;
971		}
972	}
973	if (vfs_getopt(mp->mnt_optnew, "sec",
974		(void **) &secname, NULL) == 0)
975		nfs_sec_name(secname, &args.flags);
976
977	if (mp->mnt_flag & MNT_UPDATE) {
978		struct nfsmount *nmp = VFSTONFS(mp);
979
980		if (nmp == NULL) {
981			error = EIO;
982			goto out;
983		}
984		/*
985		 * When doing an update, we can't change version,
986		 * security, switch lockd strategies or change cookie
987		 * translation
988		 */
989		args.flags = (args.flags &
990		    ~(NFSMNT_NFSV3 |
991		      NFSMNT_NFSV4 |
992		      NFSMNT_KERB |
993		      NFSMNT_INTEGRITY |
994		      NFSMNT_PRIVACY |
995		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
996		    (nmp->nm_flag &
997			(NFSMNT_NFSV3 |
998			 NFSMNT_NFSV4 |
999			 NFSMNT_KERB |
1000			 NFSMNT_INTEGRITY |
1001			 NFSMNT_PRIVACY |
1002			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1003		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1004		goto out;
1005	}
1006
1007	/*
1008	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1009	 * or no-connection mode for those protocols that support
1010	 * no-connection mode (the flag will be cleared later for protocols
1011	 * that do not support no-connection mode).  This will allow a client
1012	 * to receive replies from a different IP then the request was
1013	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1014	 * not 0.
1015	 */
1016	if (nfs_ip_paranoia == 0)
1017		args.flags |= NFSMNT_NOCONN;
1018
1019	if (has_nfs_args_opt != 0) {
1020		/*
1021		 * In the 'nfs_args' case, the pointers in the args
1022		 * structure are in userland - we copy them in here.
1023		 */
1024		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1025			vfs_mount_error(mp, "Bad file handle");
1026			error = EINVAL;
1027			goto out;
1028		}
1029		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1030		    args.fhsize);
1031		if (error != 0)
1032			goto out;
1033		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1034		if (error != 0)
1035			goto out;
1036		bzero(&hst[hstlen], MNAMELEN - hstlen);
1037		args.hostname = hst;
1038		/* sockargs() call must be after above copyin() calls */
1039		error = getsockaddr(&nam, (caddr_t)args.addr,
1040		    args.addrlen);
1041		if (error != 0)
1042			goto out;
1043	} else {
1044		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1045		    &args.fhsize) == 0) {
1046			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1047				vfs_mount_error(mp, "Bad file handle");
1048				error = EINVAL;
1049				goto out;
1050			}
1051			bcopy(args.fh, nfh, args.fhsize);
1052		} else {
1053			args.fhsize = 0;
1054		}
1055		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1056		    (void **)&args.hostname, &len);
1057		if (args.hostname == NULL) {
1058			vfs_mount_error(mp, "Invalid hostname");
1059			error = EINVAL;
1060			goto out;
1061		}
1062		bcopy(args.hostname, hst, MNAMELEN);
1063		hst[MNAMELEN - 1] = '\0';
1064	}
1065
1066	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1067		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1068	else
1069		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1070	srvkrbnamelen = strlen(srvkrbname);
1071
1072	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1073		strlcpy(krbname, name, sizeof (krbname));
1074	else
1075		krbname[0] = '\0';
1076	krbnamelen = strlen(krbname);
1077
1078	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1079		strlcpy(dirpath, name, sizeof (dirpath));
1080	else
1081		dirpath[0] = '\0';
1082	dirlen = strlen(dirpath);
1083
1084	if (has_nfs_args_opt == 0 && vfs_getopt(mp->mnt_optnew, "addr",
1085	    (void **)&args.addr, &args.addrlen) == 0) {
1086		if (args.addrlen > SOCK_MAXADDRLEN) {
1087			error = ENAMETOOLONG;
1088			goto out;
1089		}
1090		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1091		bcopy(args.addr, nam, args.addrlen);
1092		nam->sa_len = args.addrlen;
1093	}
1094
1095	args.fh = nfh;
1096	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1097	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1098	    negnametimeo);
1099out:
1100	if (!error) {
1101		MNT_ILOCK(mp);
1102		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1103		MNT_IUNLOCK(mp);
1104	}
1105	return (error);
1106}
1107
1108
1109/*
1110 * VFS Operations.
1111 *
1112 * mount system call
1113 * It seems a bit dumb to copyinstr() the host and path here and then
1114 * bcopy() them in mountnfs(), but I wanted to detect errors before
1115 * doing the sockargs() call because sockargs() allocates an mbuf and
1116 * an error after that means that I have to release the mbuf.
1117 */
1118/* ARGSUSED */
1119static int
1120nfs_cmount(struct mntarg *ma, void *data, int flags)
1121{
1122	int error;
1123	struct nfs_args args;
1124
1125	error = copyin(data, &args, sizeof (struct nfs_args));
1126	if (error)
1127		return error;
1128
1129	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1130
1131	error = kernel_mount(ma, flags);
1132	return (error);
1133}
1134
1135/*
1136 * Common code for mount and mountroot
1137 */
1138static int
1139mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1140    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1141    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1142    struct ucred *cred, struct thread *td, int negnametimeo)
1143{
1144	struct nfsmount *nmp;
1145	struct nfsnode *np;
1146	int error, trycnt, ret;
1147	struct nfsvattr nfsva;
1148	static u_int64_t clval = 0;
1149
1150	if (mp->mnt_flag & MNT_UPDATE) {
1151		nmp = VFSTONFS(mp);
1152		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1153		FREE(nam, M_SONAME);
1154		return (0);
1155	} else {
1156		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1157		    krbnamelen + dirlen + srvkrbnamelen + 2,
1158		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1159		TAILQ_INIT(&nmp->nm_bufq);
1160		if (clval == 0)
1161			clval = (u_int64_t)nfsboottime.tv_sec;
1162		nmp->nm_clval = clval++;
1163		nmp->nm_krbnamelen = krbnamelen;
1164		nmp->nm_dirpathlen = dirlen;
1165		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1166		if (td->td_ucred->cr_uid != (uid_t)0) {
1167			/*
1168			 * nm_uid is used to get KerberosV credentials for
1169			 * the nfsv4 state handling operations if there is
1170			 * no host based principal set. Use the uid of
1171			 * this user if not root, since they are doing the
1172			 * mount. I don't think setting this for root will
1173			 * work, since root normally does not have user
1174			 * credentials in a credentials cache.
1175			 */
1176			nmp->nm_uid = td->td_ucred->cr_uid;
1177		} else {
1178			/*
1179			 * Just set to -1, so it won't be used.
1180			 */
1181			nmp->nm_uid = (uid_t)-1;
1182		}
1183
1184		/* Copy and null terminate all the names */
1185		if (nmp->nm_krbnamelen > 0) {
1186			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1187			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1188		}
1189		if (nmp->nm_dirpathlen > 0) {
1190			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1191			    nmp->nm_dirpathlen);
1192			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1193			    + 1] = '\0';
1194		}
1195		if (nmp->nm_srvkrbnamelen > 0) {
1196			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1197			    nmp->nm_srvkrbnamelen);
1198			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1199			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1200		}
1201		nmp->nm_sockreq.nr_cred = crhold(cred);
1202		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1203		mp->mnt_data = nmp;
1204		nmp->nm_getinfo = nfs_getnlminfo;
1205		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1206	}
1207	vfs_getnewfsid(mp);
1208	nmp->nm_mountp = mp;
1209	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1210	nmp->nm_negnametimeo = negnametimeo;
1211
1212	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1213
1214	/*
1215	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1216	 * high, depending on whether we end up with negative offsets in
1217	 * the client or server somewhere.  2GB-1 may be safer.
1218	 *
1219	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1220	 * that we can handle until we find out otherwise.
1221	 * XXX Our "safe" limit on the client is what we can store in our
1222	 * buffer cache using signed(!) block numbers.
1223	 */
1224	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1225		nmp->nm_maxfilesize = 0xffffffffLL;
1226	else
1227		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1228
1229	nmp->nm_timeo = NFS_TIMEO;
1230	nmp->nm_retry = NFS_RETRANS;
1231	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1232		nmp->nm_wsize = NFS_WSIZE;
1233		nmp->nm_rsize = NFS_RSIZE;
1234		nmp->nm_readdirsize = NFS_READDIRSIZE;
1235	}
1236	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1237	nmp->nm_numgrps = NFS_MAXGRPS;
1238	nmp->nm_readahead = NFS_DEFRAHEAD;
1239	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1240	if (nmp->nm_tprintf_delay < 0)
1241		nmp->nm_tprintf_delay = 0;
1242	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1243	if (nmp->nm_tprintf_initial_delay < 0)
1244		nmp->nm_tprintf_initial_delay = 0;
1245	nmp->nm_fhsize = argp->fhsize;
1246	if (nmp->nm_fhsize > 0)
1247		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1248	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1249	nmp->nm_nam = nam;
1250	/* Set up the sockets and per-host congestion */
1251	nmp->nm_sotype = argp->sotype;
1252	nmp->nm_soproto = argp->proto;
1253	nmp->nm_sockreq.nr_prog = NFS_PROG;
1254	if ((argp->flags & NFSMNT_NFSV4))
1255		nmp->nm_sockreq.nr_vers = NFS_VER4;
1256	else if ((argp->flags & NFSMNT_NFSV3))
1257		nmp->nm_sockreq.nr_vers = NFS_VER3;
1258	else
1259		nmp->nm_sockreq.nr_vers = NFS_VER2;
1260
1261
1262	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1263		goto bad;
1264
1265	/*
1266	 * A reference count is needed on the nfsnode representing the
1267	 * remote root.  If this object is not persistent, then backward
1268	 * traversals of the mount point (i.e. "..") will not work if
1269	 * the nfsnode gets flushed out of the cache. Ufs does not have
1270	 * this problem, because one can identify root inodes by their
1271	 * number == ROOTINO (2).
1272	 */
1273	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1274	    nmp->nm_dirpathlen > 0) {
1275		/*
1276		 * If the fhsize on the mount point == 0 for V4, the mount
1277		 * path needs to be looked up.
1278		 */
1279		trycnt = 3;
1280		do {
1281			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1282			    cred, td);
1283			if (error)
1284				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1285		} while (error && --trycnt > 0);
1286		if (error) {
1287			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1288			goto bad;
1289		}
1290	}
1291	if (nmp->nm_fhsize > 0) {
1292		/*
1293		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1294		 * non-zero for the root vnode. f_iosize will be set correctly
1295		 * by nfs_statfs() before any I/O occurs.
1296		 */
1297		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1298		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1299		    LK_EXCLUSIVE);
1300		if (error)
1301			goto bad;
1302		*vpp = NFSTOV(np);
1303
1304		/*
1305		 * Get file attributes and transfer parameters for the
1306		 * mountpoint.  This has the side effect of filling in
1307		 * (*vpp)->v_type with the correct value.
1308		 */
1309		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1310		    cred, td, &nfsva, NULL);
1311		if (ret) {
1312			/*
1313			 * Just set default values to get things going.
1314			 */
1315			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1316			nfsva.na_vattr.va_type = VDIR;
1317			nfsva.na_vattr.va_mode = 0777;
1318			nfsva.na_vattr.va_nlink = 100;
1319			nfsva.na_vattr.va_uid = (uid_t)0;
1320			nfsva.na_vattr.va_gid = (gid_t)0;
1321			nfsva.na_vattr.va_fileid = 2;
1322			nfsva.na_vattr.va_gen = 1;
1323			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1324			nfsva.na_vattr.va_size = 512 * 1024;
1325		}
1326		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1327		if (argp->flags & NFSMNT_NFSV3)
1328			ncl_fsinfo(nmp, *vpp, cred, td);
1329
1330		/*
1331		 * Lose the lock but keep the ref.
1332		 */
1333		VOP_UNLOCK(*vpp, 0);
1334		return (0);
1335	}
1336	error = EIO;
1337
1338bad:
1339	newnfs_disconnect(&nmp->nm_sockreq);
1340	crfree(nmp->nm_sockreq.nr_cred);
1341	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1342	mtx_destroy(&nmp->nm_mtx);
1343	FREE(nmp, M_NEWNFSMNT);
1344	FREE(nam, M_SONAME);
1345	return (error);
1346}
1347
1348/*
1349 * unmount system call
1350 */
1351static int
1352nfs_unmount(struct mount *mp, int mntflags)
1353{
1354	struct thread *td;
1355	struct nfsmount *nmp;
1356	int error, flags = 0, trycnt = 0;
1357
1358	td = curthread;
1359
1360	if (mntflags & MNT_FORCE)
1361		flags |= FORCECLOSE;
1362	nmp = VFSTONFS(mp);
1363	/*
1364	 * Goes something like this..
1365	 * - Call vflush() to clear out vnodes for this filesystem
1366	 * - Close the socket
1367	 * - Free up the data structures
1368	 */
1369	/* In the forced case, cancel any outstanding requests. */
1370	if (mntflags & MNT_FORCE) {
1371		error = newnfs_nmcancelreqs(nmp);
1372		if (error)
1373			goto out;
1374		/* For a forced close, get rid of the renew thread now */
1375		nfscl_umount(nmp, td);
1376	}
1377	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1378	do {
1379		error = vflush(mp, 1, flags, td);
1380		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1381			(void) nfs_catnap(PSOCK, error, "newndm");
1382	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1383	if (error)
1384		goto out;
1385
1386	/*
1387	 * We are now committed to the unmount.
1388	 */
1389	if ((mntflags & MNT_FORCE) == 0)
1390		nfscl_umount(nmp, td);
1391	newnfs_disconnect(&nmp->nm_sockreq);
1392	crfree(nmp->nm_sockreq.nr_cred);
1393	FREE(nmp->nm_nam, M_SONAME);
1394
1395	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1396	mtx_destroy(&nmp->nm_mtx);
1397	FREE(nmp, M_NEWNFSMNT);
1398out:
1399	return (error);
1400}
1401
1402/*
1403 * Return root of a filesystem
1404 */
1405static int
1406nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1407{
1408	struct vnode *vp;
1409	struct nfsmount *nmp;
1410	struct nfsnode *np;
1411	int error;
1412
1413	nmp = VFSTONFS(mp);
1414	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1415	if (error)
1416		return error;
1417	vp = NFSTOV(np);
1418	/*
1419	 * Get transfer parameters and attributes for root vnode once.
1420	 */
1421	mtx_lock(&nmp->nm_mtx);
1422	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1423		mtx_unlock(&nmp->nm_mtx);
1424		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1425	} else
1426		mtx_unlock(&nmp->nm_mtx);
1427	if (vp->v_type == VNON)
1428	    vp->v_type = VDIR;
1429	vp->v_vflag |= VV_ROOT;
1430	*vpp = vp;
1431	return (0);
1432}
1433
1434/*
1435 * Flush out the buffer cache
1436 */
1437/* ARGSUSED */
1438static int
1439nfs_sync(struct mount *mp, int waitfor)
1440{
1441	struct vnode *vp, *mvp;
1442	struct thread *td;
1443	int error, allerror = 0;
1444
1445	td = curthread;
1446
1447	/*
1448	 * Force stale buffer cache information to be flushed.
1449	 */
1450	MNT_ILOCK(mp);
1451loop:
1452	MNT_VNODE_FOREACH(vp, mp, mvp) {
1453		VI_LOCK(vp);
1454		MNT_IUNLOCK(mp);
1455		/* XXX Racy bv_cnt check. */
1456		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1457		    waitfor == MNT_LAZY) {
1458			VI_UNLOCK(vp);
1459			MNT_ILOCK(mp);
1460			continue;
1461		}
1462		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1463			MNT_ILOCK(mp);
1464			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1465			goto loop;
1466		}
1467		error = VOP_FSYNC(vp, waitfor, td);
1468		if (error)
1469			allerror = error;
1470		VOP_UNLOCK(vp, 0);
1471		vrele(vp);
1472
1473		MNT_ILOCK(mp);
1474	}
1475	MNT_IUNLOCK(mp);
1476	return (allerror);
1477}
1478
1479static int
1480nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1481{
1482	struct nfsmount *nmp = VFSTONFS(mp);
1483	struct vfsquery vq;
1484	int error;
1485
1486	bzero(&vq, sizeof(vq));
1487	switch (op) {
1488#if 0
1489	case VFS_CTL_NOLOCKS:
1490		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1491 		if (req->oldptr != NULL) {
1492 			error = SYSCTL_OUT(req, &val, sizeof(val));
1493 			if (error)
1494 				return (error);
1495 		}
1496 		if (req->newptr != NULL) {
1497 			error = SYSCTL_IN(req, &val, sizeof(val));
1498 			if (error)
1499 				return (error);
1500			if (val)
1501				nmp->nm_flag |= NFSMNT_NOLOCKS;
1502			else
1503				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1504 		}
1505		break;
1506#endif
1507	case VFS_CTL_QUERY:
1508		mtx_lock(&nmp->nm_mtx);
1509		if (nmp->nm_state & NFSSTA_TIMEO)
1510			vq.vq_flags |= VQ_NOTRESP;
1511		mtx_unlock(&nmp->nm_mtx);
1512#if 0
1513		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1514		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1515			vq.vq_flags |= VQ_NOTRESPLOCK;
1516#endif
1517		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1518		break;
1519 	case VFS_CTL_TIMEO:
1520 		if (req->oldptr != NULL) {
1521 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1522 			    sizeof(nmp->nm_tprintf_initial_delay));
1523 			if (error)
1524 				return (error);
1525 		}
1526 		if (req->newptr != NULL) {
1527			error = vfs_suser(mp, req->td);
1528			if (error)
1529				return (error);
1530 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1531 			    sizeof(nmp->nm_tprintf_initial_delay));
1532 			if (error)
1533 				return (error);
1534 			if (nmp->nm_tprintf_initial_delay < 0)
1535 				nmp->nm_tprintf_initial_delay = 0;
1536 		}
1537		break;
1538	default:
1539		return (ENOTSUP);
1540	}
1541	return (0);
1542}
1543
1544/*
1545 * Extract the information needed by the nlm from the nfs vnode.
1546 */
1547static void
1548nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1549    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1550    struct timeval *timeop)
1551{
1552	struct nfsmount *nmp;
1553	struct nfsnode *np = VTONFS(vp);
1554
1555	nmp = VFSTONFS(vp->v_mount);
1556	if (fhlenp != NULL)
1557		*fhlenp = (size_t)np->n_fhp->nfh_len;
1558	if (fhp != NULL)
1559		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1560	if (sp != NULL)
1561		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1562	if (is_v3p != NULL)
1563		*is_v3p = NFS_ISV3(vp);
1564	if (sizep != NULL)
1565		*sizep = np->n_size;
1566	if (timeop != NULL) {
1567		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1568		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1569	}
1570}
1571
1572