nfs_clvfsops.c revision 221436
1107286Sobrien/*-
295349Sobrien * Copyright (c) 1989, 1993, 1995
334229Speter *	The Regents of the University of California.  All rights reserved.
434229Speter *
551408Sobrien * This code is derived from software contributed to Berkeley by
652112Sobrien * Rick Macklem at The University of Guelph.
718334Speter *
818334Speter * Redistribution and use in source and binary forms, with or without
918334Speter * modification, are permitted provided that the following conditions
1018334Speter * are met:
1118334Speter * 1. Redistributions of source code must retain the above copyright
1218334Speter *    notice, this list of conditions and the following disclaimer.
1318334Speter * 2. Redistributions in binary form must reproduce the above copyright
1418334Speter *    notice, this list of conditions and the following disclaimer in the
1518334Speter *    documentation and/or other materials provided with the distribution.
1618334Speter * 4. Neither the name of the University nor the names of its contributors
1718334Speter *    may be used to endorse or promote products derived from this software
1818334Speter *    without specific prior written permission.
1918334Speter *
2018334Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2118334Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2218334Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2318334Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2418334Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2551408Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2618334Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2758478Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28114077Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2951408Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3058478Sobrien * SUCH DAMAGE.
31107286Sobrien *
3258478Sobrien *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
3395349Sobrien */
3458478Sobrien
3558478Sobrien#include <sys/cdefs.h>
3658478Sobrien__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 221436 2011-05-04 13:27:45Z ru $");
3758478Sobrien
3858478Sobrien
3958478Sobrien#include "opt_bootp.h"
4058478Sobrien#include "opt_nfsroot.h"
4158478Sobrien
4258478Sobrien#include <sys/param.h>
4358478Sobrien#include <sys/systm.h>
4458478Sobrien#include <sys/kernel.h>
4558478Sobrien#include <sys/bio.h>
4658478Sobrien#include <sys/buf.h>
4758478Sobrien#include <sys/clock.h>
4858478Sobrien#include <sys/jail.h>
49117418Skan#include <sys/limits.h>
5058478Sobrien#include <sys/lock.h>
5195349Sobrien#include <sys/malloc.h>
5258478Sobrien#include <sys/mbuf.h>
5358478Sobrien#include <sys/module.h>
5458478Sobrien#include <sys/mount.h>
5558478Sobrien#include <sys/proc.h>
5658478Sobrien#include <sys/socket.h>
5758478Sobrien#include <sys/socketvar.h>
5895349Sobrien#include <sys/sockio.h>
59107286Sobrien#include <sys/sysctl.h>
6058478Sobrien#include <sys/vnode.h>
6195349Sobrien#include <sys/signalvar.h>
6295349Sobrien
6395349Sobrien#include <vm/vm.h>
6495349Sobrien#include <vm/vm_extern.h>
6558478Sobrien#include <vm/uma.h>
6658478Sobrien
6758478Sobrien#include <net/if.h>
6858478Sobrien#include <net/route.h>
6958478Sobrien#include <netinet/in.h>
7058478Sobrien
7158478Sobrien#include <fs/nfs/nfsport.h>
7295349Sobrien#include <fs/nfsclient/nfsnode.h>
7395349Sobrien#include <fs/nfsclient/nfsmount.h>
74107286Sobrien#include <fs/nfsclient/nfs.h>
7558478Sobrien#include <nfs/nfsdiskless.h>
7658478Sobrien
7795349SobrienFEATURE(nfscl, "NFSv4 client");
7858478Sobrien
7958478Sobrienextern int nfscl_ticks;
8058478Sobrienextern struct timeval nfsboottime;
8158478Sobrienextern struct nfsstats	newnfsstats;
8258478Sobrien
8358478SobrienMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
84107286SobrienMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
8558478Sobrien
8658478SobrienSYSCTL_DECL(_vfs_newnfs);
8758478SobrienSYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
8858478Sobrien	&newnfsstats, nfsstats, "S,nfsstats");
8995349Sobrienstatic int nfs_ip_paranoia = 1;
9058478SobrienSYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
9158478Sobrien    &nfs_ip_paranoia, 0, "");
9258478Sobrienstatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
9395349SobrienSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
9498597Sobrien        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
9595349Sobrien/* how long between console messages "nfs server foo not responding" */
9695349Sobrienstatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
9798597SobrienSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
9858478Sobrien        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
9995349Sobrien
10098597Sobrienstatic int	nfs_mountroot(struct mount *);
10195349Sobrienstatic void	nfs_sec_name(char *, int *);
10258478Sobrienstatic void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
10358478Sobrien		    struct nfs_args *argp, const char *, struct ucred *,
10434229Speter		    struct thread *);
10518349Speterstatic int	mountnfs(struct nfs_args *, struct mount *,
10634229Speter		    struct sockaddr *, char *, u_char *, int, u_char *, int,
107107286Sobrien		    u_char *, int, struct vnode **, struct ucred *,
10818349Speter		    struct thread *, int);
10934229Speterstatic void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
11052112Sobrien		    struct sockaddr_storage *, int *, off_t *,
11168601Sobrien		    struct timeval *);
112107286Sobrienstatic vfs_mount_t nfs_mount;
11318349Speterstatic vfs_cmount_t nfs_cmount;
11458478Sobrienstatic vfs_unmount_t nfs_unmount;
11595810Sobrienstatic vfs_root_t nfs_root;
11695810Sobrienstatic vfs_statfs_t nfs_statfs;
11758478Sobrienstatic vfs_sync_t nfs_sync;
11858478Sobrienstatic vfs_sysctl_t nfs_sysctl;
11995349Sobrien
12095349Sobrien/*
12195349Sobrien * nfs vfs operations.
12295349Sobrien */
12318349Speterstatic struct vfsops nfs_vfsops = {
12495349Sobrien	.vfs_init =		ncl_init,
12595349Sobrien	.vfs_mount =		nfs_mount,
12695349Sobrien	.vfs_cmount =		nfs_cmount,
127117418Skan	.vfs_root =		nfs_root,
128117418Skan	.vfs_statfs =		nfs_statfs,
129117418Skan	.vfs_sync =		nfs_sync,
130117418Skan	.vfs_uninit =		ncl_uninit,
131117418Skan	.vfs_unmount =		nfs_unmount,
132117418Skan	.vfs_sysctl =		nfs_sysctl,
133117418Skan};
134117418SkanVFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
135117418Skan
136117418Skan/* So that loader and kldload(2) can find us, wherever we are.. */
13758478SobrienMODULE_VERSION(nfs, 1);
13895810SobrienMODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
13958478SobrienMODULE_DEPEND(nfs, krpc, 1, 1, 1);
14058478SobrienMODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
14158478SobrienMODULE_DEPEND(nfs, nfslock, 1, 1, 1);
14258478Sobrien
143117418Skan/*
144117418Skan * This structure is now defined in sys/nfs/nfs_diskless.c so that it
14558478Sobrien * can be shared by both NFS clients. It is declared here so that it
14658478Sobrien * will be defined for kernels built without NFS_ROOT, although it
14758478Sobrien * isn't used in that case.
14895810Sobrien */
14958478Sobrien#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
15058478Sobrienstruct nfs_diskless	nfs_diskless = { { { 0 } } };
15158478Sobrienstruct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
15258478Sobrienint			nfs_diskless_valid = 0;
15395348Sobrien#endif
15495348Sobrien
15595348SobrienSYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
15695348Sobrien    &nfs_diskless_valid, 0,
15796144Sobrien    "Has the diskless struct been filled correctly");
15858478Sobrien
15958478SobrienSYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
16058478Sobrien    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
16195349Sobrien
16258478SobrienSYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
163107286Sobrien    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
16458478Sobrien    "%Ssockaddr_in", "Diskless root nfs address");
16558478Sobrien
16658478Sobrien
/* Prototypes for the diskless (NFS root) support routines. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
17458478Sobrien
17558478Sobrienint
17658478Sobriennewnfs_iosize(struct nfsmount *nmp)
17758478Sobrien{
17858478Sobrien	int iosize, maxio;
179107286Sobrien
18058478Sobrien	/* First, set the upper limit for iosize */
18158478Sobrien	if (nmp->nm_flag & NFSMNT_NFSV4) {
18256810Sobrien		maxio = NFS_MAXBSIZE;
18356810Sobrien	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
18456810Sobrien		if (nmp->nm_sotype == SOCK_DGRAM)
18595810Sobrien			maxio = NFS_MAXDGRAMDATA;
18656810Sobrien		else
18758478Sobrien			maxio = NFS_MAXBSIZE;
18852112Sobrien	} else {
18997907Sobrien		maxio = NFS_V2MAXDATA;
19097907Sobrien	}
19197907Sobrien	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
19297907Sobrien		nmp->nm_rsize = maxio;
19397907Sobrien	if (nmp->nm_rsize > MAXBSIZE)
19497907Sobrien		nmp->nm_rsize = MAXBSIZE;
19597907Sobrien	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
19696466Sobrien		nmp->nm_readdirsize = maxio;
19718349Speter	if (nmp->nm_readdirsize > nmp->nm_rsize)
19895810Sobrien		nmp->nm_readdirsize = nmp->nm_rsize;
19995810Sobrien	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
20095810Sobrien		nmp->nm_wsize = maxio;
20151408Sobrien	if (nmp->nm_wsize > MAXBSIZE)
20295810Sobrien		nmp->nm_wsize = MAXBSIZE;
20351408Sobrien
20496447Sobrien	/*
20596447Sobrien	 * Calculate the size used for io buffers.  Use the larger
20696447Sobrien	 * of the two sizes to minimise nfs requests but make sure
20795810Sobrien	 * that it is at least one VM page to avoid wasting buffer
20896447Sobrien	 * space.
20996466Sobrien	 */
21097910Sobrien	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
211107286Sobrien	iosize = imax(iosize, PAGE_SIZE);
21297910Sobrien	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
21397910Sobrien	return (iosize);
21497910Sobrien}
21597910Sobrien
21696466Sobrienstatic void
21795810Sobriennfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
21858478Sobrien{
219113420Skan
220113420Skan	args->version = NFS_ARGSVERSION;
221113420Skan	args->addr = oargs->addr;
222113420Skan	args->addrlen = oargs->addrlen;
223113420Skan	args->sotype = oargs->sotype;
224113420Skan	args->proto = oargs->proto;
225113420Skan	args->fh = oargs->fh;
226113420Skan	args->fhsize = oargs->fhsize;
227113420Skan	args->flags = oargs->flags;
22873305Sobrien	args->wsize = oargs->wsize;
229113420Skan	args->rsize = oargs->rsize;
230113420Skan	args->readdirsize = oargs->readdirsize;
231113420Skan	args->timeo = oargs->timeo;
232113420Skan	args->retrans = oargs->retrans;
233113420Skan	args->readahead = oargs->readahead;
234113420Skan	args->hostname = oargs->hostname;
235113420Skan}
236113420Skan
237113420Skanstatic void
238113420Skannfs_convert_diskless(void)
239113420Skan{
240113420Skan
24158478Sobrien	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
24218334Speter		sizeof(struct ifaliasreq));
24334229Speter	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
24495810Sobrien		sizeof(struct sockaddr_in));
24595810Sobrien	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
24693263Sobrien	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
24795810Sobrien		nfsv3_diskless.root_fhsize = NFSX_MYFH;
24895810Sobrien		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
24934229Speter	} else {
25099884Sobrien		nfsv3_diskless.root_fhsize = NFSX_V2FH;
25199884Sobrien		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
25299884Sobrien	}
25399884Sobrien	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
25499884Sobrien		sizeof(struct sockaddr_in));
25599884Sobrien	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
25699884Sobrien	nfsv3_diskless.root_time = nfs_diskless.root_time;
25799884Sobrien	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
25858478Sobrien		MAXHOSTNAMELEN);
25958478Sobrien	nfs_diskless_valid = 3;
26058478Sobrien}
26158478Sobrien
26258478Sobrien/*
26358478Sobrien * nfs statfs call
26458478Sobrien */
26558478Sobrienstatic int
26658478Sobriennfs_statfs(struct mount *mp, struct statfs *sbp)
26758478Sobrien{
26858478Sobrien	struct vnode *vp;
26952112Sobrien	struct thread *td;
27058478Sobrien	struct nfsmount *nmp = VFSTONFS(mp);
27158478Sobrien	struct nfsvattr nfsva;
27258478Sobrien	struct nfsfsinfo fs;
27358478Sobrien	struct nfsstatfs sb;
27458478Sobrien	int error = 0, attrflag, gotfsinfo = 0, ret;
27558478Sobrien	struct nfsnode *np;
27658478Sobrien
27758478Sobrien	td = curthread;
27858478Sobrien
27958478Sobrien	error = vfs_busy(mp, MBF_NOWAIT);
280	if (error)
281		return (error);
282	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
283	if (error) {
284		vfs_unbusy(mp);
285		return (error);
286	}
287	vp = NFSTOV(np);
288	mtx_lock(&nmp->nm_mtx);
289	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
290		mtx_unlock(&nmp->nm_mtx);
291		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
292		    &attrflag, NULL);
293		if (!error)
294			gotfsinfo = 1;
295	} else
296		mtx_unlock(&nmp->nm_mtx);
297	if (!error)
298		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
299		    &attrflag, NULL);
300	if (attrflag == 0) {
301		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
302		    td->td_ucred, td, &nfsva, NULL);
303		if (ret) {
304			/*
305			 * Just set default values to get things going.
306			 */
307			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
308			nfsva.na_vattr.va_type = VDIR;
309			nfsva.na_vattr.va_mode = 0777;
310			nfsva.na_vattr.va_nlink = 100;
311			nfsva.na_vattr.va_uid = (uid_t)0;
312			nfsva.na_vattr.va_gid = (gid_t)0;
313			nfsva.na_vattr.va_fileid = 2;
314			nfsva.na_vattr.va_gen = 1;
315			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
316			nfsva.na_vattr.va_size = 512 * 1024;
317		}
318	}
319	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
320	if (!error) {
321	    mtx_lock(&nmp->nm_mtx);
322	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
323		nfscl_loadfsinfo(nmp, &fs);
324	    nfscl_loadsbinfo(nmp, &sb, sbp);
325	    sbp->f_iosize = newnfs_iosize(nmp);
326	    mtx_unlock(&nmp->nm_mtx);
327	    if (sbp != &mp->mnt_stat) {
328		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
329		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
330	    }
331	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
332	} else if (NFS_ISV4(vp)) {
333		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
334	}
335	vput(vp);
336	vfs_unbusy(mp);
337	return (error);
338}
339
340/*
341 * nfs version 3 fsinfo rpc call
342 */
343int
344ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
345    struct thread *td)
346{
347	struct nfsfsinfo fs;
348	struct nfsvattr nfsva;
349	int error, attrflag;
350
351	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
352	if (!error) {
353		if (attrflag)
354			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
355			    1);
356		mtx_lock(&nmp->nm_mtx);
357		nfscl_loadfsinfo(nmp, &fs);
358		mtx_unlock(&nmp->nm_mtx);
359	}
360	return (error);
361}
362
363/*
364 * Mount a remote root fs via. nfs. This depends on the info in the
365 * nfs_diskless structure that has been filled in properly by some primary
366 * bootstrap.
367 * It goes something like this:
368 * - do enough of "ifconfig" by calling ifioctl() so that the system
369 *   can talk to the server
370 * - If nfs_diskless.mygateway is filled in, use that address as
371 *   a default gateway.
372 * - build the rootfs mount point and call mountnfs() to do the rest.
373 *
374 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
375 * structure, as well as other global NFS client variables here, as
376 * nfs_mountroot() will be called once in the boot before any other NFS
377 * client activity occurs.
378 */
379static int
380nfs_mountroot(struct mount *mp)
381{
382	struct thread *td = curthread;
383	struct nfsv3_diskless *nd = &nfsv3_diskless;
384	struct socket *so;
385	struct vnode *vp;
386	struct ifreq ir;
387	int error;
388	u_long l;
389	char buf[128];
390	char *cp;
391
392#if defined(BOOTP_NFSROOT) && defined(BOOTP)
393	bootpc_init();		/* use bootp to get nfs_diskless filled in */
394#elif defined(NFS_ROOT)
395	nfs_setup_diskless();
396#endif
397
398	if (nfs_diskless_valid == 0)
399		return (-1);
400	if (nfs_diskless_valid == 1)
401		nfs_convert_diskless();
402
403	/*
404	 * XXX splnet, so networks will receive...
405	 */
406	splnet();
407
408	/*
409	 * Do enough of ifconfig(8) so that the critical net interface can
410	 * talk to the server.
411	 */
412	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
413	    td->td_ucred, td);
414	if (error)
415		panic("nfs_mountroot: socreate(%04x): %d",
416			nd->myif.ifra_addr.sa_family, error);
417
418#if 0 /* XXX Bad idea */
419	/*
420	 * We might not have been told the right interface, so we pass
421	 * over the first ten interfaces of the same kind, until we get
422	 * one of them configured.
423	 */
424
425	for (i = strlen(nd->myif.ifra_name) - 1;
426		nd->myif.ifra_name[i] >= '0' &&
427		nd->myif.ifra_name[i] <= '9';
428		nd->myif.ifra_name[i] ++) {
429		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
430		if(!error)
431			break;
432	}
433#endif
434	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
435	if (error)
436		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
437	if ((cp = getenv("boot.netif.mtu")) != NULL) {
438		ir.ifr_mtu = strtol(cp, NULL, 10);
439		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
440		freeenv(cp);
441		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
442		if (error)
443			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
444	}
445	soclose(so);
446
447	/*
448	 * If the gateway field is filled in, set it as the default route.
449	 * Note that pxeboot will set a default route of 0 if the route
450	 * is not set by the DHCP server.  Check also for a value of 0
451	 * to avoid panicking inappropriately in that situation.
452	 */
453	if (nd->mygateway.sin_len != 0 &&
454	    nd->mygateway.sin_addr.s_addr != 0) {
455		struct sockaddr_in mask, sin;
456
457		bzero((caddr_t)&mask, sizeof(mask));
458		sin = mask;
459		sin.sin_family = AF_INET;
460		sin.sin_len = sizeof(sin);
461                /* XXX MRT use table 0 for this sort of thing */
462		CURVNET_SET(TD_TO_VNET(td));
463		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
464		    (struct sockaddr *)&nd->mygateway,
465		    (struct sockaddr *)&mask,
466		    RTF_UP | RTF_GATEWAY, NULL);
467		CURVNET_RESTORE();
468		if (error)
469			panic("nfs_mountroot: RTM_ADD: %d", error);
470	}
471
472	/*
473	 * Create the rootfs mount point.
474	 */
475	nd->root_args.fh = nd->root_fh;
476	nd->root_args.fhsize = nd->root_fhsize;
477	l = ntohl(nd->root_saddr.sin_addr.s_addr);
478	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
479		(l >> 24) & 0xff, (l >> 16) & 0xff,
480		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
481	printf("NFS ROOT: %s\n", buf);
482	nd->root_args.hostname = buf;
483	if ((error = nfs_mountdiskless(buf,
484	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
485		return (error);
486	}
487
488	/*
489	 * This is not really an nfs issue, but it is much easier to
490	 * set hostname here and then let the "/etc/rc.xxx" files
491	 * mount the right /var based upon its preset value.
492	 */
493	mtx_lock(&prison0.pr_mtx);
494	strlcpy(prison0.pr_hostname, nd->my_hostnam,
495	    sizeof(prison0.pr_hostname));
496	mtx_unlock(&prison0.pr_mtx);
497	inittodr(ntohl(nd->root_time));
498	return (0);
499}
500
501/*
502 * Internal version of mount system call for diskless setup.
503 */
504static int
505nfs_mountdiskless(char *path,
506    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
507    struct vnode **vpp, struct mount *mp)
508{
509	struct sockaddr *nam;
510	int dirlen, error;
511	char *dirpath;
512
513	/*
514	 * Find the directory path in "path", which also has the server's
515	 * name/ip address in it.
516	 */
517	dirpath = strchr(path, ':');
518	if (dirpath != NULL)
519		dirlen = strlen(++dirpath);
520	else
521		dirlen = 0;
522	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
523	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
524	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
525		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
526		return (error);
527	}
528	return (0);
529}
530
531static void
532nfs_sec_name(char *sec, int *flagsp)
533{
534	if (!strcmp(sec, "krb5"))
535		*flagsp |= NFSMNT_KERB;
536	else if (!strcmp(sec, "krb5i"))
537		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
538	else if (!strcmp(sec, "krb5p"))
539		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
540}
541
542static void
543nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
544    const char *hostname, struct ucred *cred, struct thread *td)
545{
546	int s;
547	int adjsock;
548	char *p;
549
550	s = splnet();
551
552	/*
553	 * Set read-only flag if requested; otherwise, clear it if this is
554	 * an update.  If this is not an update, then either the read-only
555	 * flag is already clear, or this is a root mount and it was set
556	 * intentionally at some previous point.
557	 */
558	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
559		MNT_ILOCK(mp);
560		mp->mnt_flag |= MNT_RDONLY;
561		MNT_IUNLOCK(mp);
562	} else if (mp->mnt_flag & MNT_UPDATE) {
563		MNT_ILOCK(mp);
564		mp->mnt_flag &= ~MNT_RDONLY;
565		MNT_IUNLOCK(mp);
566	}
567
568	/*
569	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
570	 * no sense in that context.  Also, set up appropriate retransmit
571	 * and soft timeout behavior.
572	 */
573	if (argp->sotype == SOCK_STREAM) {
574		nmp->nm_flag &= ~NFSMNT_NOCONN;
575		nmp->nm_timeo = NFS_MAXTIMEO;
576		if ((argp->flags & NFSMNT_NFSV4) != 0)
577			nmp->nm_retry = INT_MAX;
578		else
579			nmp->nm_retry = NFS_RETRANS_TCP;
580	}
581
582	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
583	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
584		argp->flags &= ~NFSMNT_RDIRPLUS;
585		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
586	}
587
588	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
589	if ((argp->flags & NFSMNT_NFSV4) != 0) {
590		argp->flags &= ~NFSMNT_RESVPORT;
591		nmp->nm_flag &= ~NFSMNT_RESVPORT;
592	}
593
594	/* Re-bind if rsrvd port requested and wasn't on one */
595	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
596		  && (argp->flags & NFSMNT_RESVPORT);
597	/* Also re-bind if we're switching to/from a connected UDP socket */
598	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
599		    (argp->flags & NFSMNT_NOCONN));
600
601	/* Update flags atomically.  Don't change the lock bits. */
602	nmp->nm_flag = argp->flags | nmp->nm_flag;
603	splx(s);
604
605	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
606		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
607		if (nmp->nm_timeo < NFS_MINTIMEO)
608			nmp->nm_timeo = NFS_MINTIMEO;
609		else if (nmp->nm_timeo > NFS_MAXTIMEO)
610			nmp->nm_timeo = NFS_MAXTIMEO;
611	}
612
613	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
614		nmp->nm_retry = argp->retrans;
615		if (nmp->nm_retry > NFS_MAXREXMIT)
616			nmp->nm_retry = NFS_MAXREXMIT;
617	}
618
619	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
620		nmp->nm_wsize = argp->wsize;
621		/* Round down to multiple of blocksize */
622		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
623		if (nmp->nm_wsize <= 0)
624			nmp->nm_wsize = NFS_FABLKSIZE;
625	}
626
627	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
628		nmp->nm_rsize = argp->rsize;
629		/* Round down to multiple of blocksize */
630		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
631		if (nmp->nm_rsize <= 0)
632			nmp->nm_rsize = NFS_FABLKSIZE;
633	}
634
635	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
636		nmp->nm_readdirsize = argp->readdirsize;
637	}
638
639	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
640		nmp->nm_acregmin = argp->acregmin;
641	else
642		nmp->nm_acregmin = NFS_MINATTRTIMO;
643	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
644		nmp->nm_acregmax = argp->acregmax;
645	else
646		nmp->nm_acregmax = NFS_MAXATTRTIMO;
647	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
648		nmp->nm_acdirmin = argp->acdirmin;
649	else
650		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
651	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
652		nmp->nm_acdirmax = argp->acdirmax;
653	else
654		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
655	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
656		nmp->nm_acdirmin = nmp->nm_acdirmax;
657	if (nmp->nm_acregmin > nmp->nm_acregmax)
658		nmp->nm_acregmin = nmp->nm_acregmax;
659
660	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
661		if (argp->readahead <= NFS_MAXRAHEAD)
662			nmp->nm_readahead = argp->readahead;
663		else
664			nmp->nm_readahead = NFS_MAXRAHEAD;
665	}
666	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
667		if (argp->wcommitsize < nmp->nm_wsize)
668			nmp->nm_wcommitsize = nmp->nm_wsize;
669		else
670			nmp->nm_wcommitsize = argp->wcommitsize;
671	}
672
673	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
674		    (nmp->nm_soproto != argp->proto));
675
676	if (nmp->nm_client != NULL && adjsock) {
677		int haslock = 0, error = 0;
678
679		if (nmp->nm_sotype == SOCK_STREAM) {
680			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
681			if (!error)
682				haslock = 1;
683		}
684		if (!error) {
685		    newnfs_disconnect(&nmp->nm_sockreq);
686		    if (haslock)
687			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
688		    nmp->nm_sotype = argp->sotype;
689		    nmp->nm_soproto = argp->proto;
690		    if (nmp->nm_sotype == SOCK_DGRAM)
691			while (newnfs_connect(nmp, &nmp->nm_sockreq,
692			    cred, td, 0)) {
693				printf("newnfs_args: retrying connect\n");
694				(void) nfs_catnap(PSOCK, 0, "newnfscon");
695			}
696		}
697	} else {
698		nmp->nm_sotype = argp->sotype;
699		nmp->nm_soproto = argp->proto;
700	}
701
702	if (hostname != NULL) {
703		strlcpy(nmp->nm_hostname, hostname,
704		    sizeof(nmp->nm_hostname));
705		p = strchr(nmp->nm_hostname, ':');
706		if (p != NULL)
707			*p = '\0';
708	}
709}
710
/*
 * NULL-terminated list of the mount option names handed to
 * vfs_filteropt() by nfs_mount().
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo", "nocto",
	NULL
};
721
722/*
723 * VFS Operations.
724 *
725 * mount system call
726 * It seems a bit dumb to copyinstr() the host and path here and then
727 * bcopy() them in mountnfs(), but I wanted to detect errors before
728 * doing the sockargs() call because sockargs() allocates an mbuf and
729 * an error after that means that I have to release the mbuf.
730 */
731/* ARGSUSED */
732static int
733nfs_mount(struct mount *mp)
734{
735	struct nfs_args args = {
736	    .version = NFS_ARGSVERSION,
737	    .addr = NULL,
738	    .addrlen = sizeof (struct sockaddr_in),
739	    .sotype = SOCK_STREAM,
740	    .proto = 0,
741	    .fh = NULL,
742	    .fhsize = 0,
743	    .flags = NFSMNT_RESVPORT,
744	    .wsize = NFS_WSIZE,
745	    .rsize = NFS_RSIZE,
746	    .readdirsize = NFS_READDIRSIZE,
747	    .timeo = 10,
748	    .retrans = NFS_RETRANS,
749	    .readahead = NFS_DEFRAHEAD,
750	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
751	    .hostname = NULL,
752	    .acregmin = NFS_MINATTRTIMO,
753	    .acregmax = NFS_MAXATTRTIMO,
754	    .acdirmin = NFS_MINDIRATTRTIMO,
755	    .acdirmax = NFS_MAXDIRATTRTIMO,
756	};
757	int error = 0, ret, len;
758	struct sockaddr *nam = NULL;
759	struct vnode *vp;
760	struct thread *td;
761	char hst[MNAMELEN];
762	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
763	char *opt, *name, *secname;
764	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
765	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
766	size_t hstlen;
767
768	has_nfs_args_opt = 0;
769	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
770		error = EINVAL;
771		goto out;
772	}
773
774	td = curthread;
775	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
776		error = nfs_mountroot(mp);
777		goto out;
778	}
779
780	nfscl_init();
781
782	/*
783	 * The old mount_nfs program passed the struct nfs_args
784	 * from userspace to kernel.  The new mount_nfs program
785	 * passes string options via nmount() from userspace to kernel
786	 * and we populate the struct nfs_args in the kernel.
787	 */
788	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
789		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
790		    sizeof(args));
791		if (error != 0)
792			goto out;
793
794		if (args.version != NFS_ARGSVERSION) {
795			error = EPROGMISMATCH;
796			goto out;
797		}
798		has_nfs_args_opt = 1;
799	}
800
801	/* Handle the new style options. */
802	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
803		args.flags |= NFSMNT_NOCONN;
804	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
805		args.flags |= NFSMNT_NOCONN;
806	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
807		args.flags |= NFSMNT_NOLOCKD;
808	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
809		args.flags &= ~NFSMNT_NOLOCKD;
810	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
811		args.flags |= NFSMNT_INT;
812	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
813		args.flags |= NFSMNT_RDIRPLUS;
814	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
815		args.flags |= NFSMNT_RESVPORT;
816	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
817		args.flags &= ~NFSMNT_RESVPORT;
818	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
819		args.flags |= NFSMNT_SOFT;
820	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
821		args.flags &= ~NFSMNT_SOFT;
822	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
823		args.sotype = SOCK_DGRAM;
824	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
825		args.sotype = SOCK_DGRAM;
826	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
827		args.sotype = SOCK_STREAM;
828	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
829		args.flags |= NFSMNT_NFSV3;
830	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
831		args.flags |= NFSMNT_NFSV4;
832		args.sotype = SOCK_STREAM;
833	}
834	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
835		args.flags |= NFSMNT_ALLGSSNAME;
836	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
837		args.flags |= NFSMNT_NOCTO;
838	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
839		if (opt == NULL) {
840			vfs_mount_error(mp, "illegal readdirsize");
841			error = EINVAL;
842			goto out;
843		}
844		ret = sscanf(opt, "%d", &args.readdirsize);
845		if (ret != 1 || args.readdirsize <= 0) {
846			vfs_mount_error(mp, "illegal readdirsize: %s",
847			    opt);
848			error = EINVAL;
849			goto out;
850		}
851		args.flags |= NFSMNT_READDIRSIZE;
852	}
853	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
854		if (opt == NULL) {
855			vfs_mount_error(mp, "illegal readahead");
856			error = EINVAL;
857			goto out;
858		}
859		ret = sscanf(opt, "%d", &args.readahead);
860		if (ret != 1 || args.readahead <= 0) {
861			vfs_mount_error(mp, "illegal readahead: %s",
862			    opt);
863			error = EINVAL;
864			goto out;
865		}
866		args.flags |= NFSMNT_READAHEAD;
867	}
868	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
869		if (opt == NULL) {
870			vfs_mount_error(mp, "illegal wsize");
871			error = EINVAL;
872			goto out;
873		}
874		ret = sscanf(opt, "%d", &args.wsize);
875		if (ret != 1 || args.wsize <= 0) {
876			vfs_mount_error(mp, "illegal wsize: %s",
877			    opt);
878			error = EINVAL;
879			goto out;
880		}
881		args.flags |= NFSMNT_WSIZE;
882	}
883	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
884		if (opt == NULL) {
885			vfs_mount_error(mp, "illegal rsize");
886			error = EINVAL;
887			goto out;
888		}
889		ret = sscanf(opt, "%d", &args.rsize);
890		if (ret != 1 || args.rsize <= 0) {
891			vfs_mount_error(mp, "illegal wsize: %s",
892			    opt);
893			error = EINVAL;
894			goto out;
895		}
896		args.flags |= NFSMNT_RSIZE;
897	}
898	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
899		if (opt == NULL) {
900			vfs_mount_error(mp, "illegal retrans");
901			error = EINVAL;
902			goto out;
903		}
904		ret = sscanf(opt, "%d", &args.retrans);
905		if (ret != 1 || args.retrans <= 0) {
906			vfs_mount_error(mp, "illegal retrans: %s",
907			    opt);
908			error = EINVAL;
909			goto out;
910		}
911		args.flags |= NFSMNT_RETRANS;
912	}
913	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
914		ret = sscanf(opt, "%d", &args.acregmin);
915		if (ret != 1 || args.acregmin < 0) {
916			vfs_mount_error(mp, "illegal acregmin: %s",
917			    opt);
918			error = EINVAL;
919			goto out;
920		}
921		args.flags |= NFSMNT_ACREGMIN;
922	}
923	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
924		ret = sscanf(opt, "%d", &args.acregmax);
925		if (ret != 1 || args.acregmax < 0) {
926			vfs_mount_error(mp, "illegal acregmax: %s",
927			    opt);
928			error = EINVAL;
929			goto out;
930		}
931		args.flags |= NFSMNT_ACREGMAX;
932	}
933	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
934		ret = sscanf(opt, "%d", &args.acdirmin);
935		if (ret != 1 || args.acdirmin < 0) {
936			vfs_mount_error(mp, "illegal acdirmin: %s",
937			    opt);
938			error = EINVAL;
939			goto out;
940		}
941		args.flags |= NFSMNT_ACDIRMIN;
942	}
943	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
944		ret = sscanf(opt, "%d", &args.acdirmax);
945		if (ret != 1 || args.acdirmax < 0) {
946			vfs_mount_error(mp, "illegal acdirmax: %s",
947			    opt);
948			error = EINVAL;
949			goto out;
950		}
951		args.flags |= NFSMNT_ACDIRMAX;
952	}
953	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
954		ret = sscanf(opt, "%d", &args.timeo);
955		if (ret != 1 || args.timeo <= 0) {
956			vfs_mount_error(mp, "illegal timeout: %s",
957			    opt);
958			error = EINVAL;
959			goto out;
960		}
961		args.flags |= NFSMNT_TIMEO;
962	}
963	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
964	    == 0) {
965		ret = sscanf(opt, "%d", &negnametimeo);
966		if (ret != 1 || negnametimeo < 0) {
967			vfs_mount_error(mp, "illegal negnametimeo: %s",
968			    opt);
969			error = EINVAL;
970			goto out;
971		}
972	}
973	if (vfs_getopt(mp->mnt_optnew, "sec",
974		(void **) &secname, NULL) == 0)
975		nfs_sec_name(secname, &args.flags);
976
977	if (mp->mnt_flag & MNT_UPDATE) {
978		struct nfsmount *nmp = VFSTONFS(mp);
979
980		if (nmp == NULL) {
981			error = EIO;
982			goto out;
983		}
984		/*
985		 * When doing an update, we can't change version,
986		 * security, switch lockd strategies or change cookie
987		 * translation
988		 */
989		args.flags = (args.flags &
990		    ~(NFSMNT_NFSV3 |
991		      NFSMNT_NFSV4 |
992		      NFSMNT_KERB |
993		      NFSMNT_INTEGRITY |
994		      NFSMNT_PRIVACY |
995		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
996		    (nmp->nm_flag &
997			(NFSMNT_NFSV3 |
998			 NFSMNT_NFSV4 |
999			 NFSMNT_KERB |
1000			 NFSMNT_INTEGRITY |
1001			 NFSMNT_PRIVACY |
1002			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1003		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1004		goto out;
1005	}
1006
1007	/*
1008	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1009	 * or no-connection mode for those protocols that support
1010	 * no-connection mode (the flag will be cleared later for protocols
1011	 * that do not support no-connection mode).  This will allow a client
1012	 * to receive replies from a different IP then the request was
1013	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1014	 * not 0.
1015	 */
1016	if (nfs_ip_paranoia == 0)
1017		args.flags |= NFSMNT_NOCONN;
1018
1019	if (has_nfs_args_opt != 0) {
1020		/*
1021		 * In the 'nfs_args' case, the pointers in the args
1022		 * structure are in userland - we copy them in here.
1023		 */
1024		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1025			vfs_mount_error(mp, "Bad file handle");
1026			error = EINVAL;
1027			goto out;
1028		}
1029		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1030		    args.fhsize);
1031		if (error != 0)
1032			goto out;
1033		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1034		if (error != 0)
1035			goto out;
1036		bzero(&hst[hstlen], MNAMELEN - hstlen);
1037		args.hostname = hst;
1038		/* sockargs() call must be after above copyin() calls */
1039		error = getsockaddr(&nam, (caddr_t)args.addr,
1040		    args.addrlen);
1041		if (error != 0)
1042			goto out;
1043	} else {
1044		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1045		    &args.fhsize) == 0) {
1046			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1047				vfs_mount_error(mp, "Bad file handle");
1048				error = EINVAL;
1049				goto out;
1050			}
1051			bcopy(args.fh, nfh, args.fhsize);
1052		} else {
1053			args.fhsize = 0;
1054		}
1055		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1056		    (void **)&args.hostname, &len);
1057		if (args.hostname == NULL) {
1058			vfs_mount_error(mp, "Invalid hostname");
1059			error = EINVAL;
1060			goto out;
1061		}
1062		bcopy(args.hostname, hst, MNAMELEN);
1063		hst[MNAMELEN - 1] = '\0';
1064	}
1065
1066	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1067		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1068	else
1069		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1070	srvkrbnamelen = strlen(srvkrbname);
1071
1072	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1073		strlcpy(krbname, name, sizeof (krbname));
1074	else
1075		krbname[0] = '\0';
1076	krbnamelen = strlen(krbname);
1077
1078	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1079		strlcpy(dirpath, name, sizeof (dirpath));
1080	else
1081		dirpath[0] = '\0';
1082	dirlen = strlen(dirpath);
1083
1084	if (has_nfs_args_opt == 0 && vfs_getopt(mp->mnt_optnew, "addr",
1085	    (void **)&args.addr, &args.addrlen) == 0) {
1086		if (args.addrlen > SOCK_MAXADDRLEN) {
1087			error = ENAMETOOLONG;
1088			goto out;
1089		}
1090		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1091		bcopy(args.addr, nam, args.addrlen);
1092		nam->sa_len = args.addrlen;
1093	}
1094
1095	args.fh = nfh;
1096	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1097	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1098	    negnametimeo);
1099out:
1100	if (!error) {
1101		MNT_ILOCK(mp);
1102		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1103		MNT_IUNLOCK(mp);
1104	}
1105	return (error);
1106}
1107
1108
1109/*
1110 * VFS Operations.
1111 *
1112 * mount system call
1113 * It seems a bit dumb to copyinstr() the host and path here and then
1114 * bcopy() them in mountnfs(), but I wanted to detect errors before
1115 * doing the sockargs() call because sockargs() allocates an mbuf and
1116 * an error after that means that I have to release the mbuf.
1117 */
1118/* ARGSUSED */
1119static int
1120nfs_cmount(struct mntarg *ma, void *data, int flags)
1121{
1122	int error;
1123	struct nfs_args args;
1124
1125	error = copyin(data, &args, sizeof (struct nfs_args));
1126	if (error)
1127		return error;
1128
1129	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1130
1131	error = kernel_mount(ma, flags);
1132	return (error);
1133}
1134
1135/*
1136 * Common code for mount and mountroot
1137 */
1138static int
1139mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1140    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1141    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1142    struct ucred *cred, struct thread *td, int negnametimeo)
1143{
1144	struct nfsmount *nmp;
1145	struct nfsnode *np;
1146	int error, trycnt, ret;
1147	struct nfsvattr nfsva;
1148	static u_int64_t clval = 0;
1149
1150	if (mp->mnt_flag & MNT_UPDATE) {
1151		nmp = VFSTONFS(mp);
1152		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1153		FREE(nam, M_SONAME);
1154		return (0);
1155	} else {
1156		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1157		    krbnamelen + dirlen + srvkrbnamelen + 2,
1158		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1159		TAILQ_INIT(&nmp->nm_bufq);
1160		if (clval == 0)
1161			clval = (u_int64_t)nfsboottime.tv_sec;
1162		nmp->nm_clval = clval++;
1163		nmp->nm_krbnamelen = krbnamelen;
1164		nmp->nm_dirpathlen = dirlen;
1165		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1166		if (td->td_ucred->cr_uid != (uid_t)0) {
1167			/*
1168			 * nm_uid is used to get KerberosV credentials for
1169			 * the nfsv4 state handling operations if there is
1170			 * no host based principal set. Use the uid of
1171			 * this user if not root, since they are doing the
1172			 * mount. I don't think setting this for root will
1173			 * work, since root normally does not have user
1174			 * credentials in a credentials cache.
1175			 */
1176			nmp->nm_uid = td->td_ucred->cr_uid;
1177		} else {
1178			/*
1179			 * Just set to -1, so it won't be used.
1180			 */
1181			nmp->nm_uid = (uid_t)-1;
1182		}
1183
1184		/* Copy and null terminate all the names */
1185		if (nmp->nm_krbnamelen > 0) {
1186			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1187			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1188		}
1189		if (nmp->nm_dirpathlen > 0) {
1190			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1191			    nmp->nm_dirpathlen);
1192			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1193			    + 1] = '\0';
1194		}
1195		if (nmp->nm_srvkrbnamelen > 0) {
1196			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1197			    nmp->nm_srvkrbnamelen);
1198			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1199			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1200		}
1201		nmp->nm_sockreq.nr_cred = crhold(cred);
1202		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1203		mp->mnt_data = nmp;
1204		nmp->nm_getinfo = nfs_getnlminfo;
1205		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1206	}
1207	vfs_getnewfsid(mp);
1208	nmp->nm_mountp = mp;
1209	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1210	nmp->nm_negnametimeo = negnametimeo;
1211
1212	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1213
1214	/*
1215	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1216	 * high, depending on whether we end up with negative offsets in
1217	 * the client or server somewhere.  2GB-1 may be safer.
1218	 *
1219	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1220	 * that we can handle until we find out otherwise.
1221	 * XXX Our "safe" limit on the client is what we can store in our
1222	 * buffer cache using signed(!) block numbers.
1223	 */
1224	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1225		nmp->nm_maxfilesize = 0xffffffffLL;
1226	else
1227		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1228
1229	nmp->nm_timeo = NFS_TIMEO;
1230	nmp->nm_retry = NFS_RETRANS;
1231	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1232		nmp->nm_wsize = NFS_WSIZE;
1233		nmp->nm_rsize = NFS_RSIZE;
1234		nmp->nm_readdirsize = NFS_READDIRSIZE;
1235	}
1236	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1237	nmp->nm_numgrps = NFS_MAXGRPS;
1238	nmp->nm_readahead = NFS_DEFRAHEAD;
1239	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1240	if (nmp->nm_tprintf_delay < 0)
1241		nmp->nm_tprintf_delay = 0;
1242	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1243	if (nmp->nm_tprintf_initial_delay < 0)
1244		nmp->nm_tprintf_initial_delay = 0;
1245	nmp->nm_fhsize = argp->fhsize;
1246	if (nmp->nm_fhsize > 0)
1247		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1248	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1249	nmp->nm_nam = nam;
1250	/* Set up the sockets and per-host congestion */
1251	nmp->nm_sotype = argp->sotype;
1252	nmp->nm_soproto = argp->proto;
1253	nmp->nm_sockreq.nr_prog = NFS_PROG;
1254	if ((argp->flags & NFSMNT_NFSV4))
1255		nmp->nm_sockreq.nr_vers = NFS_VER4;
1256	else if ((argp->flags & NFSMNT_NFSV3))
1257		nmp->nm_sockreq.nr_vers = NFS_VER3;
1258	else
1259		nmp->nm_sockreq.nr_vers = NFS_VER2;
1260
1261
1262	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1263		goto bad;
1264
1265	/*
1266	 * A reference count is needed on the nfsnode representing the
1267	 * remote root.  If this object is not persistent, then backward
1268	 * traversals of the mount point (i.e. "..") will not work if
1269	 * the nfsnode gets flushed out of the cache. Ufs does not have
1270	 * this problem, because one can identify root inodes by their
1271	 * number == ROOTINO (2).
1272	 */
1273	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1274	    nmp->nm_dirpathlen > 0) {
1275		/*
1276		 * If the fhsize on the mount point == 0 for V4, the mount
1277		 * path needs to be looked up.
1278		 */
1279		trycnt = 3;
1280		do {
1281			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1282			    cred, td);
1283			if (error)
1284				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1285		} while (error && --trycnt > 0);
1286		if (error) {
1287			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1288			goto bad;
1289		}
1290	}
1291	if (nmp->nm_fhsize > 0) {
1292		/*
1293		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1294		 * non-zero for the root vnode. f_iosize will be set correctly
1295		 * by nfs_statfs() before any I/O occurs.
1296		 */
1297		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1298		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1299		    LK_EXCLUSIVE);
1300		if (error)
1301			goto bad;
1302		*vpp = NFSTOV(np);
1303
1304		/*
1305		 * Get file attributes and transfer parameters for the
1306		 * mountpoint.  This has the side effect of filling in
1307		 * (*vpp)->v_type with the correct value.
1308		 */
1309		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1310		    cred, td, &nfsva, NULL);
1311		if (ret) {
1312			/*
1313			 * Just set default values to get things going.
1314			 */
1315			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1316			nfsva.na_vattr.va_type = VDIR;
1317			nfsva.na_vattr.va_mode = 0777;
1318			nfsva.na_vattr.va_nlink = 100;
1319			nfsva.na_vattr.va_uid = (uid_t)0;
1320			nfsva.na_vattr.va_gid = (gid_t)0;
1321			nfsva.na_vattr.va_fileid = 2;
1322			nfsva.na_vattr.va_gen = 1;
1323			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1324			nfsva.na_vattr.va_size = 512 * 1024;
1325		}
1326		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1327		if (argp->flags & NFSMNT_NFSV3)
1328			ncl_fsinfo(nmp, *vpp, cred, td);
1329
1330		/*
1331		 * Lose the lock but keep the ref.
1332		 */
1333		VOP_UNLOCK(*vpp, 0);
1334		return (0);
1335	}
1336	error = EIO;
1337
1338bad:
1339	newnfs_disconnect(&nmp->nm_sockreq);
1340	crfree(nmp->nm_sockreq.nr_cred);
1341	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1342	mtx_destroy(&nmp->nm_mtx);
1343	FREE(nmp, M_NEWNFSMNT);
1344	FREE(nam, M_SONAME);
1345	return (error);
1346}
1347
1348/*
1349 * unmount system call
1350 */
1351static int
1352nfs_unmount(struct mount *mp, int mntflags)
1353{
1354	struct thread *td;
1355	struct nfsmount *nmp;
1356	int error, flags = 0, trycnt = 0;
1357
1358	td = curthread;
1359
1360	if (mntflags & MNT_FORCE)
1361		flags |= FORCECLOSE;
1362	nmp = VFSTONFS(mp);
1363	/*
1364	 * Goes something like this..
1365	 * - Call vflush() to clear out vnodes for this filesystem
1366	 * - Close the socket
1367	 * - Free up the data structures
1368	 */
1369	/* In the forced case, cancel any outstanding requests. */
1370	if (mntflags & MNT_FORCE) {
1371		error = newnfs_nmcancelreqs(nmp);
1372		if (error)
1373			goto out;
1374		/* For a forced close, get rid of the renew thread now */
1375		nfscl_umount(nmp, td);
1376	}
1377	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1378	do {
1379		error = vflush(mp, 1, flags, td);
1380		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1381			(void) nfs_catnap(PSOCK, error, "newndm");
1382	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1383	if (error)
1384		goto out;
1385
1386	/*
1387	 * We are now committed to the unmount.
1388	 */
1389	if ((mntflags & MNT_FORCE) == 0)
1390		nfscl_umount(nmp, td);
1391	newnfs_disconnect(&nmp->nm_sockreq);
1392	crfree(nmp->nm_sockreq.nr_cred);
1393	FREE(nmp->nm_nam, M_SONAME);
1394
1395	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1396	mtx_destroy(&nmp->nm_mtx);
1397	FREE(nmp, M_NEWNFSMNT);
1398out:
1399	return (error);
1400}
1401
1402/*
1403 * Return root of a filesystem
1404 */
1405static int
1406nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1407{
1408	struct vnode *vp;
1409	struct nfsmount *nmp;
1410	struct nfsnode *np;
1411	int error;
1412
1413	nmp = VFSTONFS(mp);
1414	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1415	if (error)
1416		return error;
1417	vp = NFSTOV(np);
1418	/*
1419	 * Get transfer parameters and attributes for root vnode once.
1420	 */
1421	mtx_lock(&nmp->nm_mtx);
1422	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1423		mtx_unlock(&nmp->nm_mtx);
1424		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1425	} else
1426		mtx_unlock(&nmp->nm_mtx);
1427	if (vp->v_type == VNON)
1428	    vp->v_type = VDIR;
1429	vp->v_vflag |= VV_ROOT;
1430	*vpp = vp;
1431	return (0);
1432}
1433
1434/*
1435 * Flush out the buffer cache
1436 */
1437/* ARGSUSED */
1438static int
1439nfs_sync(struct mount *mp, int waitfor)
1440{
1441	struct vnode *vp, *mvp;
1442	struct thread *td;
1443	int error, allerror = 0;
1444
1445	td = curthread;
1446
1447	/*
1448	 * Force stale buffer cache information to be flushed.
1449	 */
1450	MNT_ILOCK(mp);
1451loop:
1452	MNT_VNODE_FOREACH(vp, mp, mvp) {
1453		VI_LOCK(vp);
1454		MNT_IUNLOCK(mp);
1455		/* XXX Racy bv_cnt check. */
1456		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1457		    waitfor == MNT_LAZY) {
1458			VI_UNLOCK(vp);
1459			MNT_ILOCK(mp);
1460			continue;
1461		}
1462		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1463			MNT_ILOCK(mp);
1464			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1465			goto loop;
1466		}
1467		error = VOP_FSYNC(vp, waitfor, td);
1468		if (error)
1469			allerror = error;
1470		VOP_UNLOCK(vp, 0);
1471		vrele(vp);
1472
1473		MNT_ILOCK(mp);
1474	}
1475	MNT_IUNLOCK(mp);
1476	return (allerror);
1477}
1478
1479static int
1480nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1481{
1482	struct nfsmount *nmp = VFSTONFS(mp);
1483	struct vfsquery vq;
1484	int error;
1485
1486	bzero(&vq, sizeof(vq));
1487	switch (op) {
1488#if 0
1489	case VFS_CTL_NOLOCKS:
1490		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1491 		if (req->oldptr != NULL) {
1492 			error = SYSCTL_OUT(req, &val, sizeof(val));
1493 			if (error)
1494 				return (error);
1495 		}
1496 		if (req->newptr != NULL) {
1497 			error = SYSCTL_IN(req, &val, sizeof(val));
1498 			if (error)
1499 				return (error);
1500			if (val)
1501				nmp->nm_flag |= NFSMNT_NOLOCKS;
1502			else
1503				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1504 		}
1505		break;
1506#endif
1507	case VFS_CTL_QUERY:
1508		mtx_lock(&nmp->nm_mtx);
1509		if (nmp->nm_state & NFSSTA_TIMEO)
1510			vq.vq_flags |= VQ_NOTRESP;
1511		mtx_unlock(&nmp->nm_mtx);
1512#if 0
1513		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1514		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1515			vq.vq_flags |= VQ_NOTRESPLOCK;
1516#endif
1517		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1518		break;
1519 	case VFS_CTL_TIMEO:
1520 		if (req->oldptr != NULL) {
1521 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1522 			    sizeof(nmp->nm_tprintf_initial_delay));
1523 			if (error)
1524 				return (error);
1525 		}
1526 		if (req->newptr != NULL) {
1527			error = vfs_suser(mp, req->td);
1528			if (error)
1529				return (error);
1530 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1531 			    sizeof(nmp->nm_tprintf_initial_delay));
1532 			if (error)
1533 				return (error);
1534 			if (nmp->nm_tprintf_initial_delay < 0)
1535 				nmp->nm_tprintf_initial_delay = 0;
1536 		}
1537		break;
1538	default:
1539		return (ENOTSUP);
1540	}
1541	return (0);
1542}
1543
1544/*
1545 * Extract the information needed by the nlm from the nfs vnode.
1546 */
1547static void
1548nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1549    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1550    struct timeval *timeop)
1551{
1552	struct nfsmount *nmp;
1553	struct nfsnode *np = VTONFS(vp);
1554
1555	nmp = VFSTONFS(vp->v_mount);
1556	if (fhlenp != NULL)
1557		*fhlenp = (size_t)np->n_fhp->nfh_len;
1558	if (fhp != NULL)
1559		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1560	if (sp != NULL)
1561		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1562	if (is_v3p != NULL)
1563		*is_v3p = NFS_ISV3(vp);
1564	if (sizep != NULL)
1565		*sizep = np->n_size;
1566	if (timeop != NULL) {
1567		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1568		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1569	}
1570}
1571
1572