nfs_clvfsops.c revision 192145
117683Spst/*-
217683Spst * Copyright (c) 1989, 1993, 1995
317683Spst *	The Regents of the University of California.  All rights reserved.
417683Spst *
517683Spst * This code is derived from software contributed to Berkeley by
617683Spst * Rick Macklem at The University of Guelph.
717683Spst *
817683Spst * Redistribution and use in source and binary forms, with or without
917683Spst * modification, are permitted provided that the following conditions
1017683Spst * are met:
1117683Spst * 1. Redistributions of source code must retain the above copyright
1217683Spst *    notice, this list of conditions and the following disclaimer.
1317683Spst * 2. Redistributions in binary form must reproduce the above copyright
1417683Spst *    notice, this list of conditions and the following disclaimer in the
1517683Spst *    documentation and/or other materials provided with the distribution.
1617683Spst * 4. Neither the name of the University nor the names of its contributors
1717683Spst *    may be used to endorse or promote products derived from this software
1817683Spst *    without specific prior written permission.
1917683Spst *
2017683Spst * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2117683Spst * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2217683Spst * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2317683Spst * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24127664Sbms * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25214518Srpaulo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2617683Spst * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2717683Spst * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2875107Sfenner * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2975107Sfenner * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3075107Sfenner * SUCH DAMAGE.
3175107Sfenner *
32214518Srpaulo *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33214518Srpaulo */
34214518Srpaulo
35214518Srpaulo#include <sys/cdefs.h>
36214518Srpaulo__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 192145 2009-05-15 16:03:11Z rmacklem $");
37214518Srpaulo
38214518Srpaulo
39214518Srpaulo#include "opt_bootp.h"
40214518Srpaulo#include "opt_nfsroot.h"
41214518Srpaulo
42214518Srpaulo#include <sys/param.h>
43214518Srpaulo#include <sys/systm.h>
44214518Srpaulo#include <sys/kernel.h>
45214518Srpaulo#include <sys/bio.h>
4617683Spst#include <sys/buf.h>
4717683Spst#include <sys/clock.h>
4817683Spst#include <sys/lock.h>
49146768Ssam#include <sys/malloc.h>
5017683Spst#include <sys/mbuf.h>
5175107Sfenner#include <sys/module.h>
5275107Sfenner#include <sys/mount.h>
5317683Spst#include <sys/proc.h>
5417683Spst#include <sys/socket.h>
5517683Spst#include <sys/socketvar.h>
5617683Spst#include <sys/sockio.h>
5717683Spst#include <sys/sysctl.h>
5817683Spst#include <sys/vnode.h>
5917683Spst#include <sys/signalvar.h>
6017683Spst
6117683Spst#include <vm/vm.h>
6217683Spst#include <vm/vm_extern.h>
6317683Spst#include <vm/uma.h>
6417683Spst
65146768Ssam#include <net/if.h>
66146768Ssam#include <net/route.h>
67146768Ssam#include <netinet/in.h>
68146768Ssam
6917683Spst#include <fs/nfs/nfsport.h>
70190225Srpaulo#include <fs/nfsclient/nfsnode.h>
71190225Srpaulo#include <fs/nfsclient/nfsmount.h>
72190225Srpaulo#include <fs/nfsclient/nfs.h>
73190225Srpaulo#include <fs/nfsclient/nfsdiskless.h>
74146768Ssam
75146768Ssamextern int nfscl_ticks;
76146768Ssamextern struct timeval nfsboottime;
7717683Spstextern struct nfsstats	newnfsstats;
7817683Spst
7917683SpstMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
80146768SsamMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
81146768Ssam
82146768SsamSYSCTL_DECL(_vfs_newnfs);
83146768SsamSYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
84146768Ssam	&newnfsstats, nfsstats, "S,nfsstats");
85146768Ssamstatic int nfs_ip_paranoia = 1;
86146768SsamSYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
87146768Ssam    &nfs_ip_paranoia, 0, "");
88146768Ssamstatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
8917683SpstSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
9017683Spst        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
9117683Spst/* how long between console messages "nfs server foo not responding" */
9217683Spststatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
9317683SpstSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
9417683Spst        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
9517683Spst
9617683Spststatic void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
9717683Spst		    struct nfs_args *argp, struct ucred *, struct thread *);
9817683Spststatic int	mountnfs(struct nfs_args *, struct mount *,
9917683Spst		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
10017683Spst		    struct vnode **, struct ucred *, struct thread *);
10117683Spststatic vfs_mount_t nfs_mount;
10217683Spststatic vfs_cmount_t nfs_cmount;
10317683Spststatic vfs_unmount_t nfs_unmount;
10417683Spststatic vfs_root_t nfs_root;
10517683Spststatic vfs_statfs_t nfs_statfs;
10617683Spststatic vfs_sync_t nfs_sync;
10717683Spststatic vfs_sysctl_t nfs_sysctl;
10817683Spst
10917683Spst/*
11017683Spst * nfs vfs operations.
11117683Spst */
11217683Spststatic struct vfsops nfs_vfsops = {
11317683Spst	.vfs_init =		ncl_init,
11417683Spst	.vfs_mount =		nfs_mount,
11517683Spst	.vfs_cmount =		nfs_cmount,
11617683Spst	.vfs_root =		nfs_root,
11717683Spst	.vfs_statfs =		nfs_statfs,
11817683Spst	.vfs_sync =		nfs_sync,
11917683Spst	.vfs_uninit =		ncl_uninit,
12017683Spst	.vfs_unmount =		nfs_unmount,
12117683Spst	.vfs_sysctl =		nfs_sysctl,
12217683Spst};
12317683SpstVFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
12417683Spst
12517683Spst/* So that loader and kldload(2) can find us, wherever we are.. */
12617683SpstMODULE_VERSION(newnfs, 1);
12717683Spst
12817683Spst/*
12917683Spst * This structure must be filled in by a primary bootstrap or bootstrap
13017683Spst * server for a diskless/dataless machine. It is initialized below just
13117683Spst * to ensure that it is allocated to initialized data (.data not .bss).
13217683Spst */
13317683Spststruct nfs_diskless newnfs_diskless = { { { 0 } } };
13417683Spststruct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
13517683Spstint newnfs_diskless_valid = 0;
13617683Spst
13717683SpstSYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
13817683Spst    &newnfs_diskless_valid, 0,
13917683Spst    "Has the diskless struct been filled correctly");
14017683Spst
14117683SpstSYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
14217683Spst    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
14317683Spst
14417683SpstSYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
14517683Spst    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
14617683Spst    "%Ssockaddr_in", "Diskless root nfs address");
14717683Spst
14817683Spst
/* Declared here; no definition is visible in this part of the file. */
void		newnfsargs_ntoh(struct nfs_args *);

/* Helpers for the diskless (NFS root) mount path. */
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
15617683Spst
15717683Spstint
158241231Sdelphijnewnfs_iosize(struct nfsmount *nmp)
15917683Spst{
16017683Spst	int iosize, maxio;
16117683Spst
16217683Spst	/* First, set the upper limit for iosize */
16317683Spst	if (nmp->nm_flag & NFSMNT_NFSV4) {
16417683Spst		maxio = NFS_MAXBSIZE;
16517683Spst	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
16617683Spst		if (nmp->nm_sotype == SOCK_DGRAM)
16717683Spst			maxio = NFS_MAXDGRAMDATA;
16817683Spst		else
16917683Spst			maxio = NFS_MAXBSIZE;
17017683Spst	} else {
17117683Spst		maxio = NFS_V2MAXDATA;
17217683Spst	}
17317683Spst	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
17417683Spst		nmp->nm_rsize = maxio;
17517683Spst	if (nmp->nm_rsize > MAXBSIZE)
17617683Spst		nmp->nm_rsize = MAXBSIZE;
17717683Spst	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
17817683Spst		nmp->nm_readdirsize = maxio;
17917683Spst	if (nmp->nm_readdirsize > nmp->nm_rsize)
18017683Spst		nmp->nm_readdirsize = nmp->nm_rsize;
18117683Spst	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
18217683Spst		nmp->nm_wsize = maxio;
18317683Spst	if (nmp->nm_wsize > MAXBSIZE)
18417683Spst		nmp->nm_wsize = MAXBSIZE;
18517683Spst
18617683Spst	/*
18717683Spst	 * Calculate the size used for io buffers.  Use the larger
18817683Spst	 * of the two sizes to minimise nfs requests but make sure
18917683Spst	 * that it is at least one VM page to avoid wasting buffer
19017683Spst	 * space.
19117683Spst	 */
19217683Spst	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
19317683Spst	iosize = imax(iosize, PAGE_SIZE);
19417683Spst	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
19517683Spst	return (iosize);
19617683Spst}
19717683Spst
19817683Spststatic void
19917683Spstnfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
20017683Spst{
20117683Spst
20217683Spst	args->version = NFS_ARGSVERSION;
20317683Spst	args->addr = oargs->addr;
20417683Spst	args->addrlen = oargs->addrlen;
20517683Spst	args->sotype = oargs->sotype;
20617683Spst	args->proto = oargs->proto;
20717683Spst	args->fh = oargs->fh;
20817683Spst	args->fhsize = oargs->fhsize;
20917683Spst	args->flags = oargs->flags;
21017683Spst	args->wsize = oargs->wsize;
21117683Spst	args->rsize = oargs->rsize;
21217683Spst	args->readdirsize = oargs->readdirsize;
21317683Spst	args->timeo = oargs->timeo;
21417683Spst	args->retrans = oargs->retrans;
21517683Spst	args->readahead = oargs->readahead;
21617683Spst	args->hostname = oargs->hostname;
21717683Spst}
21817683Spst
21917683Spststatic void
22017683Spstnfs_convert_diskless(void)
22117683Spst{
22217683Spst
22317683Spst	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
22417683Spst	    sizeof (struct ifaliasreq));
22517683Spst	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
22617683Spst	    sizeof (struct sockaddr_in));
22717683Spst	nfs_convert_oargs(&newnfsv3_diskless.root_args,
22817683Spst	    &newnfs_diskless.root_args);
22917683Spst	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
23017683Spst		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
23117683Spst		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
23217683Spst		    NFSX_MYFH);
23317683Spst	} else {
23417683Spst		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
23517683Spst		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
23617683Spst		    NFSX_V2FH);
23717683Spst	}
23817683Spst	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
23917683Spst	    sizeof(struct sockaddr_in));
24017683Spst	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
24117683Spst	    MNAMELEN);
24217683Spst	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
24317683Spst	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
24417683Spst	    MAXHOSTNAMELEN);
24517683Spst	newnfs_diskless_valid = 3;
24617683Spst}
24717683Spst
24817683Spst/*
24917683Spst * nfs statfs call
25017683Spst */
25117683Spststatic int
25217683Spstnfs_statfs(struct mount *mp, struct statfs *sbp)
25317683Spst{
25417683Spst	struct vnode *vp;
25517683Spst	struct thread *td;
25617683Spst	struct nfsmount *nmp = VFSTONFS(mp);
25717683Spst	struct nfsvattr nfsva;
25817683Spst	struct nfsfsinfo fs;
25917683Spst	struct nfsstatfs sb;
26017683Spst	int error = 0, attrflag, gotfsinfo = 0, ret;
26117683Spst	struct nfsnode *np;
26217683Spst
26317683Spst	td = curthread;
26417683Spst
26517683Spst	error = vfs_busy(mp, MBF_NOWAIT);
26617683Spst	if (error)
26717683Spst		return (error);
26817683Spst	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
26917683Spst	if (error) {
27017683Spst		vfs_unbusy(mp);
27117683Spst		return (error);
27217683Spst	}
27317683Spst	vp = NFSTOV(np);
27417683Spst	mtx_lock(&nmp->nm_mtx);
27517683Spst	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
27617683Spst		mtx_unlock(&nmp->nm_mtx);
27717683Spst		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
27817683Spst		    &attrflag, NULL);
27917683Spst		if (!error)
28017683Spst			gotfsinfo = 1;
28117683Spst	} else
28217683Spst		mtx_unlock(&nmp->nm_mtx);
28317683Spst	if (!error)
28417683Spst		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
28517683Spst		    &attrflag, NULL);
28617683Spst	if (attrflag == 0) {
28717683Spst		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
28817683Spst		    td->td_ucred, td, &nfsva, NULL);
28917683Spst		if (ret) {
29017683Spst			/*
29117683Spst			 * Just set default values to get things going.
29217683Spst			 */
29317683Spst			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
29417683Spst			nfsva.na_vattr.va_type = VDIR;
29517683Spst			nfsva.na_vattr.va_mode = 0777;
29617683Spst			nfsva.na_vattr.va_nlink = 100;
29717683Spst			nfsva.na_vattr.va_uid = (uid_t)0;
29817683Spst			nfsva.na_vattr.va_gid = (gid_t)0;
29917683Spst			nfsva.na_vattr.va_fileid = 2;
30017683Spst			nfsva.na_vattr.va_gen = 1;
30117683Spst			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
30217683Spst			nfsva.na_vattr.va_size = 512 * 1024;
30317683Spst		}
30417683Spst	}
30517683Spst	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
30617683Spst	if (!error) {
30717683Spst	    mtx_lock(&nmp->nm_mtx);
30817683Spst	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
30917683Spst		nfscl_loadfsinfo(nmp, &fs);
31017683Spst	    nfscl_loadsbinfo(nmp, &sb, sbp);
31117683Spst	    sbp->f_flags = nmp->nm_flag;
31217683Spst	    sbp->f_iosize = newnfs_iosize(nmp);
31317683Spst	    mtx_unlock(&nmp->nm_mtx);
31417683Spst	    if (sbp != &mp->mnt_stat) {
31517683Spst		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
31617683Spst		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
31717683Spst	    }
31817683Spst	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
31917683Spst	} else if (NFS_ISV4(vp)) {
32017683Spst		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
32117683Spst	}
32217683Spst	vput(vp);
32317683Spst	vfs_unbusy(mp);
32417683Spst	return (error);
32517683Spst}
32617683Spst
32717683Spst/*
32817683Spst * nfs version 3 fsinfo rpc call
32917683Spst */
33017683Spstint
33117683Spstncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
33217683Spst    struct thread *td)
33317683Spst{
33417683Spst	struct nfsfsinfo fs;
33517683Spst	struct nfsvattr nfsva;
33617683Spst	int error, attrflag;
33717683Spst
33817683Spst	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
33917683Spst	if (!error) {
34017683Spst		if (attrflag)
34117683Spst			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
34217683Spst			    1);
34317683Spst		mtx_lock(&nmp->nm_mtx);
34417683Spst		nfscl_loadfsinfo(nmp, &fs);
34517683Spst		mtx_unlock(&nmp->nm_mtx);
34617683Spst	}
34717683Spst	return (error);
34817683Spst}
34917683Spst
35017683Spst/*
35117683Spst * Mount a remote root fs via. nfs. This depends on the info in the
35217683Spst * newnfs_diskless structure that has been filled in properly by some primary
35317683Spst * bootstrap.
35417683Spst * It goes something like this:
35517683Spst * - do enough of "ifconfig" by calling ifioctl() so that the system
35617683Spst *   can talk to the server
35717683Spst * - If newnfs_diskless.mygateway is filled in, use that address as
35817683Spst *   a default gateway.
35917683Spst * - build the rootfs mount point and call mountnfs() to do the rest.
36017683Spst *
36117683Spst * It is assumed to be safe to read, modify, and write the nfsv3_diskless
36217683Spst * structure, as well as other global NFS client variables here, as
36317683Spst * nfs_mountroot() will be called once in the boot before any other NFS
36417683Spst * client activity occurs.
36517683Spst */
36617683Spstint
36717683Spstncl_mountroot(struct mount *mp)
36817683Spst{
36917683Spst	struct thread *td = curthread;
37017683Spst	INIT_VPROCG(TD_TO_VPROCG(td));
37117683Spst	struct nfsv3_diskless *nd = &newnfsv3_diskless;
37217683Spst	struct socket *so;
37317683Spst	struct vnode *vp;
37417683Spst	struct ifreq ir;
37517683Spst	int error, i;
37617683Spst	u_long l;
37717683Spst	char buf[128];
37817683Spst	char *cp;
37917683Spst
38017683Spst#if defined(BOOTP_NFSROOT) && defined(BOOTP)
38117683Spst	bootpc_init();		/* use bootp to get nfs_diskless filled in */
38217683Spst#elif defined(NFS_ROOT)
38317683Spst	nfs_setup_diskless();
38417683Spst#endif
38517683Spst
38617683Spst	if (newnfs_diskless_valid == 0)
38717683Spst		return (-1);
38817683Spst	if (newnfs_diskless_valid == 1)
38917683Spst		nfs_convert_diskless();
39017683Spst
39117683Spst	/*
39217683Spst	 * XXX splnet, so networks will receive...
39317683Spst	 */
39417683Spst	splnet();
39517683Spst
39617683Spst	/*
39717683Spst	 * Do enough of ifconfig(8) so that the critical net interface can
39817683Spst	 * talk to the server.
39917683Spst	 */
40017683Spst	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
40117683Spst	    td->td_ucred, td);
40217683Spst	if (error)
40317683Spst		panic("nfs_mountroot: socreate(%04x): %d",
40417683Spst			nd->myif.ifra_addr.sa_family, error);
40517683Spst
40617683Spst#if 0 /* XXX Bad idea */
40717683Spst	/*
40817683Spst	 * We might not have been told the right interface, so we pass
40917683Spst	 * over the first ten interfaces of the same kind, until we get
41017683Spst	 * one of them configured.
41117683Spst	 */
41217683Spst
41317683Spst	for (i = strlen(nd->myif.ifra_name) - 1;
41417683Spst		nd->myif.ifra_name[i] >= '0' &&
41517683Spst		nd->myif.ifra_name[i] <= '9';
41617683Spst		nd->myif.ifra_name[i] ++) {
41717683Spst		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
41817683Spst		if(!error)
41917683Spst			break;
42017683Spst	}
42117683Spst#endif
42217683Spst	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
42317683Spst	if (error)
42417683Spst		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
42517683Spst	if ((cp = getenv("boot.netif.mtu")) != NULL) {
42617683Spst		ir.ifr_mtu = strtol(cp, NULL, 10);
42717683Spst		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
42817683Spst		freeenv(cp);
42917683Spst		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
43017683Spst		if (error)
43117683Spst			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
43217683Spst	}
43317683Spst	soclose(so);
43417683Spst
43517683Spst	/*
43617683Spst	 * If the gateway field is filled in, set it as the default route.
43717683Spst	 * Note that pxeboot will set a default route of 0 if the route
43817683Spst	 * is not set by the DHCP server.  Check also for a value of 0
43917683Spst	 * to avoid panicking inappropriately in that situation.
44017683Spst	 */
44117683Spst	if (nd->mygateway.sin_len != 0 &&
44217683Spst	    nd->mygateway.sin_addr.s_addr != 0) {
44317683Spst		struct sockaddr_in mask, sin;
44417683Spst
44517683Spst		bzero((caddr_t)&mask, sizeof(mask));
44617683Spst		sin = mask;
44717683Spst		sin.sin_family = AF_INET;
44817683Spst		sin.sin_len = sizeof(sin);
44917683Spst                /* XXX MRT use table 0 for this sort of thing */
45017683Spst		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
45117683Spst		    (struct sockaddr *)&nd->mygateway,
45217683Spst		    (struct sockaddr *)&mask,
45317683Spst		    RTF_UP | RTF_GATEWAY, NULL);
45417683Spst		if (error)
455146768Ssam			panic("nfs_mountroot: RTM_ADD: %d", error);
456146768Ssam	}
457146768Ssam
458146768Ssam	/*
459146768Ssam	 * Create the rootfs mount point.
460146768Ssam	 */
461146768Ssam	nd->root_args.fh = nd->root_fh;
462146768Ssam	nd->root_args.fhsize = nd->root_fhsize;
463146768Ssam	l = ntohl(nd->root_saddr.sin_addr.s_addr);
464146768Ssam	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
465146768Ssam		(l >> 24) & 0xff, (l >> 16) & 0xff,
46617683Spst		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
46717683Spst	printf("NFS ROOT: %s\n", buf);
46817683Spst	nd->root_args.hostname = buf;
46917683Spst	if ((error = nfs_mountdiskless(buf,
47017683Spst	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
47117683Spst		return (error);
47217683Spst	}
47317683Spst
47417683Spst	/*
47517683Spst	 * This is not really an nfs issue, but it is much easier to
47617683Spst	 * set hostname here and then let the "/etc/rc.xxx" files
47717683Spst	 * mount the right /var based upon its preset value.
47817683Spst	 */
47917683Spst	mtx_lock(&hostname_mtx);
48017683Spst	bcopy(nd->my_hostnam, V_hostname, MAXHOSTNAMELEN);
48117683Spst	V_hostname[MAXHOSTNAMELEN - 1] = '\0';
48217683Spst	for (i = 0; i < MAXHOSTNAMELEN; i++)
48317683Spst		if (V_hostname[i] == '\0')
48417683Spst			break;
48517683Spst	mtx_unlock(&hostname_mtx);
48617683Spst	inittodr(ntohl(nd->root_time));
48717683Spst	return (0);
48817683Spst}
48917683Spst
49017683Spst/*
49117683Spst * Internal version of mount system call for diskless setup.
49217683Spst */
49317683Spststatic int
49417683Spstnfs_mountdiskless(char *path,
49517683Spst    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
49617683Spst    struct vnode **vpp, struct mount *mp)
49717683Spst{
49817683Spst	struct sockaddr *nam;
499146768Ssam	int error;
500146768Ssam
501146768Ssam	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
502146768Ssam	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
503146768Ssam	    td->td_ucred, td)) != 0) {
504146768Ssam		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
505146768Ssam		return (error);
506146768Ssam	}
507146768Ssam	return (0);
508146768Ssam}
509146768Ssam
510146768Ssamstatic void
511146768Ssamnfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
512146768Ssam    struct ucred *cred, struct thread *td)
513146768Ssam{
514146768Ssam	int s;
515146768Ssam	int adjsock;
516146768Ssam
517146768Ssam	s = splnet();
518146768Ssam
51917683Spst	/*
52017683Spst	 * Set read-only flag if requested; otherwise, clear it if this is
52117683Spst	 * an update.  If this is not an update, then either the read-only
52217683Spst	 * flag is already clear, or this is a root mount and it was set
52317683Spst	 * intentionally at some previous point.
52417683Spst	 */
52517683Spst	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
52617683Spst		MNT_ILOCK(mp);
52717683Spst		mp->mnt_flag |= MNT_RDONLY;
52817683Spst		MNT_IUNLOCK(mp);
52917683Spst	} else if (mp->mnt_flag & MNT_UPDATE) {
53017683Spst		MNT_ILOCK(mp);
53117683Spst		mp->mnt_flag &= ~MNT_RDONLY;
53217683Spst		MNT_IUNLOCK(mp);
53317683Spst	}
53417683Spst
53517683Spst	/*
53617683Spst	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
53717683Spst	 * no sense in that context.  Also, set up appropriate retransmit
53817683Spst	 * and soft timeout behavior.
53917683Spst	 */
54017683Spst	if (argp->sotype == SOCK_STREAM) {
54117683Spst		nmp->nm_flag &= ~NFSMNT_NOCONN;
54217683Spst		nmp->nm_timeo = NFS_MAXTIMEO;
54317683Spst	}
54417683Spst
54517683Spst	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
54617683Spst	if ((argp->flags & NFSMNT_NFSV3) == 0)
54717683Spst		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
54817683Spst
54917683Spst	/* Also re-bind if we're switching to/from a connected UDP socket */
55017683Spst	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
55117683Spst		    (argp->flags & NFSMNT_NOCONN));
55217683Spst
55317683Spst	/* Update flags atomically.  Don't change the lock bits. */
55417683Spst	nmp->nm_flag = argp->flags | nmp->nm_flag;
55517683Spst	splx(s);
55617683Spst
55717683Spst	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
55817683Spst		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
55917683Spst		if (nmp->nm_timeo < NFS_MINTIMEO)
56017683Spst			nmp->nm_timeo = NFS_MINTIMEO;
56117683Spst		else if (nmp->nm_timeo > NFS_MAXTIMEO)
56217683Spst			nmp->nm_timeo = NFS_MAXTIMEO;
56317683Spst	}
56417683Spst
56517683Spst	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
56617683Spst		nmp->nm_retry = argp->retrans;
56717683Spst		if (nmp->nm_retry > NFS_MAXREXMIT)
56817683Spst			nmp->nm_retry = NFS_MAXREXMIT;
56917683Spst	}
57017683Spst
57117683Spst	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
57217683Spst		nmp->nm_wsize = argp->wsize;
57317683Spst		/* Round down to multiple of blocksize */
57417683Spst		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
57517683Spst		if (nmp->nm_wsize <= 0)
57617683Spst			nmp->nm_wsize = NFS_FABLKSIZE;
57717683Spst	}
57817683Spst
57917683Spst	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
58017683Spst		nmp->nm_rsize = argp->rsize;
58117683Spst		/* Round down to multiple of blocksize */
58217683Spst		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
58317683Spst		if (nmp->nm_rsize <= 0)
58417683Spst			nmp->nm_rsize = NFS_FABLKSIZE;
58517683Spst	}
58617683Spst
58717683Spst	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
58817683Spst		nmp->nm_readdirsize = argp->readdirsize;
58917683Spst	}
59017683Spst
59117683Spst	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
59217683Spst		nmp->nm_acregmin = argp->acregmin;
59317683Spst	else
59417683Spst		nmp->nm_acregmin = NFS_MINATTRTIMO;
59517683Spst	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
59617683Spst		nmp->nm_acregmax = argp->acregmax;
59717683Spst	else
59817683Spst		nmp->nm_acregmax = NFS_MAXATTRTIMO;
59917683Spst	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
60017683Spst		nmp->nm_acdirmin = argp->acdirmin;
60117683Spst	else
60217683Spst		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
60317683Spst	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
60417683Spst		nmp->nm_acdirmax = argp->acdirmax;
60517683Spst	else
60617683Spst		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
60717683Spst	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
60817683Spst		nmp->nm_acdirmin = nmp->nm_acdirmax;
60917683Spst	if (nmp->nm_acregmin > nmp->nm_acregmax)
61017683Spst		nmp->nm_acregmin = nmp->nm_acregmax;
61117683Spst
61217683Spst	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
61317683Spst		if (argp->readahead <= NFS_MAXRAHEAD)
61417683Spst			nmp->nm_readahead = argp->readahead;
61517683Spst		else
61617683Spst			nmp->nm_readahead = NFS_MAXRAHEAD;
61717683Spst	}
61817683Spst	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
61917683Spst		if (argp->wcommitsize < nmp->nm_wsize)
62017683Spst			nmp->nm_wcommitsize = nmp->nm_wsize;
62117683Spst		else
62217683Spst			nmp->nm_wcommitsize = argp->wcommitsize;
62317683Spst	}
62417683Spst
62517683Spst	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
62617683Spst		    (nmp->nm_soproto != argp->proto));
62717683Spst
62817683Spst	if (nmp->nm_client != NULL && adjsock) {
62917683Spst		int haslock = 0, error = 0;
63017683Spst
63117683Spst		if (nmp->nm_sotype == SOCK_STREAM) {
63217683Spst			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
63317683Spst			if (!error)
63417683Spst				haslock = 1;
63517683Spst		}
63617683Spst		if (!error) {
63717683Spst		    newnfs_disconnect(&nmp->nm_sockreq);
63817683Spst		    if (haslock)
63917683Spst			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
64017683Spst		    nmp->nm_sotype = argp->sotype;
64117683Spst		    nmp->nm_soproto = argp->proto;
64217683Spst		    if (nmp->nm_sotype == SOCK_DGRAM)
64317683Spst			while (newnfs_connect(nmp, &nmp->nm_sockreq,
64417683Spst			    cred, td, 0)) {
645172677Smlaier				printf("newnfs_args: retrying connect\n");
64617683Spst				(void) nfs_catnap(PSOCK, "newnfscon");
64717683Spst			}
64817683Spst		}
64917683Spst	} else {
65017683Spst		nmp->nm_sotype = argp->sotype;
65117683Spst		nmp->nm_soproto = argp->proto;
65217683Spst	}
65317683Spst}
65417683Spst
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() and fails with EINVAL when that returns non-zero.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "wsize", "rsize",
	"retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	NULL
};
66217683Spst
66317683Spst/*
66417683Spst * VFS Operations.
66517683Spst *
66617683Spst * mount system call
66717683Spst * It seems a bit dumb to copyinstr() the host and path here and then
66817683Spst * bcopy() them in mountnfs(), but I wanted to detect errors before
66917683Spst * doing the sockargs() call because sockargs() allocates an mbuf and
67017683Spst * an error after that means that I have to release the mbuf.
67117683Spst */
67217683Spst/* ARGSUSED */
67317683Spststatic int
67417683Spstnfs_mount(struct mount *mp)
67517683Spst{
67617683Spst	struct nfs_args args = {
67717683Spst	    .version = NFS_ARGSVERSION,
67817683Spst	    .addr = NULL,
67917683Spst	    .addrlen = sizeof (struct sockaddr_in),
68017683Spst	    .sotype = SOCK_STREAM,
68117683Spst	    .proto = 0,
68217683Spst	    .fh = NULL,
68317683Spst	    .fhsize = 0,
68417683Spst	    .flags = 0,
68517683Spst	    .wsize = NFS_WSIZE,
68617683Spst	    .rsize = NFS_RSIZE,
68717683Spst	    .readdirsize = NFS_READDIRSIZE,
68817683Spst	    .timeo = 10,
68917683Spst	    .retrans = NFS_RETRANS,
69017683Spst	    .readahead = NFS_DEFRAHEAD,
69117683Spst	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
69217683Spst	    .hostname = NULL,
69317683Spst	    /* args version 4 */
69417683Spst	    .acregmin = NFS_MINATTRTIMO,
69517683Spst	    .acregmax = NFS_MAXATTRTIMO,
69617683Spst	    .acdirmin = NFS_MINDIRATTRTIMO,
69717683Spst	    .acdirmax = NFS_MAXDIRATTRTIMO,
69817683Spst	    .dirlen = 0,
69917683Spst	    .krbnamelen = 0,
70017683Spst	};
70117683Spst	int error;
70217683Spst	struct sockaddr *nam;
70317683Spst	struct vnode *vp;
70417683Spst	struct thread *td;
70517683Spst	char hst[MNAMELEN];
70617683Spst	size_t len;
70717683Spst	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
70817683Spst
70917683Spst	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
71017683Spst		error = EINVAL;
71117683Spst		goto out;
71217683Spst	}
71317683Spst
71417683Spst	td = curthread;
71517683Spst	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
71617683Spst		error = ncl_mountroot(mp);
71717683Spst		goto out;
71817683Spst	}
71917683Spst
72017683Spst	error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
72117683Spst	if (error)
72217683Spst		goto out;
72317683Spst
72417683Spst	if (args.version != NFS_ARGSVERSION) {
72517683Spst		error = EPROGMISMATCH;
72617683Spst		goto out;
72717683Spst	}
72817683Spst
729146768Ssam	nfscl_init();
730146768Ssam
731146768Ssam	if (mp->mnt_flag & MNT_UPDATE) {
732146768Ssam		struct nfsmount *nmp = VFSTONFS(mp);
73317683Spst
73417683Spst		if (nmp == NULL) {
735146768Ssam			error = EIO;
736146768Ssam			goto out;
737146768Ssam		}
738146768Ssam		/*
739146768Ssam		 * When doing an update, we can't change version,
740146768Ssam		 * security, switch lockd strategies or change cookie
74117683Spst		 * translation
74217683Spst		 */
743146768Ssam		args.flags = (args.flags &
74417683Spst		    ~(NFSMNT_NFSV3 |
74517683Spst		      NFSMNT_NFSV4 |
74617683Spst		      NFSMNT_KERB |
74717683Spst		      NFSMNT_INTEGRITY |
74817683Spst		      NFSMNT_PRIVACY |
74917683Spst		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
75017683Spst		    (nmp->nm_flag &
75117683Spst			(NFSMNT_NFSV3 |
75217683Spst			 NFSMNT_NFSV4 |
75317683Spst			 NFSMNT_KERB |
75417683Spst			 NFSMNT_INTEGRITY |
75517683Spst			 NFSMNT_PRIVACY |
75617683Spst			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
75717683Spst		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
75817683Spst		goto out;
75917683Spst	}
76017683Spst
76117683Spst	/*
76217683Spst	 * Make the nfs_ip_paranoia sysctl serve as the default connection
76317683Spst	 * or no-connection mode for those protocols that support
76417683Spst	 * no-connection mode (the flag will be cleared later for protocols
76517683Spst	 * that do not support no-connection mode).  This will allow a client
76617683Spst	 * to receive replies from a different IP then the request was
76717683Spst	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
76817683Spst	 * not 0.
76917683Spst	 */
77017683Spst	if (nfs_ip_paranoia == 0)
77117683Spst		args.flags |= NFSMNT_NOCONN;
77217683Spst	if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
77317683Spst		error = EINVAL;
77417683Spst		goto out;
77517683Spst	}
77617683Spst	if (args.fhsize > 0) {
77717683Spst		error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
77817683Spst		if (error)
77917683Spst			goto out;
780146768Ssam	}
78117683Spst	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
782146768Ssam	if (error)
783146768Ssam		goto out;
784146768Ssam	bzero(&hst[len], MNAMELEN - len);
785146768Ssam	if (args.krbnamelen > 0) {
786146768Ssam		if (args.krbnamelen >= 100) {
787146768Ssam			error = EINVAL;
78817683Spst			goto out;
78917683Spst		}
79017683Spst		error = copyin(args.krbname, krbname, args.krbnamelen);
79117683Spst		if (error)
79217683Spst			goto out;
793146768Ssam		krbname[args.krbnamelen] = '\0';
79417683Spst	} else {
795146768Ssam		krbname[0] = '\0';
796146768Ssam		args.krbnamelen = 0;
797146768Ssam	}
798146768Ssam	if (args.dirlen > 0) {
79917683Spst		if (args.dirlen >= 100) {
80017683Spst			error = EINVAL;
801146768Ssam			goto out;
80217683Spst		}
803146768Ssam		error = copyin(args.dirpath, dirpath, args.dirlen);
804146768Ssam		if (error)
805146768Ssam			goto out;
806146768Ssam		dirpath[args.dirlen] = '\0';
80717683Spst	} else {
80817683Spst		dirpath[0] = '\0';
80917683Spst		args.dirlen = 0;
810146768Ssam	}
81117683Spst	if (args.srvkrbnamelen > 0) {
81217683Spst		if (args.srvkrbnamelen >= 100) {
81317683Spst			error = EINVAL;
81417683Spst			goto out;
81517683Spst		}
81617683Spst		error = copyin(args.srvkrbname, srvkrbname, args.srvkrbnamelen);
81717683Spst		if (error)
81817683Spst			goto out;
81917683Spst		srvkrbname[args.srvkrbnamelen] = '\0';
82017683Spst	} else {
82117683Spst		srvkrbname[0] = '\0';
82217683Spst		args.srvkrbnamelen = 0;
82317683Spst	}
82417683Spst	/* sockargs() call must be after above copyin() calls */
82517683Spst	error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
82617683Spst	if (error)
82717683Spst		goto out;
828146768Ssam	args.fh = nfh;
829146768Ssam	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
830146768Ssam	    &vp, td->td_ucred, td);
831146768Ssamout:
832146768Ssam	if (!error) {
833146768Ssam		MNT_ILOCK(mp);
834146768Ssam		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
835146768Ssam		MNT_IUNLOCK(mp);
836146768Ssam	}
837146768Ssam	return (error);
83817683Spst}
83917683Spst
84017683Spst
84117683Spst/*
84217683Spst * VFS Operations.
84317683Spst *
84417683Spst * mount system call
84517683Spst * It seems a bit dumb to copyinstr() the host and path here and then
84617683Spst * bcopy() them in mountnfs(), but I wanted to detect errors before
847146768Ssam * doing the sockargs() call because sockargs() allocates an mbuf and
848146768Ssam * an error after that means that I have to release the mbuf.
849146768Ssam */
850146768Ssam/* ARGSUSED */
851146768Ssamstatic int
85217683Spstnfs_cmount(struct mntarg *ma, void *data, int flags)
853146768Ssam{
854146768Ssam	int error;
855146768Ssam	struct nfs_args args;
856146768Ssam
857146768Ssam	error = copyin(data, &args, sizeof (struct nfs_args));
858146768Ssam	if (error)
859146768Ssam		return error;
860127664Sbms
861127664Sbms	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
862127664Sbms
863146768Ssam	error = kernel_mount(ma, flags);
864146768Ssam	return (error);
865146768Ssam}
866146768Ssam
867146768Ssam/*
868127664Sbms * Common code for mount and mountroot
869127664Sbms */
870127664Sbmsstatic int
871127664Sbmsmountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
872127664Sbms    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
873127664Sbms    struct vnode **vpp, struct ucred *cred, struct thread *td)
874127664Sbms{
875127664Sbms	struct nfsmount *nmp;
876146768Ssam	struct nfsnode *np;
877146768Ssam	int error, trycnt, ret, clearintr;
878146768Ssam	struct nfsvattr nfsva;
879146768Ssam	static u_int64_t clval = 0;
880146768Ssam
881146768Ssam	if (mp->mnt_flag & MNT_UPDATE) {
882146768Ssam		nmp = VFSTONFS(mp);
883127664Sbms		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
884127664Sbms		FREE(nam, M_SONAME);
885146768Ssam		return (0);
886127664Sbms	} else {
887127664Sbms		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
888127664Sbms		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
889127664Sbms		    M_NEWNFSMNT, M_WAITOK);
890146768Ssam		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
891146768Ssam		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
892146768Ssam		TAILQ_INIT(&nmp->nm_bufq);
893146768Ssam		if (clval == 0)
894127664Sbms			clval = (u_int64_t)nfsboottime.tv_sec;
895146768Ssam		nmp->nm_clval = clval++;
89617683Spst		nmp->nm_krbnamelen = argp->krbnamelen;
897127664Sbms		nmp->nm_dirpathlen = argp->dirlen;
89817683Spst		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
89917683Spst		if (nmp->nm_dirpathlen > 0) {
900146768Ssam			/*
901146768Ssam			 * Since we will be doing dirpath as root,
902146768Ssam			 * set nm_uid to the real uid doing the mount,
903146768Ssam			 * since that is normally the user with a valid TGT.
904146768Ssam			 */
905146768Ssam			nmp->nm_uid = td->td_ucred->cr_ruid;
906146768Ssam		} else {
907146768Ssam			/*
908146768Ssam			 * Just set to -1, so the first Op
909146768Ssam			 * will set it later, to the uid of
910146768Ssam			 * the process doing that (usually
911146768Ssam			 * from a first open in the mount
912146768Ssam			 * point).
913146768Ssam			 */
914146768Ssam			nmp->nm_uid = (uid_t)-1;
91517683Spst		}
91617683Spst
917127664Sbms		/* Copy and null terminate all the names */
918127664Sbms		if (nmp->nm_krbnamelen > 0) {
919127664Sbms			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
920127664Sbms			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
921127664Sbms		}
922127664Sbms		if (nmp->nm_dirpathlen > 0) {
923127664Sbms			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
924127664Sbms			    nmp->nm_dirpathlen);
925127664Sbms			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
926127664Sbms			    + 1] = '\0';
927190225Srpaulo		}
928190225Srpaulo		if (nmp->nm_srvkrbnamelen > 0) {
929190225Srpaulo			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
930190225Srpaulo			    nmp->nm_srvkrbnamelen);
931190225Srpaulo			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
932190225Srpaulo			    + nmp->nm_srvkrbnamelen + 2] = '\0';
933190225Srpaulo		}
934190225Srpaulo		nmp->nm_sockreq.nr_cred = crhold(cred);
935190225Srpaulo		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
936190225Srpaulo		mp->mnt_data = nmp;
937190225Srpaulo	}
93817683Spst	vfs_getnewfsid(mp);
93917683Spst	nmp->nm_mountp = mp;
94017683Spst	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
94117683Spst
94217683Spst	/*
94317683Spst	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
94417683Spst	 * high, depending on whether we end up with negative offsets in
94517683Spst	 * the client or server somewhere.  2GB-1 may be safer.
94617683Spst	 *
94717683Spst	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
94817683Spst	 * that we can handle until we find out otherwise.
94917683Spst	 * XXX Our "safe" limit on the client is what we can store in our
95017683Spst	 * buffer cache using signed(!) block numbers.
95117683Spst	 */
95217683Spst	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
95317683Spst		nmp->nm_maxfilesize = 0xffffffffLL;
95417683Spst	else
95517683Spst		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
95617683Spst
95717683Spst	nmp->nm_timeo = NFS_TIMEO;
95817683Spst	nmp->nm_retry = NFS_RETRANS;
95917683Spst	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
96017683Spst		nmp->nm_wsize = NFS_WSIZE;
96117683Spst		nmp->nm_rsize = NFS_RSIZE;
96217683Spst		nmp->nm_readdirsize = NFS_READDIRSIZE;
96317683Spst	}
96417683Spst	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
96517683Spst	nmp->nm_numgrps = NFS_MAXGRPS;
96617683Spst	nmp->nm_readahead = NFS_DEFRAHEAD;
96717683Spst	nmp->nm_tprintf_delay = nfs_tprintf_delay;
96817683Spst	if (nmp->nm_tprintf_delay < 0)
96917683Spst		nmp->nm_tprintf_delay = 0;
97017683Spst	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
97117683Spst	if (nmp->nm_tprintf_initial_delay < 0)
97217683Spst		nmp->nm_tprintf_initial_delay = 0;
97317683Spst	nmp->nm_fhsize = argp->fhsize;
97417683Spst	if (nmp->nm_fhsize > 0)
97517683Spst		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
97617683Spst	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
97717683Spst	nmp->nm_nam = nam;
97817683Spst	/* Set up the sockets and per-host congestion */
97917683Spst	nmp->nm_sotype = argp->sotype;
98017683Spst	nmp->nm_soproto = argp->proto;
98117683Spst	nmp->nm_sockreq.nr_prog = NFS_PROG;
98217683Spst	if ((argp->flags & NFSMNT_NFSV4))
98317683Spst		nmp->nm_sockreq.nr_vers = NFS_VER4;
98417683Spst	else if ((argp->flags & NFSMNT_NFSV3))
98517683Spst		nmp->nm_sockreq.nr_vers = NFS_VER3;
98617683Spst	else
98717683Spst		nmp->nm_sockreq.nr_vers = NFS_VER2;
98817683Spst
98917683Spst	nfs_decode_args(mp, nmp, argp, cred, td);
99017683Spst
99117683Spst	/*
99217683Spst	 * For Connection based sockets (TCP,...) do the connect here,
99317683Spst	 * but make it interruptible, even for non-interuptible mounts.
99417683Spst	 */
99517683Spst	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
99617683Spst		nmp->nm_flag |= NFSMNT_INT;
99717683Spst		clearintr = 1;
99817683Spst	} else {
99917683Spst		clearintr = 0;
100017683Spst	}
100117683Spst	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
100217683Spst		goto bad;
100317683Spst	if (clearintr)
100417683Spst		nmp->nm_flag &= ~NFSMNT_INT;
100517683Spst
100617683Spst	/*
100717683Spst	 * A reference count is needed on the nfsnode representing the
100817683Spst	 * remote root.  If this object is not persistent, then backward
100917683Spst	 * traversals of the mount point (i.e. "..") will not work if
101017683Spst	 * the nfsnode gets flushed out of the cache. Ufs does not have
101117683Spst	 * this problem, because one can identify root inodes by their
101217683Spst	 * number == ROOTINO (2).
101317683Spst	 */
101417683Spst	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
101517683Spst	    nmp->nm_dirpathlen > 0) {
101617683Spst		/*
101717683Spst		 * If the fhsize on the mount point == 0 for V4, the mount
101817683Spst		 * path needs to be looked up.
101917683Spst		 */
102017683Spst		trycnt = 3;
102117683Spst		do {
102217683Spst			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
102317683Spst			    cred, td);
102417683Spst			if (error)
102517683Spst				(void) nfs_catnap(PZERO, "nfsgetdirp");
102617683Spst		} while (error && --trycnt > 0);
102717683Spst		if (error) {
102817683Spst			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
102917683Spst			goto bad;
103017683Spst		}
103117683Spst	}
103217683Spst	if (nmp->nm_fhsize > 0) {
103317683Spst		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
103417683Spst		if (error)
103517683Spst			goto bad;
103617683Spst		*vpp = NFSTOV(np);
103717683Spst
103817683Spst		/*
103917683Spst		 * Get file attributes and transfer parameters for the
104017683Spst		 * mountpoint.  This has the side effect of filling in
104117683Spst		 * (*vpp)->v_type with the correct value.
104217683Spst		 */
104317683Spst		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
104417683Spst		    cred, td, &nfsva, NULL);
104517683Spst		if (ret) {
104617683Spst			/*
104717683Spst			 * Just set default values to get things going.
104817683Spst			 */
104917683Spst			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
105017683Spst			nfsva.na_vattr.va_type = VDIR;
105117683Spst			nfsva.na_vattr.va_mode = 0777;
105217683Spst			nfsva.na_vattr.va_nlink = 100;
105317683Spst			nfsva.na_vattr.va_uid = (uid_t)0;
105498530Sfenner			nfsva.na_vattr.va_gid = (gid_t)0;
105598530Sfenner			nfsva.na_vattr.va_fileid = 2;
105698530Sfenner			nfsva.na_vattr.va_gen = 1;
105798530Sfenner			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
105817683Spst			nfsva.na_vattr.va_size = 512 * 1024;
105917683Spst		}
106017683Spst		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
106117683Spst		if (argp->flags & NFSMNT_NFSV3)
106217683Spst			ncl_fsinfo(nmp, *vpp, cred, td);
106317683Spst
106417683Spst		/*
106517683Spst		 * Lose the lock but keep the ref.
106617683Spst		 */
106717683Spst		VOP_UNLOCK(*vpp, 0);
106817683Spst		return (0);
106917683Spst	}
107017683Spst	error = EIO;
107117683Spst
107217683Spstbad:
107317683Spst	newnfs_disconnect(&nmp->nm_sockreq);
107417683Spst	crfree(nmp->nm_sockreq.nr_cred);
107517683Spst	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
107617683Spst	mtx_destroy(&nmp->nm_mtx);
107717683Spst	FREE(nmp, M_NEWNFSMNT);
107817683Spst	FREE(nam, M_SONAME);
107917683Spst	return (error);
108017683Spst}
108117683Spst
108217683Spst/*
108317683Spst * unmount system call
108417683Spst */
108517683Spststatic int
108617683Spstnfs_unmount(struct mount *mp, int mntflags)
108717683Spst{
108817683Spst	struct thread *td;
108917683Spst	struct nfsmount *nmp;
109017683Spst	int error, flags = 0, trycnt = 0;
109117683Spst
109217683Spst	td = curthread;
109317683Spst
109417683Spst	if (mntflags & MNT_FORCE)
109517683Spst		flags |= FORCECLOSE;
109617683Spst	nmp = VFSTONFS(mp);
109717683Spst	/*
109817683Spst	 * Goes something like this..
109917683Spst	 * - Call vflush() to clear out vnodes for this filesystem
110017683Spst	 * - Close the socket
110117683Spst	 * - Free up the data structures
110217683Spst	 */
110317683Spst	/* In the forced case, cancel any outstanding requests. */
110417683Spst	if (mntflags & MNT_FORCE) {
110517683Spst		error = newnfs_nmcancelreqs(nmp);
1106127664Sbms		if (error)
110717683Spst			goto out;
110817683Spst		/* For a forced close, get rid of the renew thread now */
110917683Spst		nfscl_umount(nmp, td);
1110127664Sbms	}
111117683Spst	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
111217683Spst	do {
111317683Spst		error = vflush(mp, 1, flags, td);
111417683Spst		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
111517683Spst			(void) nfs_catnap(PSOCK, "newndm");
1116127664Sbms	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
111717683Spst	if (error)
111817683Spst		goto out;
111917683Spst
112017683Spst	/*
112117683Spst	 * We are now committed to the unmount.
112217683Spst	 */
112317683Spst	if ((mntflags & MNT_FORCE) == 0)
112417683Spst		nfscl_umount(nmp, td);
112517683Spst	newnfs_disconnect(&nmp->nm_sockreq);
112617683Spst	crfree(nmp->nm_sockreq.nr_cred);
112717683Spst	FREE(nmp->nm_nam, M_SONAME);
112817683Spst
112917683Spst	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
113017683Spst	mtx_destroy(&nmp->nm_mtx);
113117683Spst	FREE(nmp, M_NEWNFSMNT);
113217683Spstout:
113317683Spst	return (error);
113417683Spst}
113517683Spst
113617683Spst/*
113717683Spst * Return root of a filesystem
113817683Spst */
113917683Spststatic int
114017683Spstnfs_root(struct mount *mp, int flags, struct vnode **vpp)
114117683Spst{
114217683Spst	struct vnode *vp;
114317683Spst	struct nfsmount *nmp;
114417683Spst	struct nfsnode *np;
114517683Spst	int error;
114617683Spst
114717683Spst	nmp = VFSTONFS(mp);
114817683Spst	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
114917683Spst	if (error)
115017683Spst		return error;
115117683Spst	vp = NFSTOV(np);
115217683Spst	/*
115317683Spst	 * Get transfer parameters and attributes for root vnode once.
115417683Spst	 */
115517683Spst	mtx_lock(&nmp->nm_mtx);
115617683Spst	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
115717683Spst		mtx_unlock(&nmp->nm_mtx);
115817683Spst		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
115917683Spst	} else
116017683Spst		mtx_unlock(&nmp->nm_mtx);
116117683Spst	if (vp->v_type == VNON)
116217683Spst	    vp->v_type = VDIR;
116317683Spst	vp->v_vflag |= VV_ROOT;
116417683Spst	*vpp = vp;
116517683Spst	return (0);
116617683Spst}
116717683Spst
116817683Spst/*
116917683Spst * Flush out the buffer cache
117017683Spst */
117117683Spst/* ARGSUSED */
117217683Spststatic int
117317683Spstnfs_sync(struct mount *mp, int waitfor)
117417683Spst{
117517683Spst	struct vnode *vp, *mvp;
117617683Spst	struct thread *td;
117717683Spst	int error, allerror = 0;
117817683Spst
117917683Spst	td = curthread;
118017683Spst
118117683Spst	/*
118217683Spst	 * Force stale buffer cache information to be flushed.
118317683Spst	 */
118417683Spst	MNT_ILOCK(mp);
118517683Spstloop:
118617683Spst	MNT_VNODE_FOREACH(vp, mp, mvp) {
118717683Spst		VI_LOCK(vp);
118817683Spst		MNT_IUNLOCK(mp);
118917683Spst		/* XXX Racy bv_cnt check. */
119017683Spst		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
119117683Spst		    waitfor == MNT_LAZY) {
119217683Spst			VI_UNLOCK(vp);
119317683Spst			MNT_ILOCK(mp);
119417683Spst			continue;
119517683Spst		}
119617683Spst		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
119717683Spst			MNT_ILOCK(mp);
119817683Spst			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
119917683Spst			goto loop;
120017683Spst		}
120117683Spst		error = VOP_FSYNC(vp, waitfor, td);
120217683Spst		if (error)
120317683Spst			allerror = error;
120417683Spst		VOP_UNLOCK(vp, 0);
120517683Spst		vrele(vp);
120617683Spst
120717683Spst		MNT_ILOCK(mp);
120817683Spst	}
120917683Spst	MNT_IUNLOCK(mp);
121017683Spst	return (allerror);
121117683Spst}
121217683Spst
121317683Spststatic int
121417683Spstnfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
121517683Spst{
121617683Spst	struct nfsmount *nmp = VFSTONFS(mp);
121717683Spst	struct vfsquery vq;
121817683Spst	int error;
121917683Spst
122017683Spst	bzero(&vq, sizeof(vq));
122117683Spst	switch (op) {
122217683Spst#if 0
1223146768Ssam	case VFS_CTL_NOLOCKS:
122417683Spst		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
122556889Sfenner 		if (req->oldptr != NULL) {
122656889Sfenner 			error = SYSCTL_OUT(req, &val, sizeof(val));
122756889Sfenner 			if (error)
122856889Sfenner 				return (error);
122956889Sfenner 		}
123056889Sfenner 		if (req->newptr != NULL) {
123156889Sfenner 			error = SYSCTL_IN(req, &val, sizeof(val));
123256889Sfenner 			if (error)
123317683Spst 				return (error);
123417683Spst			if (val)
123517683Spst				nmp->nm_flag |= NFSMNT_NOLOCKS;
123617683Spst			else
1237146768Ssam				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1238146768Ssam 		}
1239146768Ssam		break;
1240146768Ssam#endif
1241146768Ssam	case VFS_CTL_QUERY:
124217683Spst		mtx_lock(&nmp->nm_mtx);
1243146768Ssam		if (nmp->nm_state & NFSSTA_TIMEO)
1244146768Ssam			vq.vq_flags |= VQ_NOTRESP;
1245146768Ssam		mtx_unlock(&nmp->nm_mtx);
1246146768Ssam#if 0
1247146768Ssam		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1248146768Ssam		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1249146768Ssam			vq.vq_flags |= VQ_NOTRESPLOCK;
125017683Spst#endif
1251146768Ssam		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1252146768Ssam		break;
1253146768Ssam 	case VFS_CTL_TIMEO:
1254146768Ssam 		if (req->oldptr != NULL) {
1255146768Ssam 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1256146768Ssam 			    sizeof(nmp->nm_tprintf_initial_delay));
1257146768Ssam 			if (error)
1258146768Ssam 				return (error);
125917683Spst 		}
126017683Spst 		if (req->newptr != NULL) {
126117683Spst			error = vfs_suser(mp, req->td);
126217683Spst			if (error)
126317683Spst				return (error);
126417683Spst 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
126517683Spst 			    sizeof(nmp->nm_tprintf_initial_delay));
1266146768Ssam 			if (error)
126717683Spst 				return (error);
126817683Spst 			if (nmp->nm_tprintf_initial_delay < 0)
126917683Spst 				nmp->nm_tprintf_initial_delay = 0;
127017683Spst 		}
127117683Spst		break;
1272146768Ssam	default:
1273146768Ssam		return (ENOTSUP);
127417683Spst	}
1275146768Ssam	return (0);
1276146768Ssam}
1277146768Ssam
1278146768Ssam