nfs_clvfsops.c revision 191783
148893Sjkh/*-
23027Srgrimes * Copyright (c) 1989, 1993, 1995
326103Sjkh *	The Regents of the University of California.  All rights reserved.
46718Sphk *
526103Sjkh * This code is derived from software contributed to Berkeley by
640277Sjdp * Rick Macklem at The University of Guelph.
726103Sjkh *
826103Sjkh * Redistribution and use in source and binary forms, with or without
926103Sjkh * modification, are permitted provided that the following conditions
106718Sphk * are met:
1126103Sjkh * 1. Redistributions of source code must retain the above copyright
1219746Sandreas *    notice, this list of conditions and the following disclaimer.
1319746Sandreas * 2. Redistributions in binary form must reproduce the above copyright
1426103Sjkh *    notice, this list of conditions and the following disclaimer in the
1526331Sjoerg *    documentation and/or other materials provided with the distribution.
1626331Sjoerg * 4. Neither the name of the University nor the names of its contributors
1743699Sjkh *    may be used to endorse or promote products derived from this software
1826331Sjoerg *    without specific prior written permission.
1926331Sjoerg *
2037531Sdes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2143699Sjkh * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2229104Sjkh * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2326331Sjoerg * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2415786Sjkh * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2547438Sbrian * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2647438Sbrian * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
278034Sphk * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2847983Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2947983Smarkm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3047983Smarkm * SUCH DAMAGE.
3147983Smarkm *
3247983Smarkm *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
3347983Smarkm */
3447983Smarkm
3547983Smarkm#include <sys/cdefs.h>
3647983Smarkm__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 191783 2009-05-04 15:23:58Z rmacklem $");
3747983Smarkm
3847983Smarkm
3935213Smsmith#include "opt_bootp.h"
4035213Smsmith#include "opt_nfsroot.h"
4135213Smsmith
4235213Smsmith#include <sys/param.h>
4335213Smsmith#include <sys/systm.h>
4435213Smsmith#include <sys/kernel.h>
4535213Smsmith#include <sys/bio.h>
4638281Sjkh#include <sys/buf.h>
4738795Sjkh#include <sys/clock.h>
4838281Sjkh#include <sys/lock.h>
4932718Sjkh#include <sys/malloc.h>
5032718Sjkh#include <sys/mbuf.h>
5142041Sjkh#include <sys/module.h>
5239273Sjhay#include <sys/mount.h>
5336687Sjkoshy#include <sys/proc.h>
5436687Sjkoshy#include <sys/socket.h>
5541919Sjkh#include <sys/socketvar.h>
5641919Sjkh#include <sys/sockio.h>
5741979Sjkh#include <sys/sysctl.h>
5841979Sjkh#include <sys/vnode.h>
5927626Sjkh#include <sys/signalvar.h>
6040839Sjkh
6147246Sphk#include <vm/vm.h>
6240839Sjkh#include <vm/vm_extern.h>
636718Sphk#include <vm/uma.h>
6426761Sjkh
6526761Sjkh#include <net/if.h>
6646812Sobrien#include <net/route.h>
6727170Sjkh#include <netinet/in.h>
6826788Sjkh
6926761Sjkh#include <fs/nfs/nfsport.h>
7026788Sjkh#include <fs/nfsclient/nfsnode.h>
7116156Sjkh#include <fs/nfsclient/nfsmount.h>
727107Sphk#include <fs/nfsclient/nfs.h>
7312661Speter#include <fs/nfsclient/nfsdiskless.h>
7412661Speter
7512661Speterextern int nfscl_ticks;
7612661Speterextern struct timeval nfsboottime;
7712661Speterextern struct nfsstats	newnfsstats;
7843834Sjkh
794324SphkMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
807940SphkMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
817940Sphk
827954SphkSYSCTL_DECL(_vfs_newnfs);
8340022SjkhSYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
8440022Sjkh	&newnfsstats, nfsstats, "S,nfsstats");
8540022Sjkhstatic int nfs_ip_paranoia = 1;
8640022SjkhSYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
8743658Sjkh    &nfs_ip_paranoia, 0, "");
8843464Sjkhstatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
8940345SjkhSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
9037749Sjkh        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
9124864Sjoerg/* how long between console messages "nfs server foo not responding" */
9240022Sjkhstatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
9340022SjkhSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
9443464Sjkh        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
9543658Sjkh
9642502Sjkhstatic void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
9742502Sjkh		    struct nfs_args *argp, struct ucred *, struct thread *);
9842502Sjkhstatic int	mountnfs(struct nfs_args *, struct mount *,
9940022Sjkh		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
10040022Sjkh		    struct vnode **, struct ucred *, struct thread *);
10140022Sjkhstatic vfs_mount_t nfs_mount;
1023884Sphkstatic vfs_cmount_t nfs_cmount;
10340022Sjkhstatic vfs_unmount_t nfs_unmount;
10440022Sjkhstatic vfs_root_t nfs_root;
10540022Sjkhstatic vfs_statfs_t nfs_statfs;
10634782Sjhaystatic vfs_sync_t nfs_sync;
10734782Sjhaystatic vfs_sysctl_t nfs_sysctl;
10826761Sjkh
1096718Sphk/*
11023257Sjoerg * nfs vfs operations.
11123257Sjoerg */
11223257Sjoergstatic struct vfsops nfs_vfsops = {
11324842Sjoerg	.vfs_init =		ncl_init,
11423750Sjkh	.vfs_mount =		nfs_mount,
1153884Sphk	.vfs_cmount =		nfs_cmount,
11632843Sjkh	.vfs_root =		nfs_root,
11732843Sjkh	.vfs_statfs =		nfs_statfs,
11832843Sjkh	.vfs_sync =		nfs_sync,
11924864Sjoerg	.vfs_uninit =		ncl_uninit,
12025415Sdanny	.vfs_unmount =		nfs_unmount,
12125415Sdanny	.vfs_sysctl =		nfs_sysctl,
12224864Sjoerg};
12324864SjoergVFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
12424864Sjoerg
12524864Sjoerg/* So that loader and kldload(2) can find us, wherever we are.. */
12624864SjoergMODULE_VERSION(newnfs, 1);
12724864Sjoerg
12824864Sjoerg/*
12924864Sjoerg * This structure must be filled in by a primary bootstrap or bootstrap
13024864Sjoerg * server for a diskless/dataless machine. It is initialized below just
13124864Sjoerg * to ensure that it is allocated to initialized data (.data not .bss).
13224864Sjoerg */
13324864Sjoergstruct nfs_diskless newnfs_diskless = { { { 0 } } };
13426761Sjkhstruct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
13526761Sjkhint newnfs_diskless_valid = 0;
1368663Sphk
1378663SphkSYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
13848575Shosokawa	&newnfs_diskless_valid, 0, "");
13948575Shosokawa
14048575ShosokawaSYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
14148575Shosokawa	newnfsv3_diskless.root_hostnam, 0, "");
14227348Sjkh
14348575ShosokawaSYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
14426813Sjkh	&newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
14526813Sjkh	"%Ssockaddr_in", "");
14626813Sjkh
14726813Sjkh
14826813Sjkhvoid		newnfsargs_ntoh(struct nfs_args *);
14913352Sphkstatic int	nfs_mountdiskless(char *,
15027070Sjkh		    struct sockaddr_in *, struct nfs_args *,
15127070Sjkh		    struct thread *, struct vnode **, struct mount *);
1526723Sphkstatic void	nfs_convert_diskless(void);
15313352Sphkstatic void	nfs_convert_oargs(struct nfs_args *args,
1548497Srgrimes		    struct onfs_args *oargs);
15536742Sjdp
15636742Sjdpint
15736742Sjdpnewnfs_iosize(struct nfsmount *nmp)
15836777Sjdp{
15913124Sjoerg	int iosize, maxio;
1608497Srgrimes
16113124Sjoerg	/* First, set the upper limit for iosize */
16213352Sphk	if (nmp->nm_flag & NFSMNT_NFSV4) {
16313352Sphk		maxio = NFS_MAXBSIZE;
16445182Ssada	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
16545182Ssada		if (nmp->nm_sotype == SOCK_DGRAM)
16645182Ssada			maxio = NFS_MAXDGRAMDATA;
16738929Sjkh		else
16824864Sjoerg			maxio = NFS_MAXBSIZE;
16924864Sjoerg	} else {
17024864Sjoerg		maxio = NFS_V2MAXDATA;
17124864Sjoerg	}
1726718Sphk	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
17318091Spst		nmp->nm_rsize = maxio;
17439132Sphk	if (nmp->nm_rsize > MAXBSIZE)
1756718Sphk		nmp->nm_rsize = MAXBSIZE;
17618091Spst	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
17739132Sphk		nmp->nm_readdirsize = maxio;
1786718Sphk	if (nmp->nm_readdirsize > nmp->nm_rsize)
17918091Spst		nmp->nm_readdirsize = nmp->nm_rsize;
18018091Spst	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
18117392Spst		nmp->nm_wsize = maxio;
18238355Sjkh	if (nmp->nm_wsize > MAXBSIZE)
18338355Sjkh		nmp->nm_wsize = MAXBSIZE;
18438355Sjkh
18524842Sjoerg	/*
18639132Sphk	 * Calculate the size used for io buffers.  Use the larger
18718091Spst	 * of the two sizes to minimise nfs requests but make sure
18826103Sjkh	 * that it is at least one VM page to avoid wasting buffer
18939132Sphk	 * space.
19045182Ssada	 */
19145182Ssada	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
19245182Ssada	iosize = imax(iosize, PAGE_SIZE);
19323633Sjkh	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
19426103Sjkh	return (iosize);
19513352Sphk}
19613352Sphk
19713352Sphkstatic void
19839132Sphknfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
19913352Sphk{
20039132Sphk
20113352Sphk	args->version = NFS_ARGSVERSION;
20224842Sjoerg	args->addr = oargs->addr;
20339132Sphk	args->addrlen = oargs->addrlen;
20413352Sphk	args->sotype = oargs->sotype;
20526103Sjkh	args->proto = oargs->proto;
20639132Sphk	args->fh = oargs->fh;
20713352Sphk	args->fhsize = oargs->fhsize;
20823633Sjkh	args->flags = oargs->flags;
20926103Sjkh	args->wsize = oargs->wsize;
21026761Sjkh	args->rsize = oargs->rsize;
2118034Sphk	args->readdirsize = oargs->readdirsize;
21213352Sphk	args->timeo = oargs->timeo;
21341310Sjkh	args->retrans = oargs->retrans;
21438844Sjkh	args->readahead = oargs->readahead;
21518499Spst	args->hostname = oargs->hostname;
2168367Sjkh}
2178367Sjkh
21839956Sjkhstatic void
21939854Sjkhnfs_convert_diskless(void)
22026761Sjkh{
22123912Sjoerg
22234782Sjhay	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
22338841Sjkh		sizeof(struct ifaliasreq));
22423912Sjoerg	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
22523909Sjoerg		sizeof(struct sockaddr_in));
22623912Sjoerg	nfs_convert_oargs(&newnfsv3_diskless.root_args,&newnfs_diskless.root_args);
22724842Sjoerg	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
22824842Sjoerg		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
22923909Sjoerg		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh, NFSX_MYFH);
23026813Sjkh	} else {
23126813Sjkh		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
23226813Sjkh		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh, NFSX_V2FH);
23336687Sjkoshy	}
23436687Sjkoshy	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
23545061Snik		sizeof(struct sockaddr_in));
23645061Snik	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam, MNAMELEN);
23736687Sjkoshy	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
23826909Sjkh	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
23926909Sjkh		MAXHOSTNAMELEN);
24026909Sjkh	newnfs_diskless_valid = 3;
24127030Spst}
24227030Spst
24327030Spst/*
24427030Spst * nfs statfs call
24527030Spst */
24627030Spststatic int
24723434Sjoergnfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
24823434Sjoerg{
24932843Sjkh	struct vnode *vp;
25042298Sjkh	struct nfsmount *nmp = VFSTONFS(mp);
25142298Sjkh	struct nfsvattr nfsva;
25213352Sphk	struct nfsfsinfo fs;
25342298Sjkh	struct nfsstatfs sb;
25442298Sjkh	int error = 0, attrflag, gotfsinfo = 0, ret;
25513352Sphk	struct nfsnode *np;
25613352Sphk
25742298Sjkh	error = vfs_busy(mp, MBF_NOWAIT);
25813352Sphk	if (error)
25942298Sjkh		return (error);
26042298Sjkh	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
26121008Sjkh	if (error) {
26221008Sjkh		vfs_unbusy(mp);
2638371Sjkh		return (error);
26439956Sjkh	}
26513352Sphk	vp = NFSTOV(np);
2668367Sjkh	mtx_lock(&nmp->nm_mtx);
2678367Sjkh	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
2688367Sjkh		mtx_unlock(&nmp->nm_mtx);
2696718Sphk		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
27047983Smarkm		    &attrflag, NULL);
2714734Sphk		if (!error)
27223257Sjoerg			gotfsinfo = 1;
2736718Sphk	} else
27423257Sjoerg		mtx_unlock(&nmp->nm_mtx);
27542588Sjoerg	if (!error)
27623257Sjoerg		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
2776718Sphk		    &attrflag, NULL);
2786718Sphk	if (attrflag == 0) {
27948552Shosokawa		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
28048552Shosokawa		    td->td_ucred, td, &nfsva, NULL);
28148552Shosokawa		if (ret) {
2827063Sphk			/*
2836718Sphk			 * Just set default values to get things going.
28413370Sphk			 */
28526761Sjkh			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
28613352Sphk			nfsva.na_vattr.va_type = VDIR;
28713352Sphk			nfsva.na_vattr.va_mode = 0777;
2886718Sphk			nfsva.na_vattr.va_nlink = 100;
28913352Sphk			nfsva.na_vattr.va_uid = (uid_t)0;
2906718Sphk			nfsva.na_vattr.va_gid = (gid_t)0;
29113352Sphk			nfsva.na_vattr.va_fileid = 2;
2927252Sphk			nfsva.na_vattr.va_gen = 1;
29313352Sphk			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
2946718Sphk			nfsva.na_vattr.va_size = 512 * 1024;
2957063Sphk		}
2966718Sphk	}
2976718Sphk	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
2983027Srgrimes	if (!error) {
2996718Sphk	    mtx_lock(&nmp->nm_mtx);
3006718Sphk	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
30126799Sjkh		nfscl_loadfsinfo(nmp, &fs);
30224842Sjoerg	    nfscl_loadsbinfo(nmp, &sb, sbp);
30347246Sphk	    sbp->f_flags = nmp->nm_flag;
30430034Sjkh	    sbp->f_iosize = newnfs_iosize(nmp);
30524842Sjoerg	    mtx_unlock(&nmp->nm_mtx);
30624842Sjoerg	    if (sbp != &mp->mnt_stat) {
30724842Sjoerg		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
30824842Sjoerg		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
30924842Sjoerg	    }
31042588Sjoerg	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
31124842Sjoerg	} else if (NFS_ISV4(vp)) {
31224842Sjoerg		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
31324842Sjoerg	}
31424842Sjoerg	vput(vp);
31516156Sjkh	vfs_unbusy(mp);
31616156Sjkh	return (error);
31716156Sjkh}
31816156Sjkh
31916156Sjkh/*
32016156Sjkh * nfs version 3 fsinfo rpc call
32116156Sjkh */
3226718Sphkint
3236123Sachencl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
3247732Sphk    struct thread *td)
3256718Sphk{
32619506Sjoerg	struct nfsfsinfo fs;
32719506Sjoerg	struct nfsvattr nfsva;
32819506Sjoerg	int error, attrflag;
3296718Sphk
33029223Sjkh	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
33147983Smarkm	if (!error) {
33247983Smarkm		if (attrflag)
33347983Smarkm			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
33447983Smarkm			    1);
33547983Smarkm		mtx_lock(&nmp->nm_mtx);
33648625Shosokawa		nfscl_loadfsinfo(nmp, &fs);
3377954Sphk		mtx_unlock(&nmp->nm_mtx);
3387954Sphk	}
33919506Sjoerg	return (error);
34026761Sjkh}
3417954Sphk
3427954Sphk/*
3437954Sphk * Mount a remote root fs via. nfs. This depends on the info in the
34447983Smarkm * newnfs_diskless structure that has been filled in properly by some primary
34547983Smarkm * bootstrap.
34647983Smarkm * It goes something like this:
34747983Smarkm * - do enough of "ifconfig" by calling ifioctl() so that the system
34837150Sbrian *   can talk to the server
34932589Sbrian * - If newnfs_diskless.mygateway is filled in, use that address as
3507954Sphk *   a default gateway.
35119362Sjoerg * - build the rootfs mount point and call mountnfs() to do the rest.
3526123Sache *
3536718Sphk * It is assumed to be safe to read, modify, and write the nfsv3_diskless
3543884Sphk * structure, as well as other global NFS client variables here, as
35513391Sphk * ncl_mountroot() will be called once in the boot before any other NFS
35613391Sphk * client activity occurs.
35713391Sphk */
3586718Sphkint
35918852Sjkhncl_mountroot(struct mount *mp, struct thread *td)
36046179Sjdp{
36114085Sjoerg	struct nfsv3_diskless *nd = &newnfsv3_diskless;
36214085Sjoerg	struct socket *so;
36314085Sjoerg	struct vnode *vp;
36414085Sjoerg	struct ifreq ir;
36526761Sjkh	int error, i;
36626761Sjkh	u_long l;
36726761Sjkh	char buf[128];
36826761Sjkh	char *cp;
36938635Sjb
37026765Sjkh#if defined(BOOTP_NFSROOT) && defined(BOOTP)
37126761Sjkh	bootpc_init();		/* use bootp to get newnfs_diskless filled in */
37226761Sjkh#elif defined(NFS_ROOT)
37326761Sjkh	nfs_setup_diskless();
37413621Sjoerg#endif
37513621Sjoerg
37613621Sjoerg	nfscl_init();
37739175Sjkh
37813621Sjoerg	if (newnfs_diskless_valid == 0)
37913621Sjoerg		return (-1);
38013621Sjoerg	if (newnfs_diskless_valid == 1)
38113621Sjoerg		nfs_convert_diskless();
38214079Sjoerg
38313391Sphk	/*
3847664Sphk	 * XXX splnet, so networks will receive...
38526788Sjkh	 */
3866718Sphk	splnet();
3876718Sphk
38813391Sphk	/*
38926761Sjkh	 * Do enough of ifconfig(8) so that the critical net interface can
39013391Sphk	 * talk to the server.
3916718Sphk	 */
3928714Sjkh	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
3938714Sjkh	    td->td_ucred, td);
3948714Sjkh	if (error)
3958714Sjkh		panic("ncl_mountroot: socreate(%04x): %d",
3968714Sjkh			nd->myif.ifra_addr.sa_family, error);
39713391Sphk
39813391Sphk#if 0 /* XXX Bad idea */
39913391Sphk	/*
4008714Sjkh	 * We might not have been told the right interface, so we pass
4018714Sjkh	 * over the first ten interfaces of the same kind, until we get
4028881Srgrimes	 * one of them configured.
40326761Sjkh	 */
40413391Sphk
40513391Sphk	for (i = strlen(nd->myif.ifra_name) - 1;
4068720Sjkh		nd->myif.ifra_name[i] >= '0' &&
4078714Sjkh		nd->myif.ifra_name[i] <= '9';
4088714Sjkh		nd->myif.ifra_name[i] ++) {
40913391Sphk		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
41013391Sphk		if(!error)
41113391Sphk			break;
4128714Sjkh	}
41326909Sjkh#endif
41413352Sphk	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
4158714Sjkh	if (error)
41630101Sjkh		panic("ncl_mountroot: SIOCAIFADDR: %d", error);
41713391Sphk	if ((cp = getenv("boot.netif.mtu")) != NULL) {
41813391Sphk		ir.ifr_mtu = strtol(cp, NULL, 10);
4198714Sjkh		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
42013391Sphk		freeenv(cp);
4218714Sjkh		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
4228714Sjkh		if (error)
42312661Speter			printf("ncl_mountroot: SIOCSIFMTU: %d", error);
42413352Sphk	}
42512661Speter	soclose(so);
42613391Sphk
42713391Sphk	/*
42813391Sphk	 * If the gateway field is filled in, set it as the default route.
42913391Sphk	 * Note that pxeboot will set a default route of 0 if the route
43012661Speter	 * is not set by the DHCP server.  Check also for a value of 0
43112661Speter	 * to avoid panicking inappropriately in that situation.
43247252Sphk	 */
4338714Sjkh	if (nd->mygateway.sin_len != 0 &&
43413391Sphk	    nd->mygateway.sin_addr.s_addr != 0) {
43530076Smarkm		struct sockaddr_in mask, sin;
43630109Sjkh
43747252Sphk		bzero((caddr_t)&mask, sizeof(mask));
4388714Sjkh		sin = mask;
43926909Sjkh		sin.sin_family = AF_INET;
4408720Sjkh		sin.sin_len = sizeof(sin);
4418714Sjkh		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
4428881Srgrimes		    (struct sockaddr *)&nd->mygateway,
4438778Sjkh		    (struct sockaddr *)&mask,
4448778Sjkh		    RTF_UP | RTF_GATEWAY, NULL);
4458778Sjkh		if (error)
4468778Sjkh			panic("ncl_mountroot: RTM_ADD: %d", error);
4478778Sjkh	}
4488778Sjkh
4498778Sjkh	/*
4508778Sjkh	 * Create the rootfs mount point.
4518778Sjkh	 */
4528778Sjkh	nd->root_args.fh = nd->root_fh;
4538778Sjkh	nd->root_args.fhsize = nd->root_fhsize;
4548778Sjkh	l = ntohl(nd->root_saddr.sin_addr.s_addr);
4558778Sjkh	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
4568778Sjkh		(l >> 24) & 0xff, (l >> 16) & 0xff,
45744623Sjkh		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
4588778Sjkh	printf("NFS ROOT: %s\n", buf);
4598778Sjkh	if ((error = nfs_mountdiskless(buf,
4608778Sjkh	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
46122268Sjkh		return (error);
4628806Sphk	}
46313418Sphk
4648806Sphk	/*
46547983Smarkm	 * This is not really an nfs issue, but it is much easier to
4668778Sjkh	 * set hostname here and then let the "/etc/rc.xxx" files
46720485Sjkh	 * mount the right /var based upon its preset value.
46848893Sjkh	 */
46948873Sjkh	bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
47047983Smarkm	hostname[MAXHOSTNAMELEN - 1] = '\0';
47147983Smarkm	for (i = 0; i < MAXHOSTNAMELEN; i++)
47247983Smarkm		if (hostname[i] == '\0')
4738806Sphk			break;
4749202Srgrimes	inittodr(ntohl(nd->root_time));
4759202Srgrimes	return (0);
4769202Srgrimes}
4779202Srgrimes
47819333Sjkh/*
47943238Sjkh * Internal version of mount system call for diskless setup.
48047246Sphk */
48141020Sjkhstatic int
48240968Sjkhnfs_mountdiskless(char *path,
48342389Sjkh    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
48442389Sjkh    struct vnode **vpp, struct mount *mp)
48516977Sjkh{
48642300Sjkh	struct sockaddr *nam;
48742300Sjkh	int error;
48816977Sjkh
48942299Sjkh	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
49043392Sjkh	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
49143392Sjkh	    td->td_ucred, td)) != 0) {
49243238Sjkh		printf("ncl_mountroot: mount %s on /: %d\n", path, error);
49342300Sjkh		return (error);
49442300Sjkh	}
49542300Sjkh	return (0);
49643464Sjkh}
49742300Sjkh
49840047Sjkhstatic void
4998720Sjkhnfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
5003993Sphk    struct ucred *cred, struct thread *td)
50113391Sphk{
50215972Sjkh	int s;
50313391Sphk	int adjsock;
5048714Sjkh
50542300Sjkh	s = splnet();
50642300Sjkh
50742300Sjkh	/*
50842300Sjkh	 * Set read-only flag if requested; otherwise, clear it if this is
50923257Sjoerg	 * an update.  If this is not an update, then either the read-only
51042261Sjkh	 * flag is already clear, or this is a root mount and it was set
5118806Sphk	 * intentionally at some previous point.
51219571Sjoerg	 */
51342300Sjkh	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
51419818Sjoerg		MNT_ILOCK(mp);
51519818Sjoerg		mp->mnt_flag |= MNT_RDONLY;
51613391Sphk		MNT_IUNLOCK(mp);
51742300Sjkh	} else if (mp->mnt_flag & MNT_UPDATE) {
51819571Sjoerg		MNT_ILOCK(mp);
51942300Sjkh		mp->mnt_flag &= ~MNT_RDONLY;
52023257Sjoerg		MNT_IUNLOCK(mp);
52142300Sjkh	}
52242300Sjkh
52342300Sjkh	/*
52442300Sjkh	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
52542300Sjkh	 * no sense in that context.  Also, set up appropriate retransmit
52642300Sjkh	 * and soft timeout behavior.
52723633Sjkh	 */
52842300Sjkh	if (argp->sotype == SOCK_STREAM) {
52948552Shosokawa		nmp->nm_flag &= ~NFSMNT_NOCONN;
53048552Shosokawa		nmp->nm_timeo = NFS_MAXTIMEO;
53148633Shosokawa	}
53248623Shosokawa
53348552Shosokawa	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
53415972Sjkh	if ((argp->flags & NFSMNT_NFSV3) == 0)
5357052Sphk		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
53613391Sphk
53713391Sphk	/* Also re-bind if we're switching to/from a connected UDP socket */
53813391Sphk	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
5397063Sphk		    (argp->flags & NFSMNT_NOCONN));
54042300Sjkh
54142300Sjkh	/* Update flags atomically.  Don't change the lock bits. */
54242300Sjkh	nmp->nm_flag = argp->flags | nmp->nm_flag;
54342300Sjkh	splx(s);
54447092Sjkh
54542300Sjkh	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
54627291Sjkh		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
54747591Sjkh		if (nmp->nm_timeo < NFS_MINTIMEO)
54827291Sjkh			nmp->nm_timeo = NFS_MINTIMEO;
5497052Sphk		else if (nmp->nm_timeo > NFS_MAXTIMEO)
55013391Sphk			nmp->nm_timeo = NFS_MAXTIMEO;
55113391Sphk	}
55213391Sphk
5537100Sphk	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
55442300Sjkh		nmp->nm_retry = argp->retrans;
55542300Sjkh		if (nmp->nm_retry > NFS_MAXREXMIT)
55642300Sjkh			nmp->nm_retry = NFS_MAXREXMIT;
55742300Sjkh	}
55842312Sjkh
55942300Sjkh	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
5607100Sphk		nmp->nm_wsize = argp->wsize;
5617100Sphk		/* Round down to multiple of blocksize */
56243994Sdes		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
5637732Sphk		if (nmp->nm_wsize <= 0)
56423750Sjkh			nmp->nm_wsize = NFS_FABLKSIZE;
5657100Sphk	}
5667100Sphk
56742300Sjkh	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
56842300Sjkh		nmp->nm_rsize = argp->rsize;
56942300Sjkh		/* Round down to multiple of blocksize */
57042300Sjkh		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
57142300Sjkh		if (nmp->nm_rsize <= 0)
57224842Sjoerg			nmp->nm_rsize = NFS_FABLKSIZE;
57342300Sjkh	}
57442300Sjkh
57547591Sjkh	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
57641618Sjkh		nmp->nm_readdirsize = argp->readdirsize;
57741618Sjkh	}
57823756Sjkh
57924842Sjoerg	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
58026813Sjkh		nmp->nm_acregmin = argp->acregmin;
58142300Sjkh	else
58242300Sjkh		nmp->nm_acregmin = NFS_MINATTRTIMO;
58342921Sjkh	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
58432731Sjkh		nmp->nm_acregmax = argp->acregmax;
58542300Sjkh	else
58640022Sjkh		nmp->nm_acregmax = NFS_MAXATTRTIMO;
58726813Sjkh	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
5886723Sphk		nmp->nm_acdirmin = argp->acdirmin;
5896723Sphk	else
5906718Sphk		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
5916718Sphk	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
59213352Sphk		nmp->nm_acdirmax = argp->acdirmax;
5938881Srgrimes	else
5946718Sphk		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
59513352Sphk	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
5968881Srgrimes		nmp->nm_acdirmin = nmp->nm_acdirmax;
5976718Sphk	if (nmp->nm_acregmin > nmp->nm_acregmax)
59813352Sphk		nmp->nm_acregmin = nmp->nm_acregmax;
5998881Srgrimes
60042300Sjkh	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
60142300Sjkh		if (argp->readahead <= NFS_MAXRAHEAD)
60242300Sjkh			nmp->nm_readahead = argp->readahead;
6037651Sphk		else
6047651Sphk			nmp->nm_readahead = NFS_MAXRAHEAD;
60523803Sjoerg	}
6068714Sjkh	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
60741548Sjkh		if (argp->wcommitsize < nmp->nm_wsize)
60819542Sjkh			nmp->nm_wcommitsize = nmp->nm_wsize;
60916346Sjkh		else
61016346Sjkh			nmp->nm_wcommitsize = argp->wcommitsize;
61116346Sjkh	}
61221152Sjkh
61316156Sjkh	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
61416156Sjkh		    (nmp->nm_soproto != argp->proto));
61521152Sjkh
61621152Sjkh	if (nmp->nm_client != NULL && adjsock) {
61721152Sjkh		int haslock = 0, error = 0;
61821152Sjkh
61921152Sjkh		if (nmp->nm_sotype == SOCK_STREAM) {
62021152Sjkh			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
62121152Sjkh			if (!error)
62221152Sjkh				haslock = 1;
6236723Sphk		}
62426817Sjkh		if (!error) {
62526813Sjkh		    newnfs_disconnect(&nmp->nm_sockreq);
62642300Sjkh		    if (haslock)
6276723Sphk			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
62838481Sgpalmer		    nmp->nm_sotype = argp->sotype;
6298597Sjkh		    nmp->nm_soproto = argp->proto;
63048552Shosokawa		    if (nmp->nm_sotype == SOCK_DGRAM)
63148552Shosokawa			while (newnfs_connect(nmp, &nmp->nm_sockreq,
63248552Shosokawa			    cred, td, 0)) {
63348552Shosokawa				printf("newnfs_args: retrying connect\n");
63448552Shosokawa				(void) nfs_catnap(PSOCK, "newnfscon");
63542300Sjkh			}
63642300Sjkh		}
63742300Sjkh	} else {
6388597Sjkh		nmp->nm_sotype = argp->sotype;
6398663Sphk		nmp->nm_soproto = argp->proto;
64042300Sjkh	}
64142300Sjkh}
6428663Sphk
64315972Sjkhstatic const char *nfs_opts[] = { "from", "nfs_args",
64442300Sjkh    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
64542300Sjkh    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
6468720Sjkh    "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
6478778Sjkh    "readdirsize", "soft", "hard", "mntudp", "tcp", "wsize", "rsize",
6488778Sjkh    "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax",
6498778Sjkh    NULL };
6507954Sphk
6517954Sphk/*
65213352Sphk * VFS Operations.
6537954Sphk *
6547954Sphk * mount system call
65513352Sphk * It seems a bit dumb to copyinstr() the host and path here and then
6567954Sphk * bcopy() them in mountnfs(), but I wanted to detect errors before
6577954Sphk * doing the sockargs() call because sockargs() allocates an mbuf and
65813352Sphk * an error after that means that I have to release the mbuf.
6597954Sphk */
66042300Sjkh/* ARGSUSED */
6617954Sphkstatic int
6627954Sphknfs_mount(struct mount *mp, struct thread *td)
6637954Sphk{
6647954Sphk	struct nfs_args args = {
66542300Sjkh	    .version = NFS_ARGSVERSION,
66647983Smarkm	    .addr = NULL,
66747983Smarkm	    .addrlen = sizeof (struct sockaddr_in),
66847983Smarkm	    .sotype = SOCK_STREAM,
66947983Smarkm	    .proto = 0,
67047983Smarkm	    .fh = NULL,
67147983Smarkm	    .fhsize = 0,
67247983Smarkm	    .flags = 0,
67347983Smarkm	    .wsize = NFS_WSIZE,
67447983Smarkm	    .rsize = NFS_RSIZE,
67513352Sphk	    .readdirsize = NFS_READDIRSIZE,
67613352Sphk	    .timeo = 10,
67713352Sphk	    .retrans = NFS_RETRANS,
67813352Sphk	    .readahead = NFS_DEFRAHEAD,
67913352Sphk	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
68013352Sphk	    .hostname = NULL,
68113352Sphk	    /* args version 4 */
68213352Sphk	    .acregmin = NFS_MINATTRTIMO,
68342300Sjkh	    .acregmax = NFS_MAXATTRTIMO,
68442300Sjkh	    .acdirmin = NFS_MINDIRATTRTIMO,
68542300Sjkh	    .acdirmax = NFS_MAXDIRATTRTIMO,
68613352Sphk	    .dirlen = 0,
68747217Sjkh	    .krbnamelen = 0,
68847233Sphk	};
68913352Sphk	int error;
69013352Sphk	struct sockaddr *nam;
69113352Sphk	struct vnode *vp;
69213352Sphk	char hst[MNAMELEN];
69313352Sphk	size_t len;
69442300Sjkh	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
69518702Sjkh
69640312Sjkh	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
69747983Smarkm		error = EINVAL;
69840312Sjkh		goto out;
69940868Smsmith	}
70040312Sjkh
70143464Sjkh	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
70242300Sjkh		error = ncl_mountroot(mp, td);
70340315Sjkh		goto out;
70439939Sjkh	}
70542300Sjkh
70642297Sjkh	error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
70742297Sjkh	if (error)
70842297Sjkh		goto out;
70942300Sjkh
71042300Sjkh	if (args.version != NFS_ARGSVERSION) {
71142297Sjkh		error = EPROGMISMATCH;
71243504Sjkh		goto out;
71343464Sjkh	}
71443823Smsmith
71546109Smax	nfscl_init();
71643823Smsmith
71743823Smsmith	if (mp->mnt_flag & MNT_UPDATE) {
71843823Smsmith		struct nfsmount *nmp = VFSTONFS(mp);
71946323Smax
72046323Smax		if (nmp == NULL) {
72146065Smax			error = EIO;
72242297Sjkh			goto out;
72346323Smax		}
72446323Smax		/*
72540059Sjkh		 * When doing an update, we can't change version,
72643464Sjkh		 * security, switch lockd strategies or change cookie
72742388Sjkh		 * translation
72842300Sjkh		 */
72942297Sjkh		args.flags = (args.flags &
73042388Sjkh		    ~(NFSMNT_NFSV3 |
73143238Sjkh		      NFSMNT_NFSV4 |
73242297Sjkh		      NFSMNT_KERB |
73342297Sjkh		      NFSMNT_INTEGRITY |
73443464Sjkh		      NFSMNT_PRIVACY |
73543392Sjkh		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
73643464Sjkh		    (nmp->nm_flag &
73743464Sjkh			(NFSMNT_NFSV3 |
73843392Sjkh			 NFSMNT_NFSV4 |
73943392Sjkh			 NFSMNT_KERB |
74042300Sjkh			 NFSMNT_INTEGRITY |
74142300Sjkh			 NFSMNT_PRIVACY |
74243392Sjkh			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
74342300Sjkh		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
74413352Sphk		goto out;
7456784Sphk	}
746
747	/*
748	 * Make the nfs_ip_paranoia sysctl serve as the default connection
749	 * or no-connection mode for those protocols that support
750	 * no-connection mode (the flag will be cleared later for protocols
751	 * that do not support no-connection mode).  This will allow a client
752	 * to receive replies from a different IP then the request was
753	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
754	 * not 0.
755	 */
756	if (nfs_ip_paranoia == 0)
757		args.flags |= NFSMNT_NOCONN;
758	if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
759		error = EINVAL;
760		goto out;
761	}
762	if (args.fhsize > 0) {
763		error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
764		if (error)
765			goto out;
766	}
767	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
768	if (error)
769		goto out;
770	bzero(&hst[len], MNAMELEN - len);
771	if (args.krbnamelen > 0) {
772		if (args.krbnamelen >= 100) {
773			error = EINVAL;
774			goto out;
775		}
776		error = copyin(args.krbname, krbname, args.krbnamelen);
777		if (error)
778			goto out;
779		krbname[args.krbnamelen] = '\0';
780	} else {
781		krbname[0] = '\0';
782		args.krbnamelen = 0;
783	}
784	if (args.dirlen > 0) {
785		if (args.dirlen >= 100) {
786			error = EINVAL;
787			goto out;
788		}
789		error = copyin(args.dirpath, dirpath, args.dirlen);
790		if (error)
791			goto out;
792		dirpath[args.dirlen] = '\0';
793	} else {
794		dirpath[0] = '\0';
795		args.dirlen = 0;
796	}
797	if (args.srvkrbnamelen > 0) {
798		if (args.srvkrbnamelen >= 100) {
799			error = EINVAL;
800			goto out;
801		}
802		error = copyin(args.srvkrbname, srvkrbname, args.srvkrbnamelen);
803		if (error)
804			goto out;
805		srvkrbname[args.srvkrbnamelen] = '\0';
806	} else {
807		srvkrbname[0] = '\0';
808		args.srvkrbnamelen = 0;
809	}
810	/* sockargs() call must be after above copyin() calls */
811	error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
812	if (error)
813		goto out;
814	args.fh = nfh;
815	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
816	    &vp, td->td_ucred, td);
817out:
818	if (!error) {
819		MNT_ILOCK(mp);
820		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
821		MNT_IUNLOCK(mp);
822	}
823	return (error);
824}
825
826
827/*
828 * VFS Operations.
829 *
830 * mount system call
831 * It seems a bit dumb to copyinstr() the host and path here and then
832 * bcopy() them in mountnfs(), but I wanted to detect errors before
833 * doing the sockargs() call because sockargs() allocates an mbuf and
834 * an error after that means that I have to release the mbuf.
835 */
836/* ARGSUSED */
837static int
838nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
839{
840	int error;
841	struct nfs_args args;
842
843	error = copyin(data, &args, sizeof (struct nfs_args));
844	if (error)
845		return error;
846
847	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
848
849	error = kernel_mount(ma, flags);
850	return (error);
851}
852
853/*
854 * Common code for mount and mountroot
855 */
856static int
857mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
858    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
859    struct vnode **vpp, struct ucred *cred, struct thread *td)
860{
861	struct nfsmount *nmp;
862	struct nfsnode *np;
863	int error, trycnt, ret, clearintr;
864	struct nfsvattr nfsva;
865	static u_int64_t clval = 0;
866
867	if (mp->mnt_flag & MNT_UPDATE) {
868		nmp = VFSTONFS(mp);
869		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
870		FREE(nam, M_SONAME);
871		return (0);
872	} else {
873		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
874		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
875		    M_NEWNFSMNT, M_WAITOK);
876		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
877		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
878		TAILQ_INIT(&nmp->nm_bufq);
879		if (clval == 0)
880			clval = (u_int64_t)nfsboottime.tv_sec;
881		nmp->nm_clval = clval++;
882		nmp->nm_krbnamelen = argp->krbnamelen;
883		nmp->nm_dirpathlen = argp->dirlen;
884		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
885		if (nmp->nm_dirpathlen > 0) {
886			/*
887			 * Since we will be doing dirpath as root,
888			 * set nm_uid to the real uid doing the mount,
889			 * since that is normally the user with a valid TGT.
890			 */
891			nmp->nm_uid = td->td_ucred->cr_ruid;
892		} else {
893			/*
894			 * Just set to -1, so the first Op
895			 * will set it later, to the uid of
896			 * the process doing that (usually
897			 * from a first open in the mount
898			 * point).
899			 */
900			nmp->nm_uid = (uid_t)-1;
901		}
902
903		/* Copy and null terminate all the names */
904		if (nmp->nm_krbnamelen > 0) {
905			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
906			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
907		}
908		if (nmp->nm_dirpathlen > 0) {
909			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
910			    nmp->nm_dirpathlen);
911			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
912			    + 1] = '\0';
913		}
914		if (nmp->nm_srvkrbnamelen > 0) {
915			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
916			    nmp->nm_srvkrbnamelen);
917			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
918			    + nmp->nm_srvkrbnamelen + 2] = '\0';
919		}
920		nmp->nm_sockreq.nr_cred = crhold(cred);
921		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
922		mp->mnt_data = nmp;
923	}
924	vfs_getnewfsid(mp);
925	nmp->nm_mountp = mp;
926	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
927
928	/*
929	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
930	 * high, depending on whether we end up with negative offsets in
931	 * the client or server somewhere.  2GB-1 may be safer.
932	 *
933	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
934	 * that we can handle until we find out otherwise.
935	 * XXX Our "safe" limit on the client is what we can store in our
936	 * buffer cache using signed(!) block numbers.
937	 */
938	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
939		nmp->nm_maxfilesize = 0xffffffffLL;
940	else
941		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
942
943	nmp->nm_timeo = NFS_TIMEO;
944	nmp->nm_retry = NFS_RETRANS;
945	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
946		nmp->nm_wsize = NFS_WSIZE;
947		nmp->nm_rsize = NFS_RSIZE;
948		nmp->nm_readdirsize = NFS_READDIRSIZE;
949	}
950	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
951	nmp->nm_numgrps = NFS_MAXGRPS;
952	nmp->nm_readahead = NFS_DEFRAHEAD;
953	nmp->nm_tprintf_delay = nfs_tprintf_delay;
954	if (nmp->nm_tprintf_delay < 0)
955		nmp->nm_tprintf_delay = 0;
956	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
957	if (nmp->nm_tprintf_initial_delay < 0)
958		nmp->nm_tprintf_initial_delay = 0;
959	nmp->nm_fhsize = argp->fhsize;
960	if (nmp->nm_fhsize > 0)
961		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
962	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
963	nmp->nm_nam = nam;
964	/* Set up the sockets and per-host congestion */
965	nmp->nm_sotype = argp->sotype;
966	nmp->nm_soproto = argp->proto;
967	nmp->nm_sockreq.nr_prog = NFS_PROG;
968	if ((argp->flags & NFSMNT_NFSV4))
969		nmp->nm_sockreq.nr_vers = NFS_VER4;
970	else if ((argp->flags & NFSMNT_NFSV3))
971		nmp->nm_sockreq.nr_vers = NFS_VER3;
972	else
973		nmp->nm_sockreq.nr_vers = NFS_VER2;
974
975	nfs_decode_args(mp, nmp, argp, cred, td);
976
977	/*
978	 * For Connection based sockets (TCP,...) do the connect here,
979	 * but make it interruptible, even for non-interuptible mounts.
980	 */
981	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
982		nmp->nm_flag |= NFSMNT_INT;
983		clearintr = 1;
984	} else {
985		clearintr = 0;
986	}
987	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
988		goto bad;
989	if (clearintr)
990		nmp->nm_flag &= ~NFSMNT_INT;
991
992	/*
993	 * A reference count is needed on the nfsnode representing the
994	 * remote root.  If this object is not persistent, then backward
995	 * traversals of the mount point (i.e. "..") will not work if
996	 * the nfsnode gets flushed out of the cache. Ufs does not have
997	 * this problem, because one can identify root inodes by their
998	 * number == ROOTINO (2).
999	 */
1000	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1001	    nmp->nm_dirpathlen > 0) {
1002		/*
1003		 * If the fhsize on the mount point == 0 for V4, the mount
1004		 * path needs to be looked up.
1005		 */
1006		trycnt = 3;
1007		do {
1008			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1009			    cred, td);
1010			if (error)
1011				(void) nfs_catnap(PZERO, "nfsgetdirp");
1012		} while (error && --trycnt > 0);
1013		if (error) {
1014			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1015			goto bad;
1016		}
1017	}
1018	if (nmp->nm_fhsize > 0) {
1019		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1020		if (error)
1021			goto bad;
1022		*vpp = NFSTOV(np);
1023
1024		/*
1025		 * Get file attributes and transfer parameters for the
1026		 * mountpoint.  This has the side effect of filling in
1027		 * (*vpp)->v_type with the correct value.
1028		 */
1029		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1030		    cred, td, &nfsva, NULL);
1031		if (ret) {
1032			/*
1033			 * Just set default values to get things going.
1034			 */
1035			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1036			nfsva.na_vattr.va_type = VDIR;
1037			nfsva.na_vattr.va_mode = 0777;
1038			nfsva.na_vattr.va_nlink = 100;
1039			nfsva.na_vattr.va_uid = (uid_t)0;
1040			nfsva.na_vattr.va_gid = (gid_t)0;
1041			nfsva.na_vattr.va_fileid = 2;
1042			nfsva.na_vattr.va_gen = 1;
1043			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1044			nfsva.na_vattr.va_size = 512 * 1024;
1045		}
1046		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1047		if (argp->flags & NFSMNT_NFSV3)
1048			ncl_fsinfo(nmp, *vpp, cred, td);
1049
1050		/*
1051		 * Lose the lock but keep the ref.
1052		 */
1053		VOP_UNLOCK(*vpp, 0);
1054		return (0);
1055	}
1056	error = EIO;
1057
1058bad:
1059	newnfs_disconnect(&nmp->nm_sockreq);
1060	crfree(nmp->nm_sockreq.nr_cred);
1061	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1062	mtx_destroy(&nmp->nm_mtx);
1063	FREE(nmp, M_NEWNFSMNT);
1064	FREE(nam, M_SONAME);
1065	return (error);
1066}
1067
1068/*
1069 * unmount system call
1070 */
1071static int
1072nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
1073{
1074	struct nfsmount *nmp;
1075	int error, flags = 0, trycnt = 0;
1076
1077	if (mntflags & MNT_FORCE)
1078		flags |= FORCECLOSE;
1079	nmp = VFSTONFS(mp);
1080	/*
1081	 * Goes something like this..
1082	 * - Call vflush() to clear out vnodes for this filesystem
1083	 * - Close the socket
1084	 * - Free up the data structures
1085	 */
1086	/* In the forced case, cancel any outstanding requests. */
1087	if (mntflags & MNT_FORCE) {
1088		error = newnfs_nmcancelreqs(nmp);
1089		if (error)
1090			goto out;
1091		/* For a forced close, get rid of the renew thread now */
1092		nfscl_umount(nmp, td);
1093	}
1094	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1095	do {
1096		error = vflush(mp, 1, flags, td);
1097		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1098			(void) nfs_catnap(PSOCK, "newndm");
1099	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1100	if (error)
1101		goto out;
1102
1103	/*
1104	 * We are now committed to the unmount.
1105	 */
1106	if ((mntflags & MNT_FORCE) == 0)
1107		nfscl_umount(nmp, td);
1108	newnfs_disconnect(&nmp->nm_sockreq);
1109	crfree(nmp->nm_sockreq.nr_cred);
1110	FREE(nmp->nm_nam, M_SONAME);
1111
1112	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1113	mtx_destroy(&nmp->nm_mtx);
1114	FREE(nmp, M_NEWNFSMNT);
1115out:
1116	return (error);
1117}
1118
1119/*
1120 * Return root of a filesystem
1121 */
1122static int
1123nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
1124{
1125	struct vnode *vp;
1126	struct nfsmount *nmp;
1127	struct nfsnode *np;
1128	int error;
1129
1130	nmp = VFSTONFS(mp);
1131	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1132	if (error)
1133		return error;
1134	vp = NFSTOV(np);
1135	/*
1136	 * Get transfer parameters and attributes for root vnode once.
1137	 */
1138	mtx_lock(&nmp->nm_mtx);
1139	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1140		mtx_unlock(&nmp->nm_mtx);
1141		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1142	} else
1143		mtx_unlock(&nmp->nm_mtx);
1144	if (vp->v_type == VNON)
1145	    vp->v_type = VDIR;
1146	vp->v_vflag |= VV_ROOT;
1147	*vpp = vp;
1148	return (0);
1149}
1150
1151/*
1152 * Flush out the buffer cache
1153 */
1154/* ARGSUSED */
1155static int
1156nfs_sync(struct mount *mp, int waitfor, struct thread *td)
1157{
1158	struct vnode *vp, *mvp;
1159	int error, allerror = 0;
1160
1161	/*
1162	 * Force stale buffer cache information to be flushed.
1163	 */
1164	MNT_ILOCK(mp);
1165loop:
1166	MNT_VNODE_FOREACH(vp, mp, mvp) {
1167		VI_LOCK(vp);
1168		MNT_IUNLOCK(mp);
1169		/* XXX Racy bv_cnt check. */
1170		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1171		    waitfor == MNT_LAZY) {
1172			VI_UNLOCK(vp);
1173			MNT_ILOCK(mp);
1174			continue;
1175		}
1176		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1177			MNT_ILOCK(mp);
1178			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1179			goto loop;
1180		}
1181		error = VOP_FSYNC(vp, waitfor, td);
1182		if (error)
1183			allerror = error;
1184		VOP_UNLOCK(vp, 0);
1185		vrele(vp);
1186
1187		MNT_ILOCK(mp);
1188	}
1189	MNT_IUNLOCK(mp);
1190	return (allerror);
1191}
1192
1193static int
1194nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1195{
1196	struct nfsmount *nmp = VFSTONFS(mp);
1197	struct vfsquery vq;
1198	int error;
1199
1200	bzero(&vq, sizeof(vq));
1201	switch (op) {
1202#if 0
1203	case VFS_CTL_NOLOCKS:
1204		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1205 		if (req->oldptr != NULL) {
1206 			error = SYSCTL_OUT(req, &val, sizeof(val));
1207 			if (error)
1208 				return (error);
1209 		}
1210 		if (req->newptr != NULL) {
1211 			error = SYSCTL_IN(req, &val, sizeof(val));
1212 			if (error)
1213 				return (error);
1214			if (val)
1215				nmp->nm_flag |= NFSMNT_NOLOCKS;
1216			else
1217				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1218 		}
1219		break;
1220#endif
1221	case VFS_CTL_QUERY:
1222		mtx_lock(&nmp->nm_mtx);
1223		if (nmp->nm_state & NFSSTA_TIMEO)
1224			vq.vq_flags |= VQ_NOTRESP;
1225		mtx_unlock(&nmp->nm_mtx);
1226#if 0
1227		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1228		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1229			vq.vq_flags |= VQ_NOTRESPLOCK;
1230#endif
1231		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1232		break;
1233 	case VFS_CTL_TIMEO:
1234 		if (req->oldptr != NULL) {
1235 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1236 			    sizeof(nmp->nm_tprintf_initial_delay));
1237 			if (error)
1238 				return (error);
1239 		}
1240 		if (req->newptr != NULL) {
1241			error = vfs_suser(mp, req->td);
1242			if (error)
1243				return (error);
1244 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1245 			    sizeof(nmp->nm_tprintf_initial_delay));
1246 			if (error)
1247 				return (error);
1248 			if (nmp->nm_tprintf_initial_delay < 0)
1249 				nmp->nm_tprintf_initial_delay = 0;
1250 		}
1251		break;
1252	default:
1253		return (ENOTSUP);
1254	}
1255	return (0);
1256}
1257
1258