/* nfs_clvfsops.c revision 192145 */
/*-
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24127664Sbms * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25214518Srpaulo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2617683Spst * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2717683Spst * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2875107Sfenner * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2975107Sfenner * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3075107Sfenner * SUCH DAMAGE. 3175107Sfenner * 32214518Srpaulo * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 33214518Srpaulo */ 34214518Srpaulo 35214518Srpaulo#include <sys/cdefs.h> 36214518Srpaulo__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 192145 2009-05-15 16:03:11Z rmacklem $"); 37214518Srpaulo 38214518Srpaulo 39214518Srpaulo#include "opt_bootp.h" 40214518Srpaulo#include "opt_nfsroot.h" 41214518Srpaulo 42214518Srpaulo#include <sys/param.h> 43214518Srpaulo#include <sys/systm.h> 44214518Srpaulo#include <sys/kernel.h> 45214518Srpaulo#include <sys/bio.h> 4617683Spst#include <sys/buf.h> 4717683Spst#include <sys/clock.h> 4817683Spst#include <sys/lock.h> 49146768Ssam#include <sys/malloc.h> 5017683Spst#include <sys/mbuf.h> 5175107Sfenner#include <sys/module.h> 5275107Sfenner#include <sys/mount.h> 5317683Spst#include <sys/proc.h> 5417683Spst#include <sys/socket.h> 5517683Spst#include <sys/socketvar.h> 5617683Spst#include <sys/sockio.h> 5717683Spst#include <sys/sysctl.h> 5817683Spst#include <sys/vnode.h> 5917683Spst#include <sys/signalvar.h> 6017683Spst 6117683Spst#include <vm/vm.h> 6217683Spst#include <vm/vm_extern.h> 6317683Spst#include <vm/uma.h> 6417683Spst 65146768Ssam#include <net/if.h> 66146768Ssam#include <net/route.h> 67146768Ssam#include <netinet/in.h> 68146768Ssam 6917683Spst#include <fs/nfs/nfsport.h> 70190225Srpaulo#include <fs/nfsclient/nfsnode.h> 71190225Srpaulo#include 
<fs/nfsclient/nfsmount.h> 72190225Srpaulo#include <fs/nfsclient/nfs.h> 73190225Srpaulo#include <fs/nfsclient/nfsdiskless.h> 74146768Ssam 75146768Ssamextern int nfscl_ticks; 76146768Ssamextern struct timeval nfsboottime; 7717683Spstextern struct nfsstats newnfsstats; 7817683Spst 7917683SpstMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header"); 80146768SsamMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct"); 81146768Ssam 82146768SsamSYSCTL_DECL(_vfs_newnfs); 83146768SsamSYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW, 84146768Ssam &newnfsstats, nfsstats, "S,nfsstats"); 85146768Ssamstatic int nfs_ip_paranoia = 1; 86146768SsamSYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, 87146768Ssam &nfs_ip_paranoia, 0, ""); 88146768Ssamstatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; 8917683SpstSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY, 9017683Spst downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); 9117683Spst/* how long between console messages "nfs server foo not responding" */ 9217683Spststatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY; 9317683SpstSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY, 9417683Spst downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); 9517683Spst 9617683Spststatic void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, 9717683Spst struct nfs_args *argp, struct ucred *, struct thread *); 9817683Spststatic int mountnfs(struct nfs_args *, struct mount *, 9917683Spst struct sockaddr *, char *, u_char *, u_char *, u_char *, 10017683Spst struct vnode **, struct ucred *, struct thread *); 10117683Spststatic vfs_mount_t nfs_mount; 10217683Spststatic vfs_cmount_t nfs_cmount; 10317683Spststatic vfs_unmount_t nfs_unmount; 10417683Spststatic vfs_root_t nfs_root; 10517683Spststatic vfs_statfs_t nfs_statfs; 10617683Spststatic vfs_sync_t nfs_sync; 10717683Spststatic vfs_sysctl_t nfs_sysctl; 10817683Spst 10917683Spst/* 11017683Spst * nfs vfs operations. 
11117683Spst */ 11217683Spststatic struct vfsops nfs_vfsops = { 11317683Spst .vfs_init = ncl_init, 11417683Spst .vfs_mount = nfs_mount, 11517683Spst .vfs_cmount = nfs_cmount, 11617683Spst .vfs_root = nfs_root, 11717683Spst .vfs_statfs = nfs_statfs, 11817683Spst .vfs_sync = nfs_sync, 11917683Spst .vfs_uninit = ncl_uninit, 12017683Spst .vfs_unmount = nfs_unmount, 12117683Spst .vfs_sysctl = nfs_sysctl, 12217683Spst}; 12317683SpstVFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK); 12417683Spst 12517683Spst/* So that loader and kldload(2) can find us, wherever we are.. */ 12617683SpstMODULE_VERSION(newnfs, 1); 12717683Spst 12817683Spst/* 12917683Spst * This structure must be filled in by a primary bootstrap or bootstrap 13017683Spst * server for a diskless/dataless machine. It is initialized below just 13117683Spst * to ensure that it is allocated to initialized data (.data not .bss). 13217683Spst */ 13317683Spststruct nfs_diskless newnfs_diskless = { { { 0 } } }; 13417683Spststruct nfsv3_diskless newnfsv3_diskless = { { { 0 } } }; 13517683Spstint newnfs_diskless_valid = 0; 13617683Spst 13717683SpstSYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD, 13817683Spst &newnfs_diskless_valid, 0, 13917683Spst "Has the diskless struct been filled correctly"); 14017683Spst 14117683SpstSYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, 14217683Spst newnfsv3_diskless.root_hostnam, 0, "Path to nfs root"); 14317683Spst 14417683SpstSYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, 14517683Spst &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr, 14617683Spst "%Ssockaddr_in", "Diskless root nfs address"); 14717683Spst 14817683Spst 14917683Spstvoid newnfsargs_ntoh(struct nfs_args *); 15017683Spststatic int nfs_mountdiskless(char *, 15117683Spst struct sockaddr_in *, struct nfs_args *, 15217683Spst struct thread *, struct vnode **, struct mount *); 15317683Spststatic void nfs_convert_diskless(void); 15417683Spststatic void 
nfs_convert_oargs(struct nfs_args *args, 155241231Sdelphij struct onfs_args *oargs); 15617683Spst 15717683Spstint 158241231Sdelphijnewnfs_iosize(struct nfsmount *nmp) 15917683Spst{ 16017683Spst int iosize, maxio; 16117683Spst 16217683Spst /* First, set the upper limit for iosize */ 16317683Spst if (nmp->nm_flag & NFSMNT_NFSV4) { 16417683Spst maxio = NFS_MAXBSIZE; 16517683Spst } else if (nmp->nm_flag & NFSMNT_NFSV3) { 16617683Spst if (nmp->nm_sotype == SOCK_DGRAM) 16717683Spst maxio = NFS_MAXDGRAMDATA; 16817683Spst else 16917683Spst maxio = NFS_MAXBSIZE; 17017683Spst } else { 17117683Spst maxio = NFS_V2MAXDATA; 17217683Spst } 17317683Spst if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) 17417683Spst nmp->nm_rsize = maxio; 17517683Spst if (nmp->nm_rsize > MAXBSIZE) 17617683Spst nmp->nm_rsize = MAXBSIZE; 17717683Spst if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) 17817683Spst nmp->nm_readdirsize = maxio; 17917683Spst if (nmp->nm_readdirsize > nmp->nm_rsize) 18017683Spst nmp->nm_readdirsize = nmp->nm_rsize; 18117683Spst if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) 18217683Spst nmp->nm_wsize = maxio; 18317683Spst if (nmp->nm_wsize > MAXBSIZE) 18417683Spst nmp->nm_wsize = MAXBSIZE; 18517683Spst 18617683Spst /* 18717683Spst * Calculate the size used for io buffers. Use the larger 18817683Spst * of the two sizes to minimise nfs requests but make sure 18917683Spst * that it is at least one VM page to avoid wasting buffer 19017683Spst * space. 
19117683Spst */ 19217683Spst iosize = imax(nmp->nm_rsize, nmp->nm_wsize); 19317683Spst iosize = imax(iosize, PAGE_SIZE); 19417683Spst nmp->nm_mountp->mnt_stat.f_iosize = iosize; 19517683Spst return (iosize); 19617683Spst} 19717683Spst 19817683Spststatic void 19917683Spstnfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) 20017683Spst{ 20117683Spst 20217683Spst args->version = NFS_ARGSVERSION; 20317683Spst args->addr = oargs->addr; 20417683Spst args->addrlen = oargs->addrlen; 20517683Spst args->sotype = oargs->sotype; 20617683Spst args->proto = oargs->proto; 20717683Spst args->fh = oargs->fh; 20817683Spst args->fhsize = oargs->fhsize; 20917683Spst args->flags = oargs->flags; 21017683Spst args->wsize = oargs->wsize; 21117683Spst args->rsize = oargs->rsize; 21217683Spst args->readdirsize = oargs->readdirsize; 21317683Spst args->timeo = oargs->timeo; 21417683Spst args->retrans = oargs->retrans; 21517683Spst args->readahead = oargs->readahead; 21617683Spst args->hostname = oargs->hostname; 21717683Spst} 21817683Spst 21917683Spststatic void 22017683Spstnfs_convert_diskless(void) 22117683Spst{ 22217683Spst 22317683Spst bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif, 22417683Spst sizeof (struct ifaliasreq)); 22517683Spst bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway, 22617683Spst sizeof (struct sockaddr_in)); 22717683Spst nfs_convert_oargs(&newnfsv3_diskless.root_args, 22817683Spst &newnfs_diskless.root_args); 22917683Spst if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { 23017683Spst newnfsv3_diskless.root_fhsize = NFSX_MYFH; 23117683Spst bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh, 23217683Spst NFSX_MYFH); 23317683Spst } else { 23417683Spst newnfsv3_diskless.root_fhsize = NFSX_V2FH; 23517683Spst bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh, 23617683Spst NFSX_V2FH); 23717683Spst } 23817683Spst bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr, 23917683Spst sizeof(struct sockaddr_in)); 
24017683Spst bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam, 24117683Spst MNAMELEN); 24217683Spst newnfsv3_diskless.root_time = newnfs_diskless.root_time; 24317683Spst bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam, 24417683Spst MAXHOSTNAMELEN); 24517683Spst newnfs_diskless_valid = 3; 24617683Spst} 24717683Spst 24817683Spst/* 24917683Spst * nfs statfs call 25017683Spst */ 25117683Spststatic int 25217683Spstnfs_statfs(struct mount *mp, struct statfs *sbp) 25317683Spst{ 25417683Spst struct vnode *vp; 25517683Spst struct thread *td; 25617683Spst struct nfsmount *nmp = VFSTONFS(mp); 25717683Spst struct nfsvattr nfsva; 25817683Spst struct nfsfsinfo fs; 25917683Spst struct nfsstatfs sb; 26017683Spst int error = 0, attrflag, gotfsinfo = 0, ret; 26117683Spst struct nfsnode *np; 26217683Spst 26317683Spst td = curthread; 26417683Spst 26517683Spst error = vfs_busy(mp, MBF_NOWAIT); 26617683Spst if (error) 26717683Spst return (error); 26817683Spst error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np); 26917683Spst if (error) { 27017683Spst vfs_unbusy(mp); 27117683Spst return (error); 27217683Spst } 27317683Spst vp = NFSTOV(np); 27417683Spst mtx_lock(&nmp->nm_mtx); 27517683Spst if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { 27617683Spst mtx_unlock(&nmp->nm_mtx); 27717683Spst error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, 27817683Spst &attrflag, NULL); 27917683Spst if (!error) 28017683Spst gotfsinfo = 1; 28117683Spst } else 28217683Spst mtx_unlock(&nmp->nm_mtx); 28317683Spst if (!error) 28417683Spst error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, 28517683Spst &attrflag, NULL); 28617683Spst if (attrflag == 0) { 28717683Spst ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, 28817683Spst td->td_ucred, td, &nfsva, NULL); 28917683Spst if (ret) { 29017683Spst /* 29117683Spst * Just set default values to get things going. 
29217683Spst */ 29317683Spst NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); 29417683Spst nfsva.na_vattr.va_type = VDIR; 29517683Spst nfsva.na_vattr.va_mode = 0777; 29617683Spst nfsva.na_vattr.va_nlink = 100; 29717683Spst nfsva.na_vattr.va_uid = (uid_t)0; 29817683Spst nfsva.na_vattr.va_gid = (gid_t)0; 29917683Spst nfsva.na_vattr.va_fileid = 2; 30017683Spst nfsva.na_vattr.va_gen = 1; 30117683Spst nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; 30217683Spst nfsva.na_vattr.va_size = 512 * 1024; 30317683Spst } 30417683Spst } 30517683Spst (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 30617683Spst if (!error) { 30717683Spst mtx_lock(&nmp->nm_mtx); 30817683Spst if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) 30917683Spst nfscl_loadfsinfo(nmp, &fs); 31017683Spst nfscl_loadsbinfo(nmp, &sb, sbp); 31117683Spst sbp->f_flags = nmp->nm_flag; 31217683Spst sbp->f_iosize = newnfs_iosize(nmp); 31317683Spst mtx_unlock(&nmp->nm_mtx); 31417683Spst if (sbp != &mp->mnt_stat) { 31517683Spst bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); 31617683Spst bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); 31717683Spst } 31817683Spst strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); 31917683Spst } else if (NFS_ISV4(vp)) { 32017683Spst error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 32117683Spst } 32217683Spst vput(vp); 32317683Spst vfs_unbusy(mp); 32417683Spst return (error); 32517683Spst} 32617683Spst 32717683Spst/* 32817683Spst * nfs version 3 fsinfo rpc call 32917683Spst */ 33017683Spstint 33117683Spstncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, 33217683Spst struct thread *td) 33317683Spst{ 33417683Spst struct nfsfsinfo fs; 33517683Spst struct nfsvattr nfsva; 33617683Spst int error, attrflag; 33717683Spst 33817683Spst error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); 33917683Spst if (!error) { 34017683Spst if (attrflag) 34117683Spst (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 
34217683Spst 1); 34317683Spst mtx_lock(&nmp->nm_mtx); 34417683Spst nfscl_loadfsinfo(nmp, &fs); 34517683Spst mtx_unlock(&nmp->nm_mtx); 34617683Spst } 34717683Spst return (error); 34817683Spst} 34917683Spst 35017683Spst/* 35117683Spst * Mount a remote root fs via. nfs. This depends on the info in the 35217683Spst * newnfs_diskless structure that has been filled in properly by some primary 35317683Spst * bootstrap. 35417683Spst * It goes something like this: 35517683Spst * - do enough of "ifconfig" by calling ifioctl() so that the system 35617683Spst * can talk to the server 35717683Spst * - If newnfs_diskless.mygateway is filled in, use that address as 35817683Spst * a default gateway. 35917683Spst * - build the rootfs mount point and call mountnfs() to do the rest. 36017683Spst * 36117683Spst * It is assumed to be safe to read, modify, and write the nfsv3_diskless 36217683Spst * structure, as well as other global NFS client variables here, as 36317683Spst * nfs_mountroot() will be called once in the boot before any other NFS 36417683Spst * client activity occurs. 36517683Spst */ 36617683Spstint 36717683Spstncl_mountroot(struct mount *mp) 36817683Spst{ 36917683Spst struct thread *td = curthread; 37017683Spst INIT_VPROCG(TD_TO_VPROCG(td)); 37117683Spst struct nfsv3_diskless *nd = &newnfsv3_diskless; 37217683Spst struct socket *so; 37317683Spst struct vnode *vp; 37417683Spst struct ifreq ir; 37517683Spst int error, i; 37617683Spst u_long l; 37717683Spst char buf[128]; 37817683Spst char *cp; 37917683Spst 38017683Spst#if defined(BOOTP_NFSROOT) && defined(BOOTP) 38117683Spst bootpc_init(); /* use bootp to get nfs_diskless filled in */ 38217683Spst#elif defined(NFS_ROOT) 38317683Spst nfs_setup_diskless(); 38417683Spst#endif 38517683Spst 38617683Spst if (newnfs_diskless_valid == 0) 38717683Spst return (-1); 38817683Spst if (newnfs_diskless_valid == 1) 38917683Spst nfs_convert_diskless(); 39017683Spst 39117683Spst /* 39217683Spst * XXX splnet, so networks will receive... 
39317683Spst */ 39417683Spst splnet(); 39517683Spst 39617683Spst /* 39717683Spst * Do enough of ifconfig(8) so that the critical net interface can 39817683Spst * talk to the server. 39917683Spst */ 40017683Spst error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, 40117683Spst td->td_ucred, td); 40217683Spst if (error) 40317683Spst panic("nfs_mountroot: socreate(%04x): %d", 40417683Spst nd->myif.ifra_addr.sa_family, error); 40517683Spst 40617683Spst#if 0 /* XXX Bad idea */ 40717683Spst /* 40817683Spst * We might not have been told the right interface, so we pass 40917683Spst * over the first ten interfaces of the same kind, until we get 41017683Spst * one of them configured. 41117683Spst */ 41217683Spst 41317683Spst for (i = strlen(nd->myif.ifra_name) - 1; 41417683Spst nd->myif.ifra_name[i] >= '0' && 41517683Spst nd->myif.ifra_name[i] <= '9'; 41617683Spst nd->myif.ifra_name[i] ++) { 41717683Spst error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 41817683Spst if(!error) 41917683Spst break; 42017683Spst } 42117683Spst#endif 42217683Spst error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 42317683Spst if (error) 42417683Spst panic("nfs_mountroot: SIOCAIFADDR: %d", error); 42517683Spst if ((cp = getenv("boot.netif.mtu")) != NULL) { 42617683Spst ir.ifr_mtu = strtol(cp, NULL, 10); 42717683Spst bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); 42817683Spst freeenv(cp); 42917683Spst error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); 43017683Spst if (error) 43117683Spst printf("nfs_mountroot: SIOCSIFMTU: %d", error); 43217683Spst } 43317683Spst soclose(so); 43417683Spst 43517683Spst /* 43617683Spst * If the gateway field is filled in, set it as the default route. 43717683Spst * Note that pxeboot will set a default route of 0 if the route 43817683Spst * is not set by the DHCP server. Check also for a value of 0 43917683Spst * to avoid panicking inappropriately in that situation. 
44017683Spst */ 44117683Spst if (nd->mygateway.sin_len != 0 && 44217683Spst nd->mygateway.sin_addr.s_addr != 0) { 44317683Spst struct sockaddr_in mask, sin; 44417683Spst 44517683Spst bzero((caddr_t)&mask, sizeof(mask)); 44617683Spst sin = mask; 44717683Spst sin.sin_family = AF_INET; 44817683Spst sin.sin_len = sizeof(sin); 44917683Spst /* XXX MRT use table 0 for this sort of thing */ 45017683Spst error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, 45117683Spst (struct sockaddr *)&nd->mygateway, 45217683Spst (struct sockaddr *)&mask, 45317683Spst RTF_UP | RTF_GATEWAY, NULL); 45417683Spst if (error) 455146768Ssam panic("nfs_mountroot: RTM_ADD: %d", error); 456146768Ssam } 457146768Ssam 458146768Ssam /* 459146768Ssam * Create the rootfs mount point. 460146768Ssam */ 461146768Ssam nd->root_args.fh = nd->root_fh; 462146768Ssam nd->root_args.fhsize = nd->root_fhsize; 463146768Ssam l = ntohl(nd->root_saddr.sin_addr.s_addr); 464146768Ssam snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 465146768Ssam (l >> 24) & 0xff, (l >> 16) & 0xff, 46617683Spst (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); 46717683Spst printf("NFS ROOT: %s\n", buf); 46817683Spst nd->root_args.hostname = buf; 46917683Spst if ((error = nfs_mountdiskless(buf, 47017683Spst &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { 47117683Spst return (error); 47217683Spst } 47317683Spst 47417683Spst /* 47517683Spst * This is not really an nfs issue, but it is much easier to 47617683Spst * set hostname here and then let the "/etc/rc.xxx" files 47717683Spst * mount the right /var based upon its preset value. 
47817683Spst */ 47917683Spst mtx_lock(&hostname_mtx); 48017683Spst bcopy(nd->my_hostnam, V_hostname, MAXHOSTNAMELEN); 48117683Spst V_hostname[MAXHOSTNAMELEN - 1] = '\0'; 48217683Spst for (i = 0; i < MAXHOSTNAMELEN; i++) 48317683Spst if (V_hostname[i] == '\0') 48417683Spst break; 48517683Spst mtx_unlock(&hostname_mtx); 48617683Spst inittodr(ntohl(nd->root_time)); 48717683Spst return (0); 48817683Spst} 48917683Spst 49017683Spst/* 49117683Spst * Internal version of mount system call for diskless setup. 49217683Spst */ 49317683Spststatic int 49417683Spstnfs_mountdiskless(char *path, 49517683Spst struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, 49617683Spst struct vnode **vpp, struct mount *mp) 49717683Spst{ 49817683Spst struct sockaddr *nam; 499146768Ssam int error; 500146768Ssam 501146768Ssam nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); 502146768Ssam if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp, 503146768Ssam td->td_ucred, td)) != 0) { 504146768Ssam printf("nfs_mountroot: mount %s on /: %d\n", path, error); 505146768Ssam return (error); 506146768Ssam } 507146768Ssam return (0); 508146768Ssam} 509146768Ssam 510146768Ssamstatic void 511146768Ssamnfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, 512146768Ssam struct ucred *cred, struct thread *td) 513146768Ssam{ 514146768Ssam int s; 515146768Ssam int adjsock; 516146768Ssam 517146768Ssam s = splnet(); 518146768Ssam 51917683Spst /* 52017683Spst * Set read-only flag if requested; otherwise, clear it if this is 52117683Spst * an update. If this is not an update, then either the read-only 52217683Spst * flag is already clear, or this is a root mount and it was set 52317683Spst * intentionally at some previous point. 
52417683Spst */ 52517683Spst if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { 52617683Spst MNT_ILOCK(mp); 52717683Spst mp->mnt_flag |= MNT_RDONLY; 52817683Spst MNT_IUNLOCK(mp); 52917683Spst } else if (mp->mnt_flag & MNT_UPDATE) { 53017683Spst MNT_ILOCK(mp); 53117683Spst mp->mnt_flag &= ~MNT_RDONLY; 53217683Spst MNT_IUNLOCK(mp); 53317683Spst } 53417683Spst 53517683Spst /* 53617683Spst * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes 53717683Spst * no sense in that context. Also, set up appropriate retransmit 53817683Spst * and soft timeout behavior. 53917683Spst */ 54017683Spst if (argp->sotype == SOCK_STREAM) { 54117683Spst nmp->nm_flag &= ~NFSMNT_NOCONN; 54217683Spst nmp->nm_timeo = NFS_MAXTIMEO; 54317683Spst } 54417683Spst 54517683Spst /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */ 54617683Spst if ((argp->flags & NFSMNT_NFSV3) == 0) 54717683Spst nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 54817683Spst 54917683Spst /* Also re-bind if we're switching to/from a connected UDP socket */ 55017683Spst adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) != 55117683Spst (argp->flags & NFSMNT_NOCONN)); 55217683Spst 55317683Spst /* Update flags atomically. Don't change the lock bits. 
*/ 55417683Spst nmp->nm_flag = argp->flags | nmp->nm_flag; 55517683Spst splx(s); 55617683Spst 55717683Spst if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 55817683Spst nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 55917683Spst if (nmp->nm_timeo < NFS_MINTIMEO) 56017683Spst nmp->nm_timeo = NFS_MINTIMEO; 56117683Spst else if (nmp->nm_timeo > NFS_MAXTIMEO) 56217683Spst nmp->nm_timeo = NFS_MAXTIMEO; 56317683Spst } 56417683Spst 56517683Spst if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 56617683Spst nmp->nm_retry = argp->retrans; 56717683Spst if (nmp->nm_retry > NFS_MAXREXMIT) 56817683Spst nmp->nm_retry = NFS_MAXREXMIT; 56917683Spst } 57017683Spst 57117683Spst if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 57217683Spst nmp->nm_wsize = argp->wsize; 57317683Spst /* Round down to multiple of blocksize */ 57417683Spst nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); 57517683Spst if (nmp->nm_wsize <= 0) 57617683Spst nmp->nm_wsize = NFS_FABLKSIZE; 57717683Spst } 57817683Spst 57917683Spst if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 58017683Spst nmp->nm_rsize = argp->rsize; 58117683Spst /* Round down to multiple of blocksize */ 58217683Spst nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); 58317683Spst if (nmp->nm_rsize <= 0) 58417683Spst nmp->nm_rsize = NFS_FABLKSIZE; 58517683Spst } 58617683Spst 58717683Spst if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { 58817683Spst nmp->nm_readdirsize = argp->readdirsize; 58917683Spst } 59017683Spst 59117683Spst if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) 59217683Spst nmp->nm_acregmin = argp->acregmin; 59317683Spst else 59417683Spst nmp->nm_acregmin = NFS_MINATTRTIMO; 59517683Spst if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) 59617683Spst nmp->nm_acregmax = argp->acregmax; 59717683Spst else 59817683Spst nmp->nm_acregmax = NFS_MAXATTRTIMO; 59917683Spst if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) 60017683Spst nmp->nm_acdirmin = argp->acdirmin; 60117683Spst 
else 60217683Spst nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; 60317683Spst if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) 60417683Spst nmp->nm_acdirmax = argp->acdirmax; 60517683Spst else 60617683Spst nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; 60717683Spst if (nmp->nm_acdirmin > nmp->nm_acdirmax) 60817683Spst nmp->nm_acdirmin = nmp->nm_acdirmax; 60917683Spst if (nmp->nm_acregmin > nmp->nm_acregmax) 61017683Spst nmp->nm_acregmin = nmp->nm_acregmax; 61117683Spst 61217683Spst if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { 61317683Spst if (argp->readahead <= NFS_MAXRAHEAD) 61417683Spst nmp->nm_readahead = argp->readahead; 61517683Spst else 61617683Spst nmp->nm_readahead = NFS_MAXRAHEAD; 61717683Spst } 61817683Spst if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { 61917683Spst if (argp->wcommitsize < nmp->nm_wsize) 62017683Spst nmp->nm_wcommitsize = nmp->nm_wsize; 62117683Spst else 62217683Spst nmp->nm_wcommitsize = argp->wcommitsize; 62317683Spst } 62417683Spst 62517683Spst adjsock |= ((nmp->nm_sotype != argp->sotype) || 62617683Spst (nmp->nm_soproto != argp->proto)); 62717683Spst 62817683Spst if (nmp->nm_client != NULL && adjsock) { 62917683Spst int haslock = 0, error = 0; 63017683Spst 63117683Spst if (nmp->nm_sotype == SOCK_STREAM) { 63217683Spst error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); 63317683Spst if (!error) 63417683Spst haslock = 1; 63517683Spst } 63617683Spst if (!error) { 63717683Spst newnfs_disconnect(&nmp->nm_sockreq); 63817683Spst if (haslock) 63917683Spst newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); 64017683Spst nmp->nm_sotype = argp->sotype; 64117683Spst nmp->nm_soproto = argp->proto; 64217683Spst if (nmp->nm_sotype == SOCK_DGRAM) 64317683Spst while (newnfs_connect(nmp, &nmp->nm_sockreq, 64417683Spst cred, td, 0)) { 645172677Smlaier printf("newnfs_args: retrying connect\n"); 64617683Spst (void) nfs_catnap(PSOCK, "newnfscon"); 64717683Spst } 64817683Spst } 64917683Spst } else { 65017683Spst nmp->nm_sotype = 
argp->sotype; 65117683Spst nmp->nm_soproto = argp->proto; 65217683Spst } 65317683Spst} 65417683Spst 65517683Spststatic const char *nfs_opts[] = { "from", "nfs_args", 65617683Spst "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", 65717683Spst "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", 65817683Spst "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport", 65917683Spst "readdirsize", "soft", "hard", "mntudp", "tcp", "wsize", "rsize", 66017683Spst "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", 66117683Spst NULL }; 66217683Spst 66317683Spst/* 66417683Spst * VFS Operations. 66517683Spst * 66617683Spst * mount system call 66717683Spst * It seems a bit dumb to copyinstr() the host and path here and then 66817683Spst * bcopy() them in mountnfs(), but I wanted to detect errors before 66917683Spst * doing the sockargs() call because sockargs() allocates an mbuf and 67017683Spst * an error after that means that I have to release the mbuf. 
67117683Spst */ 67217683Spst/* ARGSUSED */ 67317683Spststatic int 67417683Spstnfs_mount(struct mount *mp) 67517683Spst{ 67617683Spst struct nfs_args args = { 67717683Spst .version = NFS_ARGSVERSION, 67817683Spst .addr = NULL, 67917683Spst .addrlen = sizeof (struct sockaddr_in), 68017683Spst .sotype = SOCK_STREAM, 68117683Spst .proto = 0, 68217683Spst .fh = NULL, 68317683Spst .fhsize = 0, 68417683Spst .flags = 0, 68517683Spst .wsize = NFS_WSIZE, 68617683Spst .rsize = NFS_RSIZE, 68717683Spst .readdirsize = NFS_READDIRSIZE, 68817683Spst .timeo = 10, 68917683Spst .retrans = NFS_RETRANS, 69017683Spst .readahead = NFS_DEFRAHEAD, 69117683Spst .wcommitsize = 0, /* was: NQ_DEFLEASE */ 69217683Spst .hostname = NULL, 69317683Spst /* args version 4 */ 69417683Spst .acregmin = NFS_MINATTRTIMO, 69517683Spst .acregmax = NFS_MAXATTRTIMO, 69617683Spst .acdirmin = NFS_MINDIRATTRTIMO, 69717683Spst .acdirmax = NFS_MAXDIRATTRTIMO, 69817683Spst .dirlen = 0, 69917683Spst .krbnamelen = 0, 70017683Spst }; 70117683Spst int error; 70217683Spst struct sockaddr *nam; 70317683Spst struct vnode *vp; 70417683Spst struct thread *td; 70517683Spst char hst[MNAMELEN]; 70617683Spst size_t len; 70717683Spst u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; 70817683Spst 70917683Spst if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { 71017683Spst error = EINVAL; 71117683Spst goto out; 71217683Spst } 71317683Spst 71417683Spst td = curthread; 71517683Spst if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { 71617683Spst error = ncl_mountroot(mp); 71717683Spst goto out; 71817683Spst } 71917683Spst 72017683Spst error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args); 72117683Spst if (error) 72217683Spst goto out; 72317683Spst 72417683Spst if (args.version != NFS_ARGSVERSION) { 72517683Spst error = EPROGMISMATCH; 72617683Spst goto out; 72717683Spst } 72817683Spst 729146768Ssam nfscl_init(); 730146768Ssam 731146768Ssam if (mp->mnt_flag & MNT_UPDATE) { 732146768Ssam struct 
nfsmount *nmp = VFSTONFS(mp); 73317683Spst 73417683Spst if (nmp == NULL) { 735146768Ssam error = EIO; 736146768Ssam goto out; 737146768Ssam } 738146768Ssam /* 739146768Ssam * When doing an update, we can't change version, 740146768Ssam * security, switch lockd strategies or change cookie 74117683Spst * translation 74217683Spst */ 743146768Ssam args.flags = (args.flags & 74417683Spst ~(NFSMNT_NFSV3 | 74517683Spst NFSMNT_NFSV4 | 74617683Spst NFSMNT_KERB | 74717683Spst NFSMNT_INTEGRITY | 74817683Spst NFSMNT_PRIVACY | 74917683Spst NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | 75017683Spst (nmp->nm_flag & 75117683Spst (NFSMNT_NFSV3 | 75217683Spst NFSMNT_NFSV4 | 75317683Spst NFSMNT_KERB | 75417683Spst NFSMNT_INTEGRITY | 75517683Spst NFSMNT_PRIVACY | 75617683Spst NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); 75717683Spst nfs_decode_args(mp, nmp, &args, td->td_ucred, td); 75817683Spst goto out; 75917683Spst } 76017683Spst 76117683Spst /* 76217683Spst * Make the nfs_ip_paranoia sysctl serve as the default connection 76317683Spst * or no-connection mode for those protocols that support 76417683Spst * no-connection mode (the flag will be cleared later for protocols 76517683Spst * that do not support no-connection mode). This will allow a client 76617683Spst * to receive replies from a different IP then the request was 76717683Spst * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), 76817683Spst * not 0. 
76917683Spst */ 77017683Spst if (nfs_ip_paranoia == 0) 77117683Spst args.flags |= NFSMNT_NOCONN; 77217683Spst if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { 77317683Spst error = EINVAL; 77417683Spst goto out; 77517683Spst } 77617683Spst if (args.fhsize > 0) { 77717683Spst error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); 77817683Spst if (error) 77917683Spst goto out; 780146768Ssam } 78117683Spst error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); 782146768Ssam if (error) 783146768Ssam goto out; 784146768Ssam bzero(&hst[len], MNAMELEN - len); 785146768Ssam if (args.krbnamelen > 0) { 786146768Ssam if (args.krbnamelen >= 100) { 787146768Ssam error = EINVAL; 78817683Spst goto out; 78917683Spst } 79017683Spst error = copyin(args.krbname, krbname, args.krbnamelen); 79117683Spst if (error) 79217683Spst goto out; 793146768Ssam krbname[args.krbnamelen] = '\0'; 79417683Spst } else { 795146768Ssam krbname[0] = '\0'; 796146768Ssam args.krbnamelen = 0; 797146768Ssam } 798146768Ssam if (args.dirlen > 0) { 79917683Spst if (args.dirlen >= 100) { 80017683Spst error = EINVAL; 801146768Ssam goto out; 80217683Spst } 803146768Ssam error = copyin(args.dirpath, dirpath, args.dirlen); 804146768Ssam if (error) 805146768Ssam goto out; 806146768Ssam dirpath[args.dirlen] = '\0'; 80717683Spst } else { 80817683Spst dirpath[0] = '\0'; 80917683Spst args.dirlen = 0; 810146768Ssam } 81117683Spst if (args.srvkrbnamelen > 0) { 81217683Spst if (args.srvkrbnamelen >= 100) { 81317683Spst error = EINVAL; 81417683Spst goto out; 81517683Spst } 81617683Spst error = copyin(args.srvkrbname, srvkrbname, args.srvkrbnamelen); 81717683Spst if (error) 81817683Spst goto out; 81917683Spst srvkrbname[args.srvkrbnamelen] = '\0'; 82017683Spst } else { 82117683Spst srvkrbname[0] = '\0'; 82217683Spst args.srvkrbnamelen = 0; 82317683Spst } 82417683Spst /* sockargs() call must be after above copyin() calls */ 82517683Spst error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); 82617683Spst if 
(error) 82717683Spst goto out; 828146768Ssam args.fh = nfh; 829146768Ssam error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname, 830146768Ssam &vp, td->td_ucred, td); 831146768Ssamout: 832146768Ssam if (!error) { 833146768Ssam MNT_ILOCK(mp); 834146768Ssam mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED); 835146768Ssam MNT_IUNLOCK(mp); 836146768Ssam } 837146768Ssam return (error); 83817683Spst} 83917683Spst 84017683Spst 84117683Spst/* 84217683Spst * VFS Operations. 84317683Spst * 84417683Spst * mount system call 84517683Spst * It seems a bit dumb to copyinstr() the host and path here and then 84617683Spst * bcopy() them in mountnfs(), but I wanted to detect errors before 847146768Ssam * doing the sockargs() call because sockargs() allocates an mbuf and 848146768Ssam * an error after that means that I have to release the mbuf. 849146768Ssam */ 850146768Ssam/* ARGSUSED */ 851146768Ssamstatic int 85217683Spstnfs_cmount(struct mntarg *ma, void *data, int flags) 853146768Ssam{ 854146768Ssam int error; 855146768Ssam struct nfs_args args; 856146768Ssam 857146768Ssam error = copyin(data, &args, sizeof (struct nfs_args)); 858146768Ssam if (error) 859146768Ssam return error; 860127664Sbms 861127664Sbms ma = mount_arg(ma, "nfs_args", &args, sizeof args); 862127664Sbms 863146768Ssam error = kernel_mount(ma, flags); 864146768Ssam return (error); 865146768Ssam} 866146768Ssam 867146768Ssam/* 868127664Sbms * Common code for mount and mountroot 869127664Sbms */ 870127664Sbmsstatic int 871127664Sbmsmountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, 872127664Sbms char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname, 873127664Sbms struct vnode **vpp, struct ucred *cred, struct thread *td) 874127664Sbms{ 875127664Sbms struct nfsmount *nmp; 876146768Ssam struct nfsnode *np; 877146768Ssam int error, trycnt, ret, clearintr; 878146768Ssam struct nfsvattr nfsva; 879146768Ssam static u_int64_t clval = 0; 880146768Ssam 881146768Ssam if 
(mp->mnt_flag & MNT_UPDATE) { 882146768Ssam nmp = VFSTONFS(mp); 883127664Sbms printf("%s: MNT_UPDATE is no longer handled here\n", __func__); 884127664Sbms FREE(nam, M_SONAME); 885146768Ssam return (0); 886127664Sbms } else { 887127664Sbms MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) + 888127664Sbms argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2, 889127664Sbms M_NEWNFSMNT, M_WAITOK); 890146768Ssam bzero((caddr_t)nmp, sizeof (struct nfsmount) + 891146768Ssam argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2); 892146768Ssam TAILQ_INIT(&nmp->nm_bufq); 893146768Ssam if (clval == 0) 894127664Sbms clval = (u_int64_t)nfsboottime.tv_sec; 895146768Ssam nmp->nm_clval = clval++; 89617683Spst nmp->nm_krbnamelen = argp->krbnamelen; 897127664Sbms nmp->nm_dirpathlen = argp->dirlen; 89817683Spst nmp->nm_srvkrbnamelen = argp->srvkrbnamelen; 89917683Spst if (nmp->nm_dirpathlen > 0) { 900146768Ssam /* 901146768Ssam * Since we will be doing dirpath as root, 902146768Ssam * set nm_uid to the real uid doing the mount, 903146768Ssam * since that is normally the user with a valid TGT. 904146768Ssam */ 905146768Ssam nmp->nm_uid = td->td_ucred->cr_ruid; 906146768Ssam } else { 907146768Ssam /* 908146768Ssam * Just set to -1, so the first Op 909146768Ssam * will set it later, to the uid of 910146768Ssam * the process doing that (usually 911146768Ssam * from a first open in the mount 912146768Ssam * point). 
913146768Ssam */ 914146768Ssam nmp->nm_uid = (uid_t)-1; 91517683Spst } 91617683Spst 917127664Sbms /* Copy and null terminate all the names */ 918127664Sbms if (nmp->nm_krbnamelen > 0) { 919127664Sbms bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); 920127664Sbms nmp->nm_name[nmp->nm_krbnamelen] = '\0'; 921127664Sbms } 922127664Sbms if (nmp->nm_dirpathlen > 0) { 923127664Sbms bcopy(dirpath, NFSMNT_DIRPATH(nmp), 924127664Sbms nmp->nm_dirpathlen); 925127664Sbms nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen 926127664Sbms + 1] = '\0'; 927190225Srpaulo } 928190225Srpaulo if (nmp->nm_srvkrbnamelen > 0) { 929190225Srpaulo bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), 930190225Srpaulo nmp->nm_srvkrbnamelen); 931190225Srpaulo nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen 932190225Srpaulo + nmp->nm_srvkrbnamelen + 2] = '\0'; 933190225Srpaulo } 934190225Srpaulo nmp->nm_sockreq.nr_cred = crhold(cred); 935190225Srpaulo mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); 936190225Srpaulo mp->mnt_data = nmp; 937190225Srpaulo } 93817683Spst vfs_getnewfsid(mp); 93917683Spst nmp->nm_mountp = mp; 94017683Spst mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); 94117683Spst 94217683Spst /* 94317683Spst * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too 94417683Spst * high, depending on whether we end up with negative offsets in 94517683Spst * the client or server somewhere. 2GB-1 may be safer. 94617683Spst * 94717683Spst * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum 94817683Spst * that we can handle until we find out otherwise. 94917683Spst * XXX Our "safe" limit on the client is what we can store in our 95017683Spst * buffer cache using signed(!) block numbers. 
95117683Spst */ 95217683Spst if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) 95317683Spst nmp->nm_maxfilesize = 0xffffffffLL; 95417683Spst else 95517683Spst nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1; 95617683Spst 95717683Spst nmp->nm_timeo = NFS_TIMEO; 95817683Spst nmp->nm_retry = NFS_RETRANS; 95917683Spst if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { 96017683Spst nmp->nm_wsize = NFS_WSIZE; 96117683Spst nmp->nm_rsize = NFS_RSIZE; 96217683Spst nmp->nm_readdirsize = NFS_READDIRSIZE; 96317683Spst } 96417683Spst nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); 96517683Spst nmp->nm_numgrps = NFS_MAXGRPS; 96617683Spst nmp->nm_readahead = NFS_DEFRAHEAD; 96717683Spst nmp->nm_tprintf_delay = nfs_tprintf_delay; 96817683Spst if (nmp->nm_tprintf_delay < 0) 96917683Spst nmp->nm_tprintf_delay = 0; 97017683Spst nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; 97117683Spst if (nmp->nm_tprintf_initial_delay < 0) 97217683Spst nmp->nm_tprintf_initial_delay = 0; 97317683Spst nmp->nm_fhsize = argp->fhsize; 97417683Spst if (nmp->nm_fhsize > 0) 97517683Spst bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); 97617683Spst bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 97717683Spst nmp->nm_nam = nam; 97817683Spst /* Set up the sockets and per-host congestion */ 97917683Spst nmp->nm_sotype = argp->sotype; 98017683Spst nmp->nm_soproto = argp->proto; 98117683Spst nmp->nm_sockreq.nr_prog = NFS_PROG; 98217683Spst if ((argp->flags & NFSMNT_NFSV4)) 98317683Spst nmp->nm_sockreq.nr_vers = NFS_VER4; 98417683Spst else if ((argp->flags & NFSMNT_NFSV3)) 98517683Spst nmp->nm_sockreq.nr_vers = NFS_VER3; 98617683Spst else 98717683Spst nmp->nm_sockreq.nr_vers = NFS_VER2; 98817683Spst 98917683Spst nfs_decode_args(mp, nmp, argp, cred, td); 99017683Spst 99117683Spst /* 99217683Spst * For Connection based sockets (TCP,...) do the connect here, 99317683Spst * but make it interruptible, even for non-interuptible mounts. 
99417683Spst */ 99517683Spst if ((nmp->nm_flag & NFSMNT_INT) == 0) { 99617683Spst nmp->nm_flag |= NFSMNT_INT; 99717683Spst clearintr = 1; 99817683Spst } else { 99917683Spst clearintr = 0; 100017683Spst } 100117683Spst if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) 100217683Spst goto bad; 100317683Spst if (clearintr) 100417683Spst nmp->nm_flag &= ~NFSMNT_INT; 100517683Spst 100617683Spst /* 100717683Spst * A reference count is needed on the nfsnode representing the 100817683Spst * remote root. If this object is not persistent, then backward 100917683Spst * traversals of the mount point (i.e. "..") will not work if 101017683Spst * the nfsnode gets flushed out of the cache. Ufs does not have 101117683Spst * this problem, because one can identify root inodes by their 101217683Spst * number == ROOTINO (2). 101317683Spst */ 101417683Spst if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && 101517683Spst nmp->nm_dirpathlen > 0) { 101617683Spst /* 101717683Spst * If the fhsize on the mount point == 0 for V4, the mount 101817683Spst * path needs to be looked up. 101917683Spst */ 102017683Spst trycnt = 3; 102117683Spst do { 102217683Spst error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), 102317683Spst cred, td); 102417683Spst if (error) 102517683Spst (void) nfs_catnap(PZERO, "nfsgetdirp"); 102617683Spst } while (error && --trycnt > 0); 102717683Spst if (error) { 102817683Spst error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 102917683Spst goto bad; 103017683Spst } 103117683Spst } 103217683Spst if (nmp->nm_fhsize > 0) { 103317683Spst error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np); 103417683Spst if (error) 103517683Spst goto bad; 103617683Spst *vpp = NFSTOV(np); 103717683Spst 103817683Spst /* 103917683Spst * Get file attributes and transfer parameters for the 104017683Spst * mountpoint. This has the side effect of filling in 104117683Spst * (*vpp)->v_type with the correct value. 
104217683Spst */ 104317683Spst ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, 104417683Spst cred, td, &nfsva, NULL); 104517683Spst if (ret) { 104617683Spst /* 104717683Spst * Just set default values to get things going. 104817683Spst */ 104917683Spst NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); 105017683Spst nfsva.na_vattr.va_type = VDIR; 105117683Spst nfsva.na_vattr.va_mode = 0777; 105217683Spst nfsva.na_vattr.va_nlink = 100; 105317683Spst nfsva.na_vattr.va_uid = (uid_t)0; 105498530Sfenner nfsva.na_vattr.va_gid = (gid_t)0; 105598530Sfenner nfsva.na_vattr.va_fileid = 2; 105698530Sfenner nfsva.na_vattr.va_gen = 1; 105798530Sfenner nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; 105817683Spst nfsva.na_vattr.va_size = 512 * 1024; 105917683Spst } 106017683Spst (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); 106117683Spst if (argp->flags & NFSMNT_NFSV3) 106217683Spst ncl_fsinfo(nmp, *vpp, cred, td); 106317683Spst 106417683Spst /* 106517683Spst * Lose the lock but keep the ref. 
106617683Spst */ 106717683Spst VOP_UNLOCK(*vpp, 0); 106817683Spst return (0); 106917683Spst } 107017683Spst error = EIO; 107117683Spst 107217683Spstbad: 107317683Spst newnfs_disconnect(&nmp->nm_sockreq); 107417683Spst crfree(nmp->nm_sockreq.nr_cred); 107517683Spst mtx_destroy(&nmp->nm_sockreq.nr_mtx); 107617683Spst mtx_destroy(&nmp->nm_mtx); 107717683Spst FREE(nmp, M_NEWNFSMNT); 107817683Spst FREE(nam, M_SONAME); 107917683Spst return (error); 108017683Spst} 108117683Spst 108217683Spst/* 108317683Spst * unmount system call 108417683Spst */ 108517683Spststatic int 108617683Spstnfs_unmount(struct mount *mp, int mntflags) 108717683Spst{ 108817683Spst struct thread *td; 108917683Spst struct nfsmount *nmp; 109017683Spst int error, flags = 0, trycnt = 0; 109117683Spst 109217683Spst td = curthread; 109317683Spst 109417683Spst if (mntflags & MNT_FORCE) 109517683Spst flags |= FORCECLOSE; 109617683Spst nmp = VFSTONFS(mp); 109717683Spst /* 109817683Spst * Goes something like this.. 109917683Spst * - Call vflush() to clear out vnodes for this filesystem 110017683Spst * - Close the socket 110117683Spst * - Free up the data structures 110217683Spst */ 110317683Spst /* In the forced case, cancel any outstanding requests. */ 110417683Spst if (mntflags & MNT_FORCE) { 110517683Spst error = newnfs_nmcancelreqs(nmp); 1106127664Sbms if (error) 110717683Spst goto out; 110817683Spst /* For a forced close, get rid of the renew thread now */ 110917683Spst nfscl_umount(nmp, td); 1110127664Sbms } 111117683Spst /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ 111217683Spst do { 111317683Spst error = vflush(mp, 1, flags, td); 111417683Spst if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) 111517683Spst (void) nfs_catnap(PSOCK, "newndm"); 1116127664Sbms } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); 111717683Spst if (error) 111817683Spst goto out; 111917683Spst 112017683Spst /* 112117683Spst * We are now committed to the unmount. 
112217683Spst */ 112317683Spst if ((mntflags & MNT_FORCE) == 0) 112417683Spst nfscl_umount(nmp, td); 112517683Spst newnfs_disconnect(&nmp->nm_sockreq); 112617683Spst crfree(nmp->nm_sockreq.nr_cred); 112717683Spst FREE(nmp->nm_nam, M_SONAME); 112817683Spst 112917683Spst mtx_destroy(&nmp->nm_sockreq.nr_mtx); 113017683Spst mtx_destroy(&nmp->nm_mtx); 113117683Spst FREE(nmp, M_NEWNFSMNT); 113217683Spstout: 113317683Spst return (error); 113417683Spst} 113517683Spst 113617683Spst/* 113717683Spst * Return root of a filesystem 113817683Spst */ 113917683Spststatic int 114017683Spstnfs_root(struct mount *mp, int flags, struct vnode **vpp) 114117683Spst{ 114217683Spst struct vnode *vp; 114317683Spst struct nfsmount *nmp; 114417683Spst struct nfsnode *np; 114517683Spst int error; 114617683Spst 114717683Spst nmp = VFSTONFS(mp); 114817683Spst error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np); 114917683Spst if (error) 115017683Spst return error; 115117683Spst vp = NFSTOV(np); 115217683Spst /* 115317683Spst * Get transfer parameters and attributes for root vnode once. 115417683Spst */ 115517683Spst mtx_lock(&nmp->nm_mtx); 115617683Spst if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { 115717683Spst mtx_unlock(&nmp->nm_mtx); 115817683Spst ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); 115917683Spst } else 116017683Spst mtx_unlock(&nmp->nm_mtx); 116117683Spst if (vp->v_type == VNON) 116217683Spst vp->v_type = VDIR; 116317683Spst vp->v_vflag |= VV_ROOT; 116417683Spst *vpp = vp; 116517683Spst return (0); 116617683Spst} 116717683Spst 116817683Spst/* 116917683Spst * Flush out the buffer cache 117017683Spst */ 117117683Spst/* ARGSUSED */ 117217683Spststatic int 117317683Spstnfs_sync(struct mount *mp, int waitfor) 117417683Spst{ 117517683Spst struct vnode *vp, *mvp; 117617683Spst struct thread *td; 117717683Spst int error, allerror = 0; 117817683Spst 117917683Spst td = curthread; 118017683Spst 118117683Spst /* 118217683Spst * Force stale buffer cache information to be flushed. 
118317683Spst */ 118417683Spst MNT_ILOCK(mp); 118517683Spstloop: 118617683Spst MNT_VNODE_FOREACH(vp, mp, mvp) { 118717683Spst VI_LOCK(vp); 118817683Spst MNT_IUNLOCK(mp); 118917683Spst /* XXX Racy bv_cnt check. */ 119017683Spst if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || 119117683Spst waitfor == MNT_LAZY) { 119217683Spst VI_UNLOCK(vp); 119317683Spst MNT_ILOCK(mp); 119417683Spst continue; 119517683Spst } 119617683Spst if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 119717683Spst MNT_ILOCK(mp); 119817683Spst MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 119917683Spst goto loop; 120017683Spst } 120117683Spst error = VOP_FSYNC(vp, waitfor, td); 120217683Spst if (error) 120317683Spst allerror = error; 120417683Spst VOP_UNLOCK(vp, 0); 120517683Spst vrele(vp); 120617683Spst 120717683Spst MNT_ILOCK(mp); 120817683Spst } 120917683Spst MNT_IUNLOCK(mp); 121017683Spst return (allerror); 121117683Spst} 121217683Spst 121317683Spststatic int 121417683Spstnfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) 121517683Spst{ 121617683Spst struct nfsmount *nmp = VFSTONFS(mp); 121717683Spst struct vfsquery vq; 121817683Spst int error; 121917683Spst 122017683Spst bzero(&vq, sizeof(vq)); 122117683Spst switch (op) { 122217683Spst#if 0 1223146768Ssam case VFS_CTL_NOLOCKS: 122417683Spst val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 
1 : 0; 122556889Sfenner if (req->oldptr != NULL) { 122656889Sfenner error = SYSCTL_OUT(req, &val, sizeof(val)); 122756889Sfenner if (error) 122856889Sfenner return (error); 122956889Sfenner } 123056889Sfenner if (req->newptr != NULL) { 123156889Sfenner error = SYSCTL_IN(req, &val, sizeof(val)); 123256889Sfenner if (error) 123317683Spst return (error); 123417683Spst if (val) 123517683Spst nmp->nm_flag |= NFSMNT_NOLOCKS; 123617683Spst else 1237146768Ssam nmp->nm_flag &= ~NFSMNT_NOLOCKS; 1238146768Ssam } 1239146768Ssam break; 1240146768Ssam#endif 1241146768Ssam case VFS_CTL_QUERY: 124217683Spst mtx_lock(&nmp->nm_mtx); 1243146768Ssam if (nmp->nm_state & NFSSTA_TIMEO) 1244146768Ssam vq.vq_flags |= VQ_NOTRESP; 1245146768Ssam mtx_unlock(&nmp->nm_mtx); 1246146768Ssam#if 0 1247146768Ssam if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && 1248146768Ssam (nmp->nm_state & NFSSTA_LOCKTIMEO)) 1249146768Ssam vq.vq_flags |= VQ_NOTRESPLOCK; 125017683Spst#endif 1251146768Ssam error = SYSCTL_OUT(req, &vq, sizeof(vq)); 1252146768Ssam break; 1253146768Ssam case VFS_CTL_TIMEO: 1254146768Ssam if (req->oldptr != NULL) { 1255146768Ssam error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, 1256146768Ssam sizeof(nmp->nm_tprintf_initial_delay)); 1257146768Ssam if (error) 1258146768Ssam return (error); 125917683Spst } 126017683Spst if (req->newptr != NULL) { 126117683Spst error = vfs_suser(mp, req->td); 126217683Spst if (error) 126317683Spst return (error); 126417683Spst error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, 126517683Spst sizeof(nmp->nm_tprintf_initial_delay)); 1266146768Ssam if (error) 126717683Spst return (error); 126817683Spst if (nmp->nm_tprintf_initial_delay < 0) 126917683Spst nmp->nm_tprintf_initial_delay = 0; 127017683Spst } 127117683Spst break; 1272146768Ssam default: 1273146768Ssam return (ENOTSUP); 127417683Spst } 1275146768Ssam return (0); 1276146768Ssam} 1277146768Ssam 1278146768Ssam