nfs_clvfsops.c revision 231636
/*-
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
 */
34214455Srpaulo
35214455Srpaulo#include <sys/cdefs.h>
36214455Srpaulo__FBSDID("$FreeBSD: stable/9/sys/fs/nfsclient/nfs_clvfsops.c 231636 2012-02-14 04:48:36Z rmacklem $");
37214455Srpaulo
38214455Srpaulo
39214455Srpaulo#include "opt_bootp.h"
40214455Srpaulo#include "opt_nfsroot.h"
41214455Srpaulo
42214455Srpaulo#include <sys/param.h>
43214455Srpaulo#include <sys/systm.h>
44214455Srpaulo#include <sys/kernel.h>
45214455Srpaulo#include <sys/bio.h>
46214455Srpaulo#include <sys/buf.h>
47214455Srpaulo#include <sys/clock.h>
48214455Srpaulo#include <sys/jail.h>
49214455Srpaulo#include <sys/limits.h>
50214455Srpaulo#include <sys/lock.h>
51214455Srpaulo#include <sys/malloc.h>
52214455Srpaulo#include <sys/mbuf.h>
53214455Srpaulo#include <sys/module.h>
54214455Srpaulo#include <sys/mount.h>
55214455Srpaulo#include <sys/proc.h>
56214455Srpaulo#include <sys/socket.h>
57214455Srpaulo#include <sys/socketvar.h>
58214455Srpaulo#include <sys/sockio.h>
59214455Srpaulo#include <sys/sysctl.h>
60214455Srpaulo#include <sys/vnode.h>
61214455Srpaulo#include <sys/signalvar.h>
62214455Srpaulo
63214455Srpaulo#include <vm/vm.h>
64214455Srpaulo#include <vm/vm_extern.h>
65214455Srpaulo#include <vm/uma.h>
66214455Srpaulo
67214455Srpaulo#include <net/if.h>
68214455Srpaulo#include <net/route.h>
69214455Srpaulo#include <netinet/in.h>
70214455Srpaulo
71214455Srpaulo#include <fs/nfs/nfsport.h>
72214455Srpaulo#include <fs/nfsclient/nfsnode.h>
73214455Srpaulo#include <fs/nfsclient/nfsmount.h>
74214455Srpaulo#include <fs/nfsclient/nfs.h>
75214455Srpaulo#include <nfs/nfsdiskless.h>
76214455Srpaulo
77214455SrpauloFEATURE(nfscl, "NFSv4 client");
78214455Srpaulo
79214455Srpauloextern int nfscl_ticks;
80214455Srpauloextern struct timeval nfsboottime;
81214455Srpauloextern struct nfsstats	newnfsstats;
82214455Srpauloextern int nfsrv_useacl;
83214455Srpaulo
84214455SrpauloMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
85214455SrpauloMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
86214455Srpaulo
87214455SrpauloSYSCTL_DECL(_vfs_nfs);
88214455Srpaulostatic int nfs_ip_paranoia = 1;
89214455SrpauloSYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
90214455Srpaulo    &nfs_ip_paranoia, 0, "");
91214455Srpaulostatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
92214455SrpauloSYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
93214455Srpaulo        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
94214455Srpaulo/* how long between console messages "nfs server foo not responding" */
95214455Srpaulostatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
96214455SrpauloSYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
97235426Sdelphij        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
98235426Sdelphij
99235426Sdelphijstatic int	nfs_mountroot(struct mount *);
100235426Sdelphijstatic void	nfs_sec_name(char *, int *);
101235426Sdelphijstatic void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
102235426Sdelphij		    struct nfs_args *argp, const char *, struct ucred *,
103235426Sdelphij		    struct thread *);
104235426Sdelphijstatic int	mountnfs(struct nfs_args *, struct mount *,
105235426Sdelphij		    struct sockaddr *, char *, u_char *, int, u_char *, int,
106235426Sdelphij		    u_char *, int, struct vnode **, struct ucred *,
107235426Sdelphij		    struct thread *, int);
108235426Sdelphijstatic void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
109235426Sdelphij		    struct sockaddr_storage *, int *, off_t *,
110235426Sdelphij		    struct timeval *);
111235426Sdelphijstatic vfs_mount_t nfs_mount;
112235426Sdelphijstatic vfs_cmount_t nfs_cmount;
113235426Sdelphijstatic vfs_unmount_t nfs_unmount;
114214455Srpaulostatic vfs_root_t nfs_root;
115214455Srpaulostatic vfs_statfs_t nfs_statfs;
116214455Srpaulostatic vfs_sync_t nfs_sync;
117214455Srpaulostatic vfs_sysctl_t nfs_sysctl;
118214455Srpaulo
119214455Srpaulo/*
120214455Srpaulo * nfs vfs operations.
121241231Sdelphij */
122235426Sdelphijstatic struct vfsops nfs_vfsops = {
123214455Srpaulo	.vfs_init =		ncl_init,
124214455Srpaulo	.vfs_mount =		nfs_mount,
125214455Srpaulo	.vfs_cmount =		nfs_cmount,
126214455Srpaulo	.vfs_root =		nfs_root,
127214455Srpaulo	.vfs_statfs =		nfs_statfs,
128214455Srpaulo	.vfs_sync =		nfs_sync,
129214455Srpaulo	.vfs_uninit =		ncl_uninit,
130214455Srpaulo	.vfs_unmount =		nfs_unmount,
131214455Srpaulo	.vfs_sysctl =		nfs_sysctl,
132214455Srpaulo};
133214455SrpauloVFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
134214455Srpaulo
135214455Srpaulo/* So that loader and kldload(2) can find us, wherever we are.. */
136214455SrpauloMODULE_VERSION(nfs, 1);
137214455SrpauloMODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
138214455SrpauloMODULE_DEPEND(nfs, krpc, 1, 1, 1);
139214455SrpauloMODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
140214455SrpauloMODULE_DEPEND(nfs, nfslock, 1, 1, 1);
141214455Srpaulo
142214455Srpaulo/*
143214455Srpaulo * This structure is now defined in sys/nfs/nfs_diskless.c so that it
144214455Srpaulo * can be shared by both NFS clients. It is declared here so that it
145214455Srpaulo * will be defined for kernels built without NFS_ROOT, although it
146214455Srpaulo * isn't used in that case.
147214455Srpaulo */
148235426Sdelphij#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
149235426Sdelphijstruct nfs_diskless	nfs_diskless = { { { 0 } } };
150235426Sdelphijstruct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
151235426Sdelphijint			nfs_diskless_valid = 0;
152235426Sdelphij#endif
153235426Sdelphij
154214455SrpauloSYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
155214455Srpaulo    &nfs_diskless_valid, 0,
156214455Srpaulo    "Has the diskless struct been filled correctly");
157214455Srpaulo
158235426SdelphijSYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
159235426Sdelphij    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
160235426Sdelphij
161235426SdelphijSYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
162235426Sdelphij    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
163235426Sdelphij    "%Ssockaddr_in", "Diskless root nfs address");
164235426Sdelphij
165235426Sdelphij
/* Prototypes for the diskless-boot helpers in this file. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
173214455Srpaulo
174214455Srpauloint
175214455Srpaulonewnfs_iosize(struct nfsmount *nmp)
176214455Srpaulo{
177214455Srpaulo	int iosize, maxio;
178214455Srpaulo
179214455Srpaulo	/* First, set the upper limit for iosize */
180214455Srpaulo	if (nmp->nm_flag & NFSMNT_NFSV4) {
181214455Srpaulo		maxio = NFS_MAXBSIZE;
182214455Srpaulo	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
183214455Srpaulo		if (nmp->nm_sotype == SOCK_DGRAM)
184214455Srpaulo			maxio = NFS_MAXDGRAMDATA;
185214455Srpaulo		else
186214455Srpaulo			maxio = NFS_MAXBSIZE;
187214455Srpaulo	} else {
188214455Srpaulo		maxio = NFS_V2MAXDATA;
189214455Srpaulo	}
190214455Srpaulo	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
191214455Srpaulo		nmp->nm_rsize = maxio;
192214455Srpaulo	if (nmp->nm_rsize > MAXBSIZE)
193214455Srpaulo		nmp->nm_rsize = MAXBSIZE;
194214455Srpaulo	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
195214455Srpaulo		nmp->nm_readdirsize = maxio;
196241231Sdelphij	if (nmp->nm_readdirsize > nmp->nm_rsize)
197241231Sdelphij		nmp->nm_readdirsize = nmp->nm_rsize;
198214455Srpaulo	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
199214455Srpaulo		nmp->nm_wsize = maxio;
200214455Srpaulo	if (nmp->nm_wsize > MAXBSIZE)
201214455Srpaulo		nmp->nm_wsize = MAXBSIZE;
202214455Srpaulo
203214455Srpaulo	/*
204214455Srpaulo	 * Calculate the size used for io buffers.  Use the larger
205214455Srpaulo	 * of the two sizes to minimise nfs requests but make sure
206214455Srpaulo	 * that it is at least one VM page to avoid wasting buffer
207214455Srpaulo	 * space.
208214455Srpaulo	 */
209214455Srpaulo	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
210214455Srpaulo	iosize = imax(iosize, PAGE_SIZE);
211214455Srpaulo	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
212214455Srpaulo	return (iosize);
213214455Srpaulo}
214214455Srpaulo
215241231Sdelphijstatic void
216214455Srpaulonfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
217214455Srpaulo{
218214455Srpaulo
219214455Srpaulo	args->version = NFS_ARGSVERSION;
220214455Srpaulo	args->addr = oargs->addr;
221214455Srpaulo	args->addrlen = oargs->addrlen;
222214455Srpaulo	args->sotype = oargs->sotype;
223214455Srpaulo	args->proto = oargs->proto;
224214455Srpaulo	args->fh = oargs->fh;
225214455Srpaulo	args->fhsize = oargs->fhsize;
226214455Srpaulo	args->flags = oargs->flags;
227214455Srpaulo	args->wsize = oargs->wsize;
228214455Srpaulo	args->rsize = oargs->rsize;
229214455Srpaulo	args->readdirsize = oargs->readdirsize;
230214455Srpaulo	args->timeo = oargs->timeo;
231214455Srpaulo	args->retrans = oargs->retrans;
232214455Srpaulo	args->readahead = oargs->readahead;
233214455Srpaulo	args->hostname = oargs->hostname;
234214455Srpaulo}
235214455Srpaulo
236214455Srpaulostatic void
237214455Srpaulonfs_convert_diskless(void)
238214455Srpaulo{
239214455Srpaulo
240214455Srpaulo	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
241214455Srpaulo		sizeof(struct ifaliasreq));
242214455Srpaulo	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
243214455Srpaulo		sizeof(struct sockaddr_in));
244214455Srpaulo	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
245214455Srpaulo	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
246214455Srpaulo		nfsv3_diskless.root_fhsize = NFSX_MYFH;
247214455Srpaulo		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
248214455Srpaulo	} else {
249214455Srpaulo		nfsv3_diskless.root_fhsize = NFSX_V2FH;
250214455Srpaulo		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
251214455Srpaulo	}
252214455Srpaulo	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
253214455Srpaulo		sizeof(struct sockaddr_in));
254214455Srpaulo	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
255214455Srpaulo	nfsv3_diskless.root_time = nfs_diskless.root_time;
256214455Srpaulo	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
257214455Srpaulo		MAXHOSTNAMELEN);
258214455Srpaulo	nfs_diskless_valid = 3;
259214455Srpaulo}
260214455Srpaulo
261214455Srpaulo/*
262214455Srpaulo * nfs statfs call
263214455Srpaulo */
264214455Srpaulostatic int
265214455Srpaulonfs_statfs(struct mount *mp, struct statfs *sbp)
266214455Srpaulo{
267214455Srpaulo	struct vnode *vp;
268214455Srpaulo	struct thread *td;
269214455Srpaulo	struct nfsmount *nmp = VFSTONFS(mp);
270214455Srpaulo	struct nfsvattr nfsva;
271214455Srpaulo	struct nfsfsinfo fs;
272214455Srpaulo	struct nfsstatfs sb;
273214455Srpaulo	int error = 0, attrflag, gotfsinfo = 0, ret;
274214455Srpaulo	struct nfsnode *np;
275214455Srpaulo
276214455Srpaulo	td = curthread;
277214455Srpaulo
278214455Srpaulo	error = vfs_busy(mp, MBF_NOWAIT);
279214455Srpaulo	if (error)
280214455Srpaulo		return (error);
281214455Srpaulo	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
282214455Srpaulo	if (error) {
283214455Srpaulo		vfs_unbusy(mp);
284214455Srpaulo		return (error);
285214455Srpaulo	}
286214455Srpaulo	vp = NFSTOV(np);
287214455Srpaulo	mtx_lock(&nmp->nm_mtx);
288214455Srpaulo	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
289214455Srpaulo		mtx_unlock(&nmp->nm_mtx);
290214455Srpaulo		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
291214455Srpaulo		    &attrflag, NULL);
292214455Srpaulo		if (!error)
293214455Srpaulo			gotfsinfo = 1;
294214455Srpaulo	} else
295214455Srpaulo		mtx_unlock(&nmp->nm_mtx);
296214455Srpaulo	if (!error)
297214455Srpaulo		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
298214455Srpaulo		    &attrflag, NULL);
299214455Srpaulo	if (attrflag == 0) {
300214455Srpaulo		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
301214455Srpaulo		    td->td_ucred, td, &nfsva, NULL);
302214455Srpaulo		if (ret) {
303214455Srpaulo			/*
304214455Srpaulo			 * Just set default values to get things going.
305214455Srpaulo			 */
306214455Srpaulo			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
307214455Srpaulo			nfsva.na_vattr.va_type = VDIR;
308214455Srpaulo			nfsva.na_vattr.va_mode = 0777;
309214455Srpaulo			nfsva.na_vattr.va_nlink = 100;
310214455Srpaulo			nfsva.na_vattr.va_uid = (uid_t)0;
311241231Sdelphij			nfsva.na_vattr.va_gid = (gid_t)0;
312214455Srpaulo			nfsva.na_vattr.va_fileid = 2;
313214455Srpaulo			nfsva.na_vattr.va_gen = 1;
314214455Srpaulo			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
315214455Srpaulo			nfsva.na_vattr.va_size = 512 * 1024;
316214455Srpaulo		}
317214455Srpaulo	}
318214455Srpaulo	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
319214455Srpaulo	if (!error) {
320214455Srpaulo	    mtx_lock(&nmp->nm_mtx);
321214455Srpaulo	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
322241231Sdelphij		nfscl_loadfsinfo(nmp, &fs);
323214455Srpaulo	    nfscl_loadsbinfo(nmp, &sb, sbp);
324214455Srpaulo	    sbp->f_iosize = newnfs_iosize(nmp);
325214455Srpaulo	    mtx_unlock(&nmp->nm_mtx);
326214455Srpaulo	    if (sbp != &mp->mnt_stat) {
327214455Srpaulo		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
328214455Srpaulo		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
329214455Srpaulo	    }
330214455Srpaulo	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
331214455Srpaulo	} else if (NFS_ISV4(vp)) {
332214455Srpaulo		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
333214455Srpaulo	}
334214455Srpaulo	vput(vp);
335214455Srpaulo	vfs_unbusy(mp);
336214455Srpaulo	return (error);
337214455Srpaulo}
338214455Srpaulo
339214455Srpaulo/*
340214455Srpaulo * nfs version 3 fsinfo rpc call
341214455Srpaulo */
342214455Srpauloint
343214455Srpauloncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
344214455Srpaulo    struct thread *td)
345214455Srpaulo{
346214455Srpaulo	struct nfsfsinfo fs;
347214455Srpaulo	struct nfsvattr nfsva;
348214455Srpaulo	int error, attrflag;
349214455Srpaulo
350214455Srpaulo	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
351214455Srpaulo	if (!error) {
352214455Srpaulo		if (attrflag)
353214455Srpaulo			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
354214455Srpaulo			    1);
355214455Srpaulo		mtx_lock(&nmp->nm_mtx);
356214455Srpaulo		nfscl_loadfsinfo(nmp, &fs);
357214455Srpaulo		mtx_unlock(&nmp->nm_mtx);
358214455Srpaulo	}
359214455Srpaulo	return (error);
360214455Srpaulo}
361214455Srpaulo
362214455Srpaulo/*
363214455Srpaulo * Mount a remote root fs via. nfs. This depends on the info in the
364214455Srpaulo * nfs_diskless structure that has been filled in properly by some primary
365214455Srpaulo * bootstrap.
366214455Srpaulo * It goes something like this:
367214455Srpaulo * - do enough of "ifconfig" by calling ifioctl() so that the system
368214455Srpaulo *   can talk to the server
369214455Srpaulo * - If nfs_diskless.mygateway is filled in, use that address as
370214455Srpaulo *   a default gateway.
371214455Srpaulo * - build the rootfs mount point and call mountnfs() to do the rest.
372214455Srpaulo *
373214455Srpaulo * It is assumed to be safe to read, modify, and write the nfsv3_diskless
374214455Srpaulo * structure, as well as other global NFS client variables here, as
375214455Srpaulo * nfs_mountroot() will be called once in the boot before any other NFS
376214455Srpaulo * client activity occurs.
377214455Srpaulo */
378214455Srpaulostatic int
379214455Srpaulonfs_mountroot(struct mount *mp)
380214455Srpaulo{
381214455Srpaulo	struct thread *td = curthread;
382214455Srpaulo	struct nfsv3_diskless *nd = &nfsv3_diskless;
383214455Srpaulo	struct socket *so;
384214455Srpaulo	struct vnode *vp;
385214455Srpaulo	struct ifreq ir;
386214455Srpaulo	int error;
387214455Srpaulo	u_long l;
388214455Srpaulo	char buf[128];
389214455Srpaulo	char *cp;
390214455Srpaulo
391214455Srpaulo#if defined(BOOTP_NFSROOT) && defined(BOOTP)
392214455Srpaulo	bootpc_init();		/* use bootp to get nfs_diskless filled in */
393214455Srpaulo#elif defined(NFS_ROOT)
394214455Srpaulo	nfs_setup_diskless();
395214455Srpaulo#endif
396214455Srpaulo
397214455Srpaulo	if (nfs_diskless_valid == 0)
398214455Srpaulo		return (-1);
399214455Srpaulo	if (nfs_diskless_valid == 1)
400214455Srpaulo		nfs_convert_diskless();
401214455Srpaulo
402214455Srpaulo	/*
403214455Srpaulo	 * XXX splnet, so networks will receive...
404214455Srpaulo	 */
405214455Srpaulo	splnet();
406214455Srpaulo
407214455Srpaulo	/*
408214455Srpaulo	 * Do enough of ifconfig(8) so that the critical net interface can
409214455Srpaulo	 * talk to the server.
410214455Srpaulo	 */
411214455Srpaulo	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
412214455Srpaulo	    td->td_ucred, td);
413214455Srpaulo	if (error)
414214455Srpaulo		panic("nfs_mountroot: socreate(%04x): %d",
415214455Srpaulo			nd->myif.ifra_addr.sa_family, error);
416214455Srpaulo
417214455Srpaulo#if 0 /* XXX Bad idea */
418214455Srpaulo	/*
419214455Srpaulo	 * We might not have been told the right interface, so we pass
420214455Srpaulo	 * over the first ten interfaces of the same kind, until we get
421214455Srpaulo	 * one of them configured.
422214455Srpaulo	 */
423214455Srpaulo
424214455Srpaulo	for (i = strlen(nd->myif.ifra_name) - 1;
425214455Srpaulo		nd->myif.ifra_name[i] >= '0' &&
426214455Srpaulo		nd->myif.ifra_name[i] <= '9';
427214455Srpaulo		nd->myif.ifra_name[i] ++) {
428214455Srpaulo		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
429214455Srpaulo		if(!error)
430214455Srpaulo			break;
431214455Srpaulo	}
432214455Srpaulo#endif
433214455Srpaulo	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
434214455Srpaulo	if (error)
435214455Srpaulo		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
436214455Srpaulo	if ((cp = getenv("boot.netif.mtu")) != NULL) {
437214455Srpaulo		ir.ifr_mtu = strtol(cp, NULL, 10);
438214455Srpaulo		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
439214455Srpaulo		freeenv(cp);
440214455Srpaulo		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
441214455Srpaulo		if (error)
442214455Srpaulo			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
443214455Srpaulo	}
444214455Srpaulo	soclose(so);
445214455Srpaulo
446214455Srpaulo	/*
447214455Srpaulo	 * If the gateway field is filled in, set it as the default route.
448214455Srpaulo	 * Note that pxeboot will set a default route of 0 if the route
449214455Srpaulo	 * is not set by the DHCP server.  Check also for a value of 0
450214455Srpaulo	 * to avoid panicking inappropriately in that situation.
451214455Srpaulo	 */
452214455Srpaulo	if (nd->mygateway.sin_len != 0 &&
453214455Srpaulo	    nd->mygateway.sin_addr.s_addr != 0) {
454214455Srpaulo		struct sockaddr_in mask, sin;
455214455Srpaulo
456214455Srpaulo		bzero((caddr_t)&mask, sizeof(mask));
457214455Srpaulo		sin = mask;
458214455Srpaulo		sin.sin_family = AF_INET;
459214455Srpaulo		sin.sin_len = sizeof(sin);
460214455Srpaulo                /* XXX MRT use table 0 for this sort of thing */
461214455Srpaulo		CURVNET_SET(TD_TO_VNET(td));
462214455Srpaulo		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
463214455Srpaulo		    (struct sockaddr *)&nd->mygateway,
464214455Srpaulo		    (struct sockaddr *)&mask,
465214455Srpaulo		    RTF_UP | RTF_GATEWAY, NULL);
466214455Srpaulo		CURVNET_RESTORE();
467214455Srpaulo		if (error)
468214455Srpaulo			panic("nfs_mountroot: RTM_ADD: %d", error);
469214455Srpaulo	}
470214455Srpaulo
471214455Srpaulo	/*
472214455Srpaulo	 * Create the rootfs mount point.
473214455Srpaulo	 */
474214455Srpaulo	nd->root_args.fh = nd->root_fh;
475214455Srpaulo	nd->root_args.fhsize = nd->root_fhsize;
476214455Srpaulo	l = ntohl(nd->root_saddr.sin_addr.s_addr);
477214455Srpaulo	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
478214455Srpaulo		(l >> 24) & 0xff, (l >> 16) & 0xff,
479214455Srpaulo		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
480214455Srpaulo	printf("NFS ROOT: %s\n", buf);
481214455Srpaulo	nd->root_args.hostname = buf;
482214455Srpaulo	if ((error = nfs_mountdiskless(buf,
483214455Srpaulo	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
484214455Srpaulo		return (error);
485214455Srpaulo	}
486214455Srpaulo
487214455Srpaulo	/*
488214455Srpaulo	 * This is not really an nfs issue, but it is much easier to
489214455Srpaulo	 * set hostname here and then let the "/etc/rc.xxx" files
490214455Srpaulo	 * mount the right /var based upon its preset value.
491214455Srpaulo	 */
492214455Srpaulo	mtx_lock(&prison0.pr_mtx);
493214455Srpaulo	strlcpy(prison0.pr_hostname, nd->my_hostnam,
494214455Srpaulo	    sizeof(prison0.pr_hostname));
495214455Srpaulo	mtx_unlock(&prison0.pr_mtx);
496214455Srpaulo	inittodr(ntohl(nd->root_time));
497214455Srpaulo	return (0);
498214455Srpaulo}
499214455Srpaulo
500214455Srpaulo/*
501214455Srpaulo * Internal version of mount system call for diskless setup.
502214455Srpaulo */
503214455Srpaulostatic int
504214455Srpaulonfs_mountdiskless(char *path,
505214455Srpaulo    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
506214455Srpaulo    struct vnode **vpp, struct mount *mp)
507214455Srpaulo{
508214455Srpaulo	struct sockaddr *nam;
509214455Srpaulo	int dirlen, error;
510214455Srpaulo	char *dirpath;
511214455Srpaulo
512214455Srpaulo	/*
513214455Srpaulo	 * Find the directory path in "path", which also has the server's
514214455Srpaulo	 * name/ip address in it.
515214455Srpaulo	 */
516214455Srpaulo	dirpath = strchr(path, ':');
517214455Srpaulo	if (dirpath != NULL)
518214455Srpaulo		dirlen = strlen(++dirpath);
519214455Srpaulo	else
520214455Srpaulo		dirlen = 0;
521214455Srpaulo	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
522214455Srpaulo	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
523214455Srpaulo	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
524214455Srpaulo		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
525214455Srpaulo		return (error);
526214455Srpaulo	}
527214455Srpaulo	return (0);
528214455Srpaulo}
529214455Srpaulo
530214455Srpaulostatic void
531214455Srpaulonfs_sec_name(char *sec, int *flagsp)
532214455Srpaulo{
533214455Srpaulo	if (!strcmp(sec, "krb5"))
534214455Srpaulo		*flagsp |= NFSMNT_KERB;
535214455Srpaulo	else if (!strcmp(sec, "krb5i"))
536214455Srpaulo		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
537214455Srpaulo	else if (!strcmp(sec, "krb5p"))
538214455Srpaulo		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
539214455Srpaulo}
540214455Srpaulo
541214455Srpaulostatic void
542214455Srpaulonfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
543214455Srpaulo    const char *hostname, struct ucred *cred, struct thread *td)
544214455Srpaulo{
545214455Srpaulo	int s;
546214455Srpaulo	int adjsock;
547214455Srpaulo	char *p;
548214455Srpaulo
549214455Srpaulo	s = splnet();
550214455Srpaulo
551214455Srpaulo	/*
552214455Srpaulo	 * Set read-only flag if requested; otherwise, clear it if this is
553214455Srpaulo	 * an update.  If this is not an update, then either the read-only
554214455Srpaulo	 * flag is already clear, or this is a root mount and it was set
555214455Srpaulo	 * intentionally at some previous point.
556214455Srpaulo	 */
557214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
558214455Srpaulo		MNT_ILOCK(mp);
559214455Srpaulo		mp->mnt_flag |= MNT_RDONLY;
560214455Srpaulo		MNT_IUNLOCK(mp);
561214455Srpaulo	} else if (mp->mnt_flag & MNT_UPDATE) {
562214455Srpaulo		MNT_ILOCK(mp);
563214455Srpaulo		mp->mnt_flag &= ~MNT_RDONLY;
564214455Srpaulo		MNT_IUNLOCK(mp);
565214455Srpaulo	}
566214455Srpaulo
567214455Srpaulo	/*
568235426Sdelphij	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
569214455Srpaulo	 * no sense in that context.  Also, set up appropriate retransmit
570214455Srpaulo	 * and soft timeout behavior.
571214455Srpaulo	 */
572214455Srpaulo	if (argp->sotype == SOCK_STREAM) {
573214455Srpaulo		nmp->nm_flag &= ~NFSMNT_NOCONN;
574235426Sdelphij		nmp->nm_timeo = NFS_MAXTIMEO;
575214455Srpaulo		if ((argp->flags & NFSMNT_NFSV4) != 0)
576214455Srpaulo			nmp->nm_retry = INT_MAX;
577214455Srpaulo		else
578214455Srpaulo			nmp->nm_retry = NFS_RETRANS_TCP;
579214455Srpaulo	}
580214455Srpaulo
581235426Sdelphij	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
582214455Srpaulo	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
583214455Srpaulo		argp->flags &= ~NFSMNT_RDIRPLUS;
584214455Srpaulo		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
585214455Srpaulo	}
586214455Srpaulo
587214455Srpaulo	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
588235426Sdelphij	if ((argp->flags & NFSMNT_NFSV4) != 0) {
589214455Srpaulo		argp->flags &= ~NFSMNT_RESVPORT;
590214455Srpaulo		nmp->nm_flag &= ~NFSMNT_RESVPORT;
591214455Srpaulo	}
592214455Srpaulo
593214455Srpaulo	/* Re-bind if rsrvd port requested and wasn't on one */
594235426Sdelphij	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
595214455Srpaulo		  && (argp->flags & NFSMNT_RESVPORT);
596214455Srpaulo	/* Also re-bind if we're switching to/from a connected UDP socket */
597214455Srpaulo	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
598214455Srpaulo		    (argp->flags & NFSMNT_NOCONN));
599214455Srpaulo
600235426Sdelphij	/* Update flags atomically.  Don't change the lock bits. */
601214455Srpaulo	nmp->nm_flag = argp->flags | nmp->nm_flag;
602214455Srpaulo	splx(s);
603214455Srpaulo
604214455Srpaulo	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
605214455Srpaulo		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
606214455Srpaulo		if (nmp->nm_timeo < NFS_MINTIMEO)
607214455Srpaulo			nmp->nm_timeo = NFS_MINTIMEO;
608214455Srpaulo		else if (nmp->nm_timeo > NFS_MAXTIMEO)
609214455Srpaulo			nmp->nm_timeo = NFS_MAXTIMEO;
610214455Srpaulo	}
611214455Srpaulo
612214455Srpaulo	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
613214455Srpaulo		nmp->nm_retry = argp->retrans;
614214455Srpaulo		if (nmp->nm_retry > NFS_MAXREXMIT)
615214455Srpaulo			nmp->nm_retry = NFS_MAXREXMIT;
616214455Srpaulo	}
617214455Srpaulo
618214455Srpaulo	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
619235426Sdelphij		nmp->nm_wsize = argp->wsize;
620214455Srpaulo		/* Round down to multiple of blocksize */
621214455Srpaulo		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
622214455Srpaulo		if (nmp->nm_wsize <= 0)
623214455Srpaulo			nmp->nm_wsize = NFS_FABLKSIZE;
624214455Srpaulo	}
625214455Srpaulo
626235426Sdelphij	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
627235426Sdelphij		nmp->nm_rsize = argp->rsize;
628214455Srpaulo		/* Round down to multiple of blocksize */
629214455Srpaulo		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
630214455Srpaulo		if (nmp->nm_rsize <= 0)
631214455Srpaulo			nmp->nm_rsize = NFS_FABLKSIZE;
632214455Srpaulo	}
633214455Srpaulo
634235426Sdelphij	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
635214455Srpaulo		nmp->nm_readdirsize = argp->readdirsize;
636214455Srpaulo	}
637214455Srpaulo
638214455Srpaulo	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
639214455Srpaulo		nmp->nm_acregmin = argp->acregmin;
640214455Srpaulo	else
641214455Srpaulo		nmp->nm_acregmin = NFS_MINATTRTIMO;
642214455Srpaulo	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
643214455Srpaulo		nmp->nm_acregmax = argp->acregmax;
644214455Srpaulo	else
645214455Srpaulo		nmp->nm_acregmax = NFS_MAXATTRTIMO;
646235426Sdelphij	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
647214455Srpaulo		nmp->nm_acdirmin = argp->acdirmin;
648214455Srpaulo	else
649214455Srpaulo		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
650214455Srpaulo	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
651214455Srpaulo		nmp->nm_acdirmax = argp->acdirmax;
652214455Srpaulo	else
653214455Srpaulo		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
654214455Srpaulo	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
655214455Srpaulo		nmp->nm_acdirmin = nmp->nm_acdirmax;
656214455Srpaulo	if (nmp->nm_acregmin > nmp->nm_acregmax)
657235426Sdelphij		nmp->nm_acregmin = nmp->nm_acregmax;
658214455Srpaulo
659214455Srpaulo	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
660214455Srpaulo		if (argp->readahead <= NFS_MAXRAHEAD)
661214455Srpaulo			nmp->nm_readahead = argp->readahead;
662214455Srpaulo		else
663214455Srpaulo			nmp->nm_readahead = NFS_MAXRAHEAD;
664214455Srpaulo	}
665214455Srpaulo	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
666235426Sdelphij		if (argp->wcommitsize < nmp->nm_wsize)
667214455Srpaulo			nmp->nm_wcommitsize = nmp->nm_wsize;
668214455Srpaulo		else
669214455Srpaulo			nmp->nm_wcommitsize = argp->wcommitsize;
670214455Srpaulo	}
671214455Srpaulo
672235426Sdelphij	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
673214455Srpaulo		    (nmp->nm_soproto != argp->proto));
674214455Srpaulo
675214455Srpaulo	if (nmp->nm_client != NULL && adjsock) {
676214455Srpaulo		int haslock = 0, error = 0;
677214455Srpaulo
678214455Srpaulo		if (nmp->nm_sotype == SOCK_STREAM) {
679214455Srpaulo			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
680214455Srpaulo			if (!error)
681214455Srpaulo				haslock = 1;
682214455Srpaulo		}
683214455Srpaulo		if (!error) {
684214455Srpaulo		    newnfs_disconnect(&nmp->nm_sockreq);
685214455Srpaulo		    if (haslock)
686214455Srpaulo			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
687214455Srpaulo		    nmp->nm_sotype = argp->sotype;
688214455Srpaulo		    nmp->nm_soproto = argp->proto;
689214455Srpaulo		    if (nmp->nm_sotype == SOCK_DGRAM)
690214455Srpaulo			while (newnfs_connect(nmp, &nmp->nm_sockreq,
691214455Srpaulo			    cred, td, 0)) {
692214455Srpaulo				printf("newnfs_args: retrying connect\n");
693214455Srpaulo				(void) nfs_catnap(PSOCK, 0, "newnfscon");
694214455Srpaulo			}
695214455Srpaulo		}
696214455Srpaulo	} else {
697214455Srpaulo		nmp->nm_sotype = argp->sotype;
698214455Srpaulo		nmp->nm_soproto = argp->proto;
699214455Srpaulo	}
700214455Srpaulo
701214455Srpaulo	if (hostname != NULL) {
702214455Srpaulo		strlcpy(nmp->nm_hostname, hostname,
703214455Srpaulo		    sizeof(nmp->nm_hostname));
704214455Srpaulo		p = strchr(nmp->nm_hostname, ':');
705214455Srpaulo		if (p != NULL)
706214455Srpaulo			*p = '\0';
707214455Srpaulo	}
708214455Srpaulo}
709214455Srpaulo
/*
 * NULL-terminated list of the mount option names accepted by nfs_mount();
 * it is handed to vfs_filteropt() to reject anything else.
 */
static const char *nfs_opts[] = {
	/* Source and legacy argument blob. */
	"from", "nfs_args",
	/* Generic mount options. */
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	/* NFS behaviour switches. */
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	/* Transport selection and transfer sizes. */
	"mntudp", "tcp", "udp", "wsize", "rsize", "retrans",
	/* Attribute cache timeouts. */
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	/* Protocol version and security. */
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo", "nocto", "wcommitsize",
	NULL
};
720214455Srpaulo
721214455Srpaulo/*
722214455Srpaulo * VFS Operations.
723214455Srpaulo *
724214455Srpaulo * mount system call
725214455Srpaulo * It seems a bit dumb to copyinstr() the host and path here and then
726214455Srpaulo * bcopy() them in mountnfs(), but I wanted to detect errors before
727214455Srpaulo * doing the sockargs() call because sockargs() allocates an mbuf and
728214455Srpaulo * an error after that means that I have to release the mbuf.
729214455Srpaulo */
730214455Srpaulo/* ARGSUSED */
731214455Srpaulostatic int
732214455Srpaulonfs_mount(struct mount *mp)
733214455Srpaulo{
734235426Sdelphij	struct nfs_args args = {
735214455Srpaulo	    .version = NFS_ARGSVERSION,
736214455Srpaulo	    .addr = NULL,
737214455Srpaulo	    .addrlen = sizeof (struct sockaddr_in),
738214455Srpaulo	    .sotype = SOCK_STREAM,
739214455Srpaulo	    .proto = 0,
740214455Srpaulo	    .fh = NULL,
741214455Srpaulo	    .fhsize = 0,
742214455Srpaulo	    .flags = NFSMNT_RESVPORT,
743235426Sdelphij	    .wsize = NFS_WSIZE,
744214455Srpaulo	    .rsize = NFS_RSIZE,
745214455Srpaulo	    .readdirsize = NFS_READDIRSIZE,
746214455Srpaulo	    .timeo = 10,
747214455Srpaulo	    .retrans = NFS_RETRANS,
748214455Srpaulo	    .readahead = NFS_DEFRAHEAD,
749235426Sdelphij	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
750235426Sdelphij	    .hostname = NULL,
751214455Srpaulo	    .acregmin = NFS_MINATTRTIMO,
752235426Sdelphij	    .acregmax = NFS_MAXATTRTIMO,
753235426Sdelphij	    .acdirmin = NFS_MINDIRATTRTIMO,
754235426Sdelphij	    .acdirmax = NFS_MAXDIRATTRTIMO,
755235426Sdelphij	};
756235426Sdelphij	int error = 0, ret, len;
757235426Sdelphij	struct sockaddr *nam = NULL;
758214455Srpaulo	struct vnode *vp;
759235426Sdelphij	struct thread *td;
760235426Sdelphij	char hst[MNAMELEN];
761235426Sdelphij	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
762235426Sdelphij	char *opt, *name, *secname;
763235426Sdelphij	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
764235426Sdelphij	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
765235426Sdelphij	size_t hstlen;
766235426Sdelphij
767235426Sdelphij	has_nfs_args_opt = 0;
768235426Sdelphij	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
769235426Sdelphij		error = EINVAL;
770235426Sdelphij		goto out;
771235426Sdelphij	}
772235426Sdelphij
773235426Sdelphij	td = curthread;
774235426Sdelphij	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
775235426Sdelphij		error = nfs_mountroot(mp);
776235426Sdelphij		goto out;
777235426Sdelphij	}
778235426Sdelphij
779235426Sdelphij	nfscl_init();
780235426Sdelphij
781235426Sdelphij	/*
782235426Sdelphij	 * The old mount_nfs program passed the struct nfs_args
783235426Sdelphij	 * from userspace to kernel.  The new mount_nfs program
784235426Sdelphij	 * passes string options via nmount() from userspace to kernel
785235426Sdelphij	 * and we populate the struct nfs_args in the kernel.
786235426Sdelphij	 */
787235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
788235426Sdelphij		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
789235426Sdelphij		    sizeof(args));
790235426Sdelphij		if (error != 0)
791235426Sdelphij			goto out;
792235426Sdelphij
793235426Sdelphij		if (args.version != NFS_ARGSVERSION) {
794235426Sdelphij			error = EPROGMISMATCH;
795235426Sdelphij			goto out;
796235426Sdelphij		}
797235426Sdelphij		has_nfs_args_opt = 1;
798235426Sdelphij	}
799235426Sdelphij
800235426Sdelphij	/* Handle the new style options. */
801235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
802235426Sdelphij		args.flags |= NFSMNT_NOCONN;
803235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
804235426Sdelphij		args.flags |= NFSMNT_NOCONN;
805235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
806235426Sdelphij		args.flags |= NFSMNT_NOLOCKD;
807235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
808235426Sdelphij		args.flags &= ~NFSMNT_NOLOCKD;
809235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
810235426Sdelphij		args.flags |= NFSMNT_INT;
811235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
812235426Sdelphij		args.flags |= NFSMNT_RDIRPLUS;
813235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
814235426Sdelphij		args.flags |= NFSMNT_RESVPORT;
815235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
816235426Sdelphij		args.flags &= ~NFSMNT_RESVPORT;
817235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
818235426Sdelphij		args.flags |= NFSMNT_SOFT;
819235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
820235426Sdelphij		args.flags &= ~NFSMNT_SOFT;
821235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
822235426Sdelphij		args.sotype = SOCK_DGRAM;
823235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
824235426Sdelphij		args.sotype = SOCK_DGRAM;
825235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
826235426Sdelphij		args.sotype = SOCK_STREAM;
827235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
828235426Sdelphij		args.flags |= NFSMNT_NFSV3;
829235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
830235426Sdelphij		args.flags |= NFSMNT_NFSV4;
831235426Sdelphij		args.sotype = SOCK_STREAM;
832235426Sdelphij	}
833235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
834235426Sdelphij		args.flags |= NFSMNT_ALLGSSNAME;
835235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
836235426Sdelphij		args.flags |= NFSMNT_NOCTO;
837235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
838235426Sdelphij		if (opt == NULL) {
839235426Sdelphij			vfs_mount_error(mp, "illegal readdirsize");
840235426Sdelphij			error = EINVAL;
841235426Sdelphij			goto out;
842235426Sdelphij		}
843235426Sdelphij		ret = sscanf(opt, "%d", &args.readdirsize);
844251129Sdelphij		if (ret != 1 || args.readdirsize <= 0) {
845235426Sdelphij			vfs_mount_error(mp, "illegal readdirsize: %s",
846235426Sdelphij			    opt);
847235426Sdelphij			error = EINVAL;
848235426Sdelphij			goto out;
849235426Sdelphij		}
850241231Sdelphij		args.flags |= NFSMNT_READDIRSIZE;
851241231Sdelphij	}
852241231Sdelphij	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
853241231Sdelphij		if (opt == NULL) {
854241231Sdelphij			vfs_mount_error(mp, "illegal readahead");
855241231Sdelphij			error = EINVAL;
856235426Sdelphij			goto out;
857241231Sdelphij		}
858241231Sdelphij		ret = sscanf(opt, "%d", &args.readahead);
859241231Sdelphij		if (ret != 1 || args.readahead <= 0) {
860241231Sdelphij			vfs_mount_error(mp, "illegal readahead: %s",
861241231Sdelphij			    opt);
862241231Sdelphij			error = EINVAL;
863241231Sdelphij			goto out;
864241231Sdelphij		}
865241231Sdelphij		args.flags |= NFSMNT_READAHEAD;
866241231Sdelphij	}
867241231Sdelphij	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
868241231Sdelphij		if (opt == NULL) {
869241231Sdelphij			vfs_mount_error(mp, "illegal wsize");
870241231Sdelphij			error = EINVAL;
871241231Sdelphij			goto out;
872241231Sdelphij		}
873241231Sdelphij		ret = sscanf(opt, "%d", &args.wsize);
874241231Sdelphij		if (ret != 1 || args.wsize <= 0) {
875241231Sdelphij			vfs_mount_error(mp, "illegal wsize: %s",
876241231Sdelphij			    opt);
877241231Sdelphij			error = EINVAL;
878241231Sdelphij			goto out;
879241231Sdelphij		}
880241231Sdelphij		args.flags |= NFSMNT_WSIZE;
881241231Sdelphij	}
882241231Sdelphij	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
883241231Sdelphij		if (opt == NULL) {
884241231Sdelphij			vfs_mount_error(mp, "illegal rsize");
885241231Sdelphij			error = EINVAL;
886251129Sdelphij			goto out;
887251129Sdelphij		}
888251129Sdelphij		ret = sscanf(opt, "%d", &args.rsize);
889251129Sdelphij		if (ret != 1 || args.rsize <= 0) {
890251129Sdelphij			vfs_mount_error(mp, "illegal wsize: %s",
891251129Sdelphij			    opt);
892241231Sdelphij			error = EINVAL;
893251129Sdelphij			goto out;
894251129Sdelphij		}
895251129Sdelphij		args.flags |= NFSMNT_RSIZE;
896251129Sdelphij	}
897251129Sdelphij	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
898251129Sdelphij		if (opt == NULL) {
899251129Sdelphij			vfs_mount_error(mp, "illegal retrans");
900251129Sdelphij			error = EINVAL;
901251129Sdelphij			goto out;
902214455Srpaulo		}
903214455Srpaulo		ret = sscanf(opt, "%d", &args.retrans);
904214455Srpaulo		if (ret != 1 || args.retrans <= 0) {
905214455Srpaulo			vfs_mount_error(mp, "illegal retrans: %s",
906214455Srpaulo			    opt);
907214455Srpaulo			error = EINVAL;
908214455Srpaulo			goto out;
909214455Srpaulo		}
910214455Srpaulo		args.flags |= NFSMNT_RETRANS;
911214455Srpaulo	}
912214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
913214455Srpaulo		ret = sscanf(opt, "%d", &args.acregmin);
914214455Srpaulo		if (ret != 1 || args.acregmin < 0) {
915214455Srpaulo			vfs_mount_error(mp, "illegal acregmin: %s",
916241231Sdelphij			    opt);
917235426Sdelphij			error = EINVAL;
918214455Srpaulo			goto out;
919214455Srpaulo		}
920214455Srpaulo		args.flags |= NFSMNT_ACREGMIN;
921241231Sdelphij	}
922214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
923214455Srpaulo		ret = sscanf(opt, "%d", &args.acregmax);
924214455Srpaulo		if (ret != 1 || args.acregmax < 0) {
925214455Srpaulo			vfs_mount_error(mp, "illegal acregmax: %s",
926214455Srpaulo			    opt);
927214455Srpaulo			error = EINVAL;
928214455Srpaulo			goto out;
929214455Srpaulo		}
930214455Srpaulo		args.flags |= NFSMNT_ACREGMAX;
931214455Srpaulo	}
932214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
933214455Srpaulo		ret = sscanf(opt, "%d", &args.acdirmin);
934214455Srpaulo		if (ret != 1 || args.acdirmin < 0) {
935214455Srpaulo			vfs_mount_error(mp, "illegal acdirmin: %s",
936214455Srpaulo			    opt);
937214455Srpaulo			error = EINVAL;
938214455Srpaulo			goto out;
939214455Srpaulo		}
940214455Srpaulo		args.flags |= NFSMNT_ACDIRMIN;
941214455Srpaulo	}
942214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
943214455Srpaulo		ret = sscanf(opt, "%d", &args.acdirmax);
944214455Srpaulo		if (ret != 1 || args.acdirmax < 0) {
945214455Srpaulo			vfs_mount_error(mp, "illegal acdirmax: %s",
946214455Srpaulo			    opt);
947214455Srpaulo			error = EINVAL;
948214455Srpaulo			goto out;
949214455Srpaulo		}
950214455Srpaulo		args.flags |= NFSMNT_ACDIRMAX;
951214455Srpaulo	}
952214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
953214455Srpaulo		ret = sscanf(opt, "%d", &args.wcommitsize);
954214455Srpaulo		if (ret != 1 || args.wcommitsize < 0) {
955214455Srpaulo			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
956214455Srpaulo			error = EINVAL;
957214455Srpaulo			goto out;
958214455Srpaulo		}
959235426Sdelphij		args.flags |= NFSMNT_WCOMMITSIZE;
960235426Sdelphij	}
961235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
962214455Srpaulo		ret = sscanf(opt, "%d", &args.timeo);
963214455Srpaulo		if (ret != 1 || args.timeo <= 0) {
964214455Srpaulo			vfs_mount_error(mp, "illegal timeout: %s",
965214455Srpaulo			    opt);
966214455Srpaulo			error = EINVAL;
967214455Srpaulo			goto out;
968214455Srpaulo		}
969214455Srpaulo		args.flags |= NFSMNT_TIMEO;
970214455Srpaulo	}
971214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
972235426Sdelphij	    == 0) {
973241231Sdelphij		ret = sscanf(opt, "%d", &negnametimeo);
974241231Sdelphij		if (ret != 1 || negnametimeo < 0) {
975241231Sdelphij			vfs_mount_error(mp, "illegal negnametimeo: %s",
976241231Sdelphij			    opt);
977241231Sdelphij			error = EINVAL;
978241231Sdelphij			goto out;
979235426Sdelphij		}
980235426Sdelphij	}
981235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "sec",
982235426Sdelphij		(void **) &secname, NULL) == 0)
983235426Sdelphij		nfs_sec_name(secname, &args.flags);
984235426Sdelphij
985235426Sdelphij	if (mp->mnt_flag & MNT_UPDATE) {
986235426Sdelphij		struct nfsmount *nmp = VFSTONFS(mp);
987214455Srpaulo
988214455Srpaulo		if (nmp == NULL) {
989214455Srpaulo			error = EIO;
990214455Srpaulo			goto out;
991214455Srpaulo		}
992214455Srpaulo
993214455Srpaulo		/*
994235426Sdelphij		 * If a change from TCP->UDP is done and there are thread(s)
995235426Sdelphij		 * that have I/O RPC(s) in progress with a tranfer size
996214455Srpaulo		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
997214455Srpaulo		 * hung, retrying the RPC(s) forever. Usually these threads
998214455Srpaulo		 * will be seen doing an uninterruptible sleep on wait channel
999214455Srpaulo		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1000214455Srpaulo		 */
1001214455Srpaulo		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1002214455Srpaulo			tprintf(td->td_proc, LOG_WARNING,
1003214455Srpaulo	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1004214455Srpaulo
1005235426Sdelphij		/*
1006241231Sdelphij		 * When doing an update, we can't change version,
1007241231Sdelphij		 * security, switch lockd strategies or change cookie
1008241231Sdelphij		 * translation
1009241231Sdelphij		 */
1010241231Sdelphij		args.flags = (args.flags &
1011241231Sdelphij		    ~(NFSMNT_NFSV3 |
1012241231Sdelphij		      NFSMNT_NFSV4 |
1013241231Sdelphij		      NFSMNT_KERB |
1014241231Sdelphij		      NFSMNT_INTEGRITY |
1015235426Sdelphij		      NFSMNT_PRIVACY |
1016235426Sdelphij		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1017235426Sdelphij		    (nmp->nm_flag &
1018235426Sdelphij			(NFSMNT_NFSV3 |
1019235426Sdelphij			 NFSMNT_NFSV4 |
1020235426Sdelphij			 NFSMNT_KERB |
1021235426Sdelphij			 NFSMNT_INTEGRITY |
1022235426Sdelphij			 NFSMNT_PRIVACY |
1023235426Sdelphij			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1024214455Srpaulo		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1025214455Srpaulo		goto out;
1026214455Srpaulo	}
1027214455Srpaulo
1028214455Srpaulo	/*
1029214455Srpaulo	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1030214455Srpaulo	 * or no-connection mode for those protocols that support
1031214455Srpaulo	 * no-connection mode (the flag will be cleared later for protocols
1032214455Srpaulo	 * that do not support no-connection mode).  This will allow a client
1033214455Srpaulo	 * to receive replies from a different IP then the request was
1034214455Srpaulo	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1035214455Srpaulo	 * not 0.
1036214455Srpaulo	 */
1037214455Srpaulo	if (nfs_ip_paranoia == 0)
1038214455Srpaulo		args.flags |= NFSMNT_NOCONN;
1039214455Srpaulo
1040214455Srpaulo	if (has_nfs_args_opt != 0) {
1041214455Srpaulo		/*
1042214455Srpaulo		 * In the 'nfs_args' case, the pointers in the args
1043214455Srpaulo		 * structure are in userland - we copy them in here.
1044214455Srpaulo		 */
1045214455Srpaulo		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1046214455Srpaulo			vfs_mount_error(mp, "Bad file handle");
1047214455Srpaulo			error = EINVAL;
1048214455Srpaulo			goto out;
1049214455Srpaulo		}
1050214455Srpaulo		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1051235426Sdelphij		    args.fhsize);
1052235426Sdelphij		if (error != 0)
1053235426Sdelphij			goto out;
1054214455Srpaulo		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1055214455Srpaulo		if (error != 0)
1056235426Sdelphij			goto out;
1057235426Sdelphij		bzero(&hst[hstlen], MNAMELEN - hstlen);
1058235426Sdelphij		args.hostname = hst;
1059235426Sdelphij		/* sockargs() call must be after above copyin() calls */
1060235426Sdelphij		error = getsockaddr(&nam, (caddr_t)args.addr,
1061235426Sdelphij		    args.addrlen);
1062214455Srpaulo		if (error != 0)
1063214455Srpaulo			goto out;
1064214455Srpaulo	} else {
1065235426Sdelphij		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1066235426Sdelphij		    &args.fhsize) == 0) {
1067214455Srpaulo			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1068214455Srpaulo				vfs_mount_error(mp, "Bad file handle");
1069235426Sdelphij				error = EINVAL;
1070235426Sdelphij				goto out;
1071235426Sdelphij			}
1072235426Sdelphij			bcopy(args.fh, nfh, args.fhsize);
1073235426Sdelphij		} else {
1074214455Srpaulo			args.fhsize = 0;
1075214455Srpaulo		}
1076235426Sdelphij		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1077235426Sdelphij		    (void **)&args.hostname, &len);
1078235426Sdelphij		if (args.hostname == NULL) {
1079235426Sdelphij			vfs_mount_error(mp, "Invalid hostname");
1080235426Sdelphij			error = EINVAL;
1081214455Srpaulo			goto out;
1082214455Srpaulo		}
1083235426Sdelphij		bcopy(args.hostname, hst, MNAMELEN);
1084235426Sdelphij		hst[MNAMELEN - 1] = '\0';
1085235426Sdelphij	}
1086214455Srpaulo
1087214455Srpaulo	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1088235426Sdelphij		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1089235426Sdelphij	else
1090235426Sdelphij		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1091214455Srpaulo	srvkrbnamelen = strlen(srvkrbname);
1092214455Srpaulo
1093235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1094235426Sdelphij		strlcpy(krbname, name, sizeof (krbname));
1095235426Sdelphij	else
1096214455Srpaulo		krbname[0] = '\0';
1097214455Srpaulo	krbnamelen = strlen(krbname);
1098235426Sdelphij
1099235426Sdelphij	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1100235426Sdelphij		strlcpy(dirpath, name, sizeof (dirpath));
1101214455Srpaulo	else
1102214455Srpaulo		dirpath[0] = '\0';
1103214455Srpaulo	dirlen = strlen(dirpath);
1104235426Sdelphij
1105235426Sdelphij	if (has_nfs_args_opt == 0) {
1106235426Sdelphij		if (vfs_getopt(mp->mnt_optnew, "addr",
1107235426Sdelphij		    (void **)&args.addr, &args.addrlen) == 0) {
1108235426Sdelphij			if (args.addrlen > SOCK_MAXADDRLEN) {
1109235426Sdelphij				error = ENAMETOOLONG;
1110235426Sdelphij				goto out;
1111235426Sdelphij			}
1112235426Sdelphij			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1113235426Sdelphij			bcopy(args.addr, nam, args.addrlen);
1114235426Sdelphij			nam->sa_len = args.addrlen;
1115235426Sdelphij		} else {
1116235426Sdelphij			vfs_mount_error(mp, "No server address");
1117214455Srpaulo			error = EINVAL;
1118214455Srpaulo			goto out;
1119214455Srpaulo		}
1120214455Srpaulo	}
1121214455Srpaulo
1122214455Srpaulo	args.fh = nfh;
1123214455Srpaulo	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1124214455Srpaulo	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1125214455Srpaulo	    negnametimeo);
1126235426Sdelphijout:
1127235426Sdelphij	if (!error) {
1128214455Srpaulo		MNT_ILOCK(mp);
1129214455Srpaulo		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1130235426Sdelphij		MNT_IUNLOCK(mp);
1131235426Sdelphij	}
1132235426Sdelphij	return (error);
1133214455Srpaulo}
1134214455Srpaulo
1135235426Sdelphij
1136235426Sdelphij/*
1137235426Sdelphij * VFS Operations.
1138214455Srpaulo *
1139214455Srpaulo * mount system call
1140235426Sdelphij * It seems a bit dumb to copyinstr() the host and path here and then
1141235426Sdelphij * bcopy() them in mountnfs(), but I wanted to detect errors before
1142235426Sdelphij * doing the sockargs() call because sockargs() allocates an mbuf and
1143214455Srpaulo * an error after that means that I have to release the mbuf.
1144214455Srpaulo */
1145214455Srpaulo/* ARGSUSED */
1146235426Sdelphijstatic int
1147235426Sdelphijnfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1148235426Sdelphij{
1149235426Sdelphij	int error;
1150235426Sdelphij	struct nfs_args args;
1151235426Sdelphij
1152235426Sdelphij	error = copyin(data, &args, sizeof (struct nfs_args));
1153235426Sdelphij	if (error)
1154235426Sdelphij		return error;
1155235426Sdelphij
1156235426Sdelphij	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1157235426Sdelphij
1158235426Sdelphij	error = kernel_mount(ma, flags);
1159235426Sdelphij	return (error);
1160235426Sdelphij}
1161235426Sdelphij
1162235426Sdelphij/*
1163235426Sdelphij * Common code for mount and mountroot
1164235426Sdelphij */
1165235426Sdelphijstatic int
1166235426Sdelphijmountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1167235426Sdelphij    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1168235426Sdelphij    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1169235426Sdelphij    struct ucred *cred, struct thread *td, int negnametimeo)
1170235426Sdelphij{
1171235426Sdelphij	struct nfsmount *nmp;
1172214455Srpaulo	struct nfsnode *np;
1173	int error, trycnt, ret;
1174	struct nfsvattr nfsva;
1175	static u_int64_t clval = 0;
1176
1177	if (mp->mnt_flag & MNT_UPDATE) {
1178		nmp = VFSTONFS(mp);
1179		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1180		FREE(nam, M_SONAME);
1181		return (0);
1182	} else {
1183		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1184		    krbnamelen + dirlen + srvkrbnamelen + 2,
1185		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1186		TAILQ_INIT(&nmp->nm_bufq);
1187		if (clval == 0)
1188			clval = (u_int64_t)nfsboottime.tv_sec;
1189		nmp->nm_clval = clval++;
1190		nmp->nm_krbnamelen = krbnamelen;
1191		nmp->nm_dirpathlen = dirlen;
1192		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1193		if (td->td_ucred->cr_uid != (uid_t)0) {
1194			/*
1195			 * nm_uid is used to get KerberosV credentials for
1196			 * the nfsv4 state handling operations if there is
1197			 * no host based principal set. Use the uid of
1198			 * this user if not root, since they are doing the
1199			 * mount. I don't think setting this for root will
1200			 * work, since root normally does not have user
1201			 * credentials in a credentials cache.
1202			 */
1203			nmp->nm_uid = td->td_ucred->cr_uid;
1204		} else {
1205			/*
1206			 * Just set to -1, so it won't be used.
1207			 */
1208			nmp->nm_uid = (uid_t)-1;
1209		}
1210
1211		/* Copy and null terminate all the names */
1212		if (nmp->nm_krbnamelen > 0) {
1213			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1214			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1215		}
1216		if (nmp->nm_dirpathlen > 0) {
1217			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1218			    nmp->nm_dirpathlen);
1219			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1220			    + 1] = '\0';
1221		}
1222		if (nmp->nm_srvkrbnamelen > 0) {
1223			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1224			    nmp->nm_srvkrbnamelen);
1225			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1226			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1227		}
1228		nmp->nm_sockreq.nr_cred = crhold(cred);
1229		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1230		mp->mnt_data = nmp;
1231		nmp->nm_getinfo = nfs_getnlminfo;
1232		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1233	}
1234	vfs_getnewfsid(mp);
1235	nmp->nm_mountp = mp;
1236	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1237
1238	/*
1239	 * Since nfs_decode_args() might optionally set them, these need to
1240	 * set to defaults before the call, so that the optional settings
1241	 * aren't overwritten.
1242	 */
1243	nmp->nm_negnametimeo = negnametimeo;
1244	nmp->nm_timeo = NFS_TIMEO;
1245	nmp->nm_retry = NFS_RETRANS;
1246	nmp->nm_readahead = NFS_DEFRAHEAD;
1247	if (desiredvnodes >= 11000)
1248		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1249	else
1250		nmp->nm_wcommitsize = hibufspace / 10;
1251
1252	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1253
1254	/*
1255	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1256	 * high, depending on whether we end up with negative offsets in
1257	 * the client or server somewhere.  2GB-1 may be safer.
1258	 *
1259	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1260	 * that we can handle until we find out otherwise.
1261	 * XXX Our "safe" limit on the client is what we can store in our
1262	 * buffer cache using signed(!) block numbers.
1263	 */
1264	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1265		nmp->nm_maxfilesize = 0xffffffffLL;
1266	else
1267		nmp->nm_maxfilesize = OFF_MAX;
1268
1269	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1270		nmp->nm_wsize = NFS_WSIZE;
1271		nmp->nm_rsize = NFS_RSIZE;
1272		nmp->nm_readdirsize = NFS_READDIRSIZE;
1273	}
1274	nmp->nm_numgrps = NFS_MAXGRPS;
1275	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1276	if (nmp->nm_tprintf_delay < 0)
1277		nmp->nm_tprintf_delay = 0;
1278	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1279	if (nmp->nm_tprintf_initial_delay < 0)
1280		nmp->nm_tprintf_initial_delay = 0;
1281	nmp->nm_fhsize = argp->fhsize;
1282	if (nmp->nm_fhsize > 0)
1283		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1284	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1285	nmp->nm_nam = nam;
1286	/* Set up the sockets and per-host congestion */
1287	nmp->nm_sotype = argp->sotype;
1288	nmp->nm_soproto = argp->proto;
1289	nmp->nm_sockreq.nr_prog = NFS_PROG;
1290	if ((argp->flags & NFSMNT_NFSV4))
1291		nmp->nm_sockreq.nr_vers = NFS_VER4;
1292	else if ((argp->flags & NFSMNT_NFSV3))
1293		nmp->nm_sockreq.nr_vers = NFS_VER3;
1294	else
1295		nmp->nm_sockreq.nr_vers = NFS_VER2;
1296
1297
1298	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1299		goto bad;
1300
1301	/*
1302	 * A reference count is needed on the nfsnode representing the
1303	 * remote root.  If this object is not persistent, then backward
1304	 * traversals of the mount point (i.e. "..") will not work if
1305	 * the nfsnode gets flushed out of the cache. Ufs does not have
1306	 * this problem, because one can identify root inodes by their
1307	 * number == ROOTINO (2).
1308	 */
1309	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1310	    nmp->nm_dirpathlen > 0) {
1311		/*
1312		 * If the fhsize on the mount point == 0 for V4, the mount
1313		 * path needs to be looked up.
1314		 */
1315		trycnt = 3;
1316		do {
1317			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1318			    cred, td);
1319			if (error)
1320				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1321		} while (error && --trycnt > 0);
1322		if (error) {
1323			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1324			goto bad;
1325		}
1326	}
1327	if (nmp->nm_fhsize > 0) {
1328		/*
1329		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1330		 * non-zero for the root vnode. f_iosize will be set correctly
1331		 * by nfs_statfs() before any I/O occurs.
1332		 */
1333		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1334		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1335		    LK_EXCLUSIVE);
1336		if (error)
1337			goto bad;
1338		*vpp = NFSTOV(np);
1339
1340		/*
1341		 * Get file attributes and transfer parameters for the
1342		 * mountpoint.  This has the side effect of filling in
1343		 * (*vpp)->v_type with the correct value.
1344		 */
1345		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1346		    cred, td, &nfsva, NULL);
1347		if (ret) {
1348			/*
1349			 * Just set default values to get things going.
1350			 */
1351			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1352			nfsva.na_vattr.va_type = VDIR;
1353			nfsva.na_vattr.va_mode = 0777;
1354			nfsva.na_vattr.va_nlink = 100;
1355			nfsva.na_vattr.va_uid = (uid_t)0;
1356			nfsva.na_vattr.va_gid = (gid_t)0;
1357			nfsva.na_vattr.va_fileid = 2;
1358			nfsva.na_vattr.va_gen = 1;
1359			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1360			nfsva.na_vattr.va_size = 512 * 1024;
1361		}
1362		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1363		if (argp->flags & NFSMNT_NFSV3)
1364			ncl_fsinfo(nmp, *vpp, cred, td);
1365
1366		/* Mark if the mount point supports NFSv4 ACLs. */
1367		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1368		    ret == 0 &&
1369		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1370			MNT_ILOCK(mp);
1371			mp->mnt_flag |= MNT_NFS4ACLS;
1372			MNT_IUNLOCK(mp);
1373		}
1374
1375		/*
1376		 * Lose the lock but keep the ref.
1377		 */
1378		NFSVOPUNLOCK(*vpp, 0);
1379		return (0);
1380	}
1381	error = EIO;
1382
1383bad:
1384	newnfs_disconnect(&nmp->nm_sockreq);
1385	crfree(nmp->nm_sockreq.nr_cred);
1386	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1387	mtx_destroy(&nmp->nm_mtx);
1388	FREE(nmp, M_NEWNFSMNT);
1389	FREE(nam, M_SONAME);
1390	return (error);
1391}
1392
1393/*
1394 * unmount system call
1395 */
1396static int
1397nfs_unmount(struct mount *mp, int mntflags)
1398{
1399	struct thread *td;
1400	struct nfsmount *nmp;
1401	int error, flags = 0, trycnt = 0;
1402
1403	td = curthread;
1404
1405	if (mntflags & MNT_FORCE)
1406		flags |= FORCECLOSE;
1407	nmp = VFSTONFS(mp);
1408	/*
1409	 * Goes something like this..
1410	 * - Call vflush() to clear out vnodes for this filesystem
1411	 * - Close the socket
1412	 * - Free up the data structures
1413	 */
1414	/* In the forced case, cancel any outstanding requests. */
1415	if (mntflags & MNT_FORCE) {
1416		error = newnfs_nmcancelreqs(nmp);
1417		if (error)
1418			goto out;
1419		/* For a forced close, get rid of the renew thread now */
1420		nfscl_umount(nmp, td);
1421	}
1422	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1423	do {
1424		error = vflush(mp, 1, flags, td);
1425		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1426			(void) nfs_catnap(PSOCK, error, "newndm");
1427	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1428	if (error)
1429		goto out;
1430
1431	/*
1432	 * We are now committed to the unmount.
1433	 */
1434	if ((mntflags & MNT_FORCE) == 0)
1435		nfscl_umount(nmp, td);
1436	newnfs_disconnect(&nmp->nm_sockreq);
1437	crfree(nmp->nm_sockreq.nr_cred);
1438	FREE(nmp->nm_nam, M_SONAME);
1439
1440	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1441	mtx_destroy(&nmp->nm_mtx);
1442	FREE(nmp, M_NEWNFSMNT);
1443out:
1444	return (error);
1445}
1446
1447/*
1448 * Return root of a filesystem
1449 */
1450static int
1451nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1452{
1453	struct vnode *vp;
1454	struct nfsmount *nmp;
1455	struct nfsnode *np;
1456	int error;
1457
1458	nmp = VFSTONFS(mp);
1459	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1460	if (error)
1461		return error;
1462	vp = NFSTOV(np);
1463	/*
1464	 * Get transfer parameters and attributes for root vnode once.
1465	 */
1466	mtx_lock(&nmp->nm_mtx);
1467	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1468		mtx_unlock(&nmp->nm_mtx);
1469		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1470	} else
1471		mtx_unlock(&nmp->nm_mtx);
1472	if (vp->v_type == VNON)
1473	    vp->v_type = VDIR;
1474	vp->v_vflag |= VV_ROOT;
1475	*vpp = vp;
1476	return (0);
1477}
1478
1479/*
1480 * Flush out the buffer cache
1481 */
1482/* ARGSUSED */
1483static int
1484nfs_sync(struct mount *mp, int waitfor)
1485{
1486	struct vnode *vp, *mvp;
1487	struct thread *td;
1488	int error, allerror = 0;
1489
1490	td = curthread;
1491
1492	MNT_ILOCK(mp);
1493	/*
1494	 * If a forced dismount is in progress, return from here so that
1495	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1496	 * calling VFS_UNMOUNT().
1497	 */
1498	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1499		MNT_IUNLOCK(mp);
1500		return (EBADF);
1501	}
1502
1503	/*
1504	 * Force stale buffer cache information to be flushed.
1505	 */
1506loop:
1507	MNT_VNODE_FOREACH(vp, mp, mvp) {
1508		VI_LOCK(vp);
1509		MNT_IUNLOCK(mp);
1510		/* XXX Racy bv_cnt check. */
1511		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1512		    waitfor == MNT_LAZY) {
1513			VI_UNLOCK(vp);
1514			MNT_ILOCK(mp);
1515			continue;
1516		}
1517		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1518			MNT_ILOCK(mp);
1519			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1520			goto loop;
1521		}
1522		error = VOP_FSYNC(vp, waitfor, td);
1523		if (error)
1524			allerror = error;
1525		NFSVOPUNLOCK(vp, 0);
1526		vrele(vp);
1527
1528		MNT_ILOCK(mp);
1529	}
1530	MNT_IUNLOCK(mp);
1531	return (allerror);
1532}
1533
1534static int
1535nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1536{
1537	struct nfsmount *nmp = VFSTONFS(mp);
1538	struct vfsquery vq;
1539	int error;
1540
1541	bzero(&vq, sizeof(vq));
1542	switch (op) {
1543#if 0
1544	case VFS_CTL_NOLOCKS:
1545		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1546 		if (req->oldptr != NULL) {
1547 			error = SYSCTL_OUT(req, &val, sizeof(val));
1548 			if (error)
1549 				return (error);
1550 		}
1551 		if (req->newptr != NULL) {
1552 			error = SYSCTL_IN(req, &val, sizeof(val));
1553 			if (error)
1554 				return (error);
1555			if (val)
1556				nmp->nm_flag |= NFSMNT_NOLOCKS;
1557			else
1558				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1559 		}
1560		break;
1561#endif
1562	case VFS_CTL_QUERY:
1563		mtx_lock(&nmp->nm_mtx);
1564		if (nmp->nm_state & NFSSTA_TIMEO)
1565			vq.vq_flags |= VQ_NOTRESP;
1566		mtx_unlock(&nmp->nm_mtx);
1567#if 0
1568		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1569		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1570			vq.vq_flags |= VQ_NOTRESPLOCK;
1571#endif
1572		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1573		break;
1574 	case VFS_CTL_TIMEO:
1575 		if (req->oldptr != NULL) {
1576 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1577 			    sizeof(nmp->nm_tprintf_initial_delay));
1578 			if (error)
1579 				return (error);
1580 		}
1581 		if (req->newptr != NULL) {
1582			error = vfs_suser(mp, req->td);
1583			if (error)
1584				return (error);
1585 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1586 			    sizeof(nmp->nm_tprintf_initial_delay));
1587 			if (error)
1588 				return (error);
1589 			if (nmp->nm_tprintf_initial_delay < 0)
1590 				nmp->nm_tprintf_initial_delay = 0;
1591 		}
1592		break;
1593	default:
1594		return (ENOTSUP);
1595	}
1596	return (0);
1597}
1598
1599/*
1600 * Extract the information needed by the nlm from the nfs vnode.
1601 */
1602static void
1603nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1604    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1605    struct timeval *timeop)
1606{
1607	struct nfsmount *nmp;
1608	struct nfsnode *np = VTONFS(vp);
1609
1610	nmp = VFSTONFS(vp->v_mount);
1611	if (fhlenp != NULL)
1612		*fhlenp = (size_t)np->n_fhp->nfh_len;
1613	if (fhp != NULL)
1614		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1615	if (sp != NULL)
1616		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1617	if (is_v3p != NULL)
1618		*is_v3p = NFS_ISV3(vp);
1619	if (sizep != NULL)
1620		*sizep = np->n_size;
1621	if (timeop != NULL) {
1622		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1623		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1624	}
1625}
1626
1627