nfs_clvfsops.c revision 220739
1191783Srmacklem/*-
2191783Srmacklem * Copyright (c) 1989, 1993, 1995
3191783Srmacklem *	The Regents of the University of California.  All rights reserved.
4191783Srmacklem *
5191783Srmacklem * This code is derived from software contributed to Berkeley by
6191783Srmacklem * Rick Macklem at The University of Guelph.
7191783Srmacklem *
8191783Srmacklem * Redistribution and use in source and binary forms, with or without
9191783Srmacklem * modification, are permitted provided that the following conditions
10191783Srmacklem * are met:
11191783Srmacklem * 1. Redistributions of source code must retain the above copyright
12191783Srmacklem *    notice, this list of conditions and the following disclaimer.
13191783Srmacklem * 2. Redistributions in binary form must reproduce the above copyright
14191783Srmacklem *    notice, this list of conditions and the following disclaimer in the
15191783Srmacklem *    documentation and/or other materials provided with the distribution.
16191783Srmacklem * 4. Neither the name of the University nor the names of its contributors
17191783Srmacklem *    may be used to endorse or promote products derived from this software
18191783Srmacklem *    without specific prior written permission.
19191783Srmacklem *
20191783Srmacklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21191783Srmacklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22191783Srmacklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23191783Srmacklem * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24191783Srmacklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25191783Srmacklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26191783Srmacklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27191783Srmacklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28191783Srmacklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29191783Srmacklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30191783Srmacklem * SUCH DAMAGE.
31191783Srmacklem *
32191783Srmacklem *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33191783Srmacklem */
34191783Srmacklem
35191783Srmacklem#include <sys/cdefs.h>
36191783Srmacklem__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 220739 2011-04-17 14:10:12Z rmacklem $");
37191783Srmacklem
38191783Srmacklem
39191783Srmacklem#include "opt_bootp.h"
40191783Srmacklem#include "opt_nfsroot.h"
41191783Srmacklem
42191783Srmacklem#include <sys/param.h>
43191783Srmacklem#include <sys/systm.h>
44191783Srmacklem#include <sys/kernel.h>
45191783Srmacklem#include <sys/bio.h>
46191783Srmacklem#include <sys/buf.h>
47191783Srmacklem#include <sys/clock.h>
48193066Sjamie#include <sys/jail.h>
49220739Srmacklem#include <sys/limits.h>
50191783Srmacklem#include <sys/lock.h>
51191783Srmacklem#include <sys/malloc.h>
52191783Srmacklem#include <sys/mbuf.h>
53191783Srmacklem#include <sys/module.h>
54191783Srmacklem#include <sys/mount.h>
55191783Srmacklem#include <sys/proc.h>
56191783Srmacklem#include <sys/socket.h>
57191783Srmacklem#include <sys/socketvar.h>
58191783Srmacklem#include <sys/sockio.h>
59191783Srmacklem#include <sys/sysctl.h>
60191783Srmacklem#include <sys/vnode.h>
61191783Srmacklem#include <sys/signalvar.h>
62191783Srmacklem
63191783Srmacklem#include <vm/vm.h>
64191783Srmacklem#include <vm/vm_extern.h>
65191783Srmacklem#include <vm/uma.h>
66191783Srmacklem
67191783Srmacklem#include <net/if.h>
68191783Srmacklem#include <net/route.h>
69191783Srmacklem#include <netinet/in.h>
70191783Srmacklem
71191783Srmacklem#include <fs/nfs/nfsport.h>
72191783Srmacklem#include <fs/nfsclient/nfsnode.h>
73191783Srmacklem#include <fs/nfsclient/nfsmount.h>
74191783Srmacklem#include <fs/nfsclient/nfs.h>
75191783Srmacklem#include <fs/nfsclient/nfsdiskless.h>
76191783Srmacklem
77219028SnetchildFEATURE(nfscl, "NFSv4 client");
78219028Snetchild
79191783Srmacklemextern int nfscl_ticks;
80191783Srmacklemextern struct timeval nfsboottime;
81191783Srmacklemextern struct nfsstats	newnfsstats;
82191783Srmacklem
83191783SrmacklemMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
84191783SrmacklemMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
85191783Srmacklem
86191783SrmacklemSYSCTL_DECL(_vfs_newnfs);
87191783SrmacklemSYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
88191783Srmacklem	&newnfsstats, nfsstats, "S,nfsstats");
89191783Srmacklemstatic int nfs_ip_paranoia = 1;
90191783SrmacklemSYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
91191783Srmacklem    &nfs_ip_paranoia, 0, "");
92191783Srmacklemstatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
93191783SrmacklemSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
94191783Srmacklem        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
95191783Srmacklem/* how long between console messages "nfs server foo not responding" */
96191783Srmacklemstatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
97191783SrmacklemSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
98191783Srmacklem        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
99191783Srmacklem
100192585Srmacklemstatic void	nfs_sec_name(char *, int *);
101191783Srmacklemstatic void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
102214048Srmacklem		    struct nfs_args *argp, const char *, struct ucred *,
103214048Srmacklem		    struct thread *);
104191783Srmacklemstatic int	mountnfs(struct nfs_args *, struct mount *,
105191783Srmacklem		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
106203303Srmacklem		    struct vnode **, struct ucred *, struct thread *, int);
107214053Srmacklemstatic void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
108216931Srmacklem		    struct sockaddr_storage *, int *, off_t *,
109216931Srmacklem		    struct timeval *);
110191783Srmacklemstatic vfs_mount_t nfs_mount;
111191783Srmacklemstatic vfs_cmount_t nfs_cmount;
112191783Srmacklemstatic vfs_unmount_t nfs_unmount;
113191783Srmacklemstatic vfs_root_t nfs_root;
114191783Srmacklemstatic vfs_statfs_t nfs_statfs;
115191783Srmacklemstatic vfs_sync_t nfs_sync;
116191783Srmacklemstatic vfs_sysctl_t nfs_sysctl;
117191783Srmacklem
118191783Srmacklem/*
119191783Srmacklem * nfs vfs operations.
120191783Srmacklem */
121191783Srmacklemstatic struct vfsops nfs_vfsops = {
122191783Srmacklem	.vfs_init =		ncl_init,
123191783Srmacklem	.vfs_mount =		nfs_mount,
124191783Srmacklem	.vfs_cmount =		nfs_cmount,
125191783Srmacklem	.vfs_root =		nfs_root,
126191783Srmacklem	.vfs_statfs =		nfs_statfs,
127191783Srmacklem	.vfs_sync =		nfs_sync,
128191783Srmacklem	.vfs_uninit =		ncl_uninit,
129191783Srmacklem	.vfs_unmount =		nfs_unmount,
130191783Srmacklem	.vfs_sysctl =		nfs_sysctl,
131191783Srmacklem};
132191783SrmacklemVFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
133191783Srmacklem
134191783Srmacklem/* So that loader and kldload(2) can find us, wherever we are.. */
135191783SrmacklemMODULE_VERSION(newnfs, 1);
136191783Srmacklem
137191783Srmacklem/*
138191783Srmacklem * This structure must be filled in by a primary bootstrap or bootstrap
139191783Srmacklem * server for a diskless/dataless machine. It is initialized below just
140191783Srmacklem * to ensure that it is allocated to initialized data (.data not .bss).
141191783Srmacklem */
142191783Srmacklemstruct nfs_diskless newnfs_diskless = { { { 0 } } };
143191783Srmacklemstruct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
144191783Srmacklemint newnfs_diskless_valid = 0;
145191783Srmacklem
146191783SrmacklemSYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
147192145Srmacklem    &newnfs_diskless_valid, 0,
148192145Srmacklem    "Has the diskless struct been filled correctly");
149191783Srmacklem
150191783SrmacklemSYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
151192145Srmacklem    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
152191783Srmacklem
153191783SrmacklemSYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
154192145Srmacklem    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
155192145Srmacklem    "%Ssockaddr_in", "Diskless root nfs address");
156191783Srmacklem
157191783Srmacklem
/* Declared here, defined outside the visible part of this file. */
void		newnfsargs_ntoh(struct nfs_args *);

/* Helpers used for the diskless (NFS root) mount path. */
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
165191783Srmacklem
166191783Srmacklemint
167191783Srmacklemnewnfs_iosize(struct nfsmount *nmp)
168191783Srmacklem{
169191783Srmacklem	int iosize, maxio;
170191783Srmacklem
171191783Srmacklem	/* First, set the upper limit for iosize */
172191783Srmacklem	if (nmp->nm_flag & NFSMNT_NFSV4) {
173191783Srmacklem		maxio = NFS_MAXBSIZE;
174191783Srmacklem	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
175191783Srmacklem		if (nmp->nm_sotype == SOCK_DGRAM)
176191783Srmacklem			maxio = NFS_MAXDGRAMDATA;
177191783Srmacklem		else
178191783Srmacklem			maxio = NFS_MAXBSIZE;
179191783Srmacklem	} else {
180191783Srmacklem		maxio = NFS_V2MAXDATA;
181191783Srmacklem	}
182191783Srmacklem	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
183191783Srmacklem		nmp->nm_rsize = maxio;
184191783Srmacklem	if (nmp->nm_rsize > MAXBSIZE)
185191783Srmacklem		nmp->nm_rsize = MAXBSIZE;
186191783Srmacklem	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
187191783Srmacklem		nmp->nm_readdirsize = maxio;
188191783Srmacklem	if (nmp->nm_readdirsize > nmp->nm_rsize)
189191783Srmacklem		nmp->nm_readdirsize = nmp->nm_rsize;
190191783Srmacklem	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
191191783Srmacklem		nmp->nm_wsize = maxio;
192191783Srmacklem	if (nmp->nm_wsize > MAXBSIZE)
193191783Srmacklem		nmp->nm_wsize = MAXBSIZE;
194191783Srmacklem
195191783Srmacklem	/*
196191783Srmacklem	 * Calculate the size used for io buffers.  Use the larger
197191783Srmacklem	 * of the two sizes to minimise nfs requests but make sure
198191783Srmacklem	 * that it is at least one VM page to avoid wasting buffer
199191783Srmacklem	 * space.
200191783Srmacklem	 */
201191783Srmacklem	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
202191783Srmacklem	iosize = imax(iosize, PAGE_SIZE);
203191783Srmacklem	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
204191783Srmacklem	return (iosize);
205191783Srmacklem}
206191783Srmacklem
207191783Srmacklemstatic void
208191783Srmacklemnfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
209191783Srmacklem{
210191783Srmacklem
211191783Srmacklem	args->version = NFS_ARGSVERSION;
212191783Srmacklem	args->addr = oargs->addr;
213191783Srmacklem	args->addrlen = oargs->addrlen;
214191783Srmacklem	args->sotype = oargs->sotype;
215191783Srmacklem	args->proto = oargs->proto;
216191783Srmacklem	args->fh = oargs->fh;
217191783Srmacklem	args->fhsize = oargs->fhsize;
218191783Srmacklem	args->flags = oargs->flags;
219191783Srmacklem	args->wsize = oargs->wsize;
220191783Srmacklem	args->rsize = oargs->rsize;
221191783Srmacklem	args->readdirsize = oargs->readdirsize;
222191783Srmacklem	args->timeo = oargs->timeo;
223191783Srmacklem	args->retrans = oargs->retrans;
224191783Srmacklem	args->readahead = oargs->readahead;
225191783Srmacklem	args->hostname = oargs->hostname;
226191783Srmacklem}
227191783Srmacklem
228191783Srmacklemstatic void
229191783Srmacklemnfs_convert_diskless(void)
230191783Srmacklem{
231191783Srmacklem
232191783Srmacklem	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
233192145Srmacklem	    sizeof (struct ifaliasreq));
234191783Srmacklem	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
235192145Srmacklem	    sizeof (struct sockaddr_in));
236192145Srmacklem	nfs_convert_oargs(&newnfsv3_diskless.root_args,
237192145Srmacklem	    &newnfs_diskless.root_args);
238191783Srmacklem	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
239191783Srmacklem		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
240192145Srmacklem		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
241192145Srmacklem		    NFSX_MYFH);
242191783Srmacklem	} else {
243191783Srmacklem		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
244192145Srmacklem		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
245192145Srmacklem		    NFSX_V2FH);
246191783Srmacklem	}
247191783Srmacklem	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
248192145Srmacklem	    sizeof(struct sockaddr_in));
249192145Srmacklem	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
250192145Srmacklem	    MNAMELEN);
251191783Srmacklem	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
252191783Srmacklem	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
253192145Srmacklem	    MAXHOSTNAMELEN);
254191783Srmacklem	newnfs_diskless_valid = 3;
255191783Srmacklem}
256191783Srmacklem
257191783Srmacklem/*
258191783Srmacklem * nfs statfs call
259191783Srmacklem */
260191783Srmacklemstatic int
261191990Sattilionfs_statfs(struct mount *mp, struct statfs *sbp)
262191783Srmacklem{
263191783Srmacklem	struct vnode *vp;
264191990Sattilio	struct thread *td;
265191783Srmacklem	struct nfsmount *nmp = VFSTONFS(mp);
266191783Srmacklem	struct nfsvattr nfsva;
267191783Srmacklem	struct nfsfsinfo fs;
268191783Srmacklem	struct nfsstatfs sb;
269191783Srmacklem	int error = 0, attrflag, gotfsinfo = 0, ret;
270191783Srmacklem	struct nfsnode *np;
271191783Srmacklem
272191990Sattilio	td = curthread;
273191990Sattilio
274191783Srmacklem	error = vfs_busy(mp, MBF_NOWAIT);
275191783Srmacklem	if (error)
276191783Srmacklem		return (error);
277220732Srmacklem	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
278191783Srmacklem	if (error) {
279191783Srmacklem		vfs_unbusy(mp);
280191783Srmacklem		return (error);
281191783Srmacklem	}
282191783Srmacklem	vp = NFSTOV(np);
283191783Srmacklem	mtx_lock(&nmp->nm_mtx);
284191783Srmacklem	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
285191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
286191783Srmacklem		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
287191783Srmacklem		    &attrflag, NULL);
288191783Srmacklem		if (!error)
289191783Srmacklem			gotfsinfo = 1;
290191783Srmacklem	} else
291191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
292191783Srmacklem	if (!error)
293191783Srmacklem		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
294191783Srmacklem		    &attrflag, NULL);
295191783Srmacklem	if (attrflag == 0) {
296191783Srmacklem		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
297191783Srmacklem		    td->td_ucred, td, &nfsva, NULL);
298191783Srmacklem		if (ret) {
299191783Srmacklem			/*
300191783Srmacklem			 * Just set default values to get things going.
301191783Srmacklem			 */
302191783Srmacklem			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
303191783Srmacklem			nfsva.na_vattr.va_type = VDIR;
304191783Srmacklem			nfsva.na_vattr.va_mode = 0777;
305191783Srmacklem			nfsva.na_vattr.va_nlink = 100;
306191783Srmacklem			nfsva.na_vattr.va_uid = (uid_t)0;
307191783Srmacklem			nfsva.na_vattr.va_gid = (gid_t)0;
308191783Srmacklem			nfsva.na_vattr.va_fileid = 2;
309191783Srmacklem			nfsva.na_vattr.va_gen = 1;
310191783Srmacklem			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
311191783Srmacklem			nfsva.na_vattr.va_size = 512 * 1024;
312191783Srmacklem		}
313191783Srmacklem	}
314191783Srmacklem	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
315191783Srmacklem	if (!error) {
316191783Srmacklem	    mtx_lock(&nmp->nm_mtx);
317191783Srmacklem	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
318191783Srmacklem		nfscl_loadfsinfo(nmp, &fs);
319191783Srmacklem	    nfscl_loadsbinfo(nmp, &sb, sbp);
320191783Srmacklem	    sbp->f_flags = nmp->nm_flag;
321191783Srmacklem	    sbp->f_iosize = newnfs_iosize(nmp);
322191783Srmacklem	    mtx_unlock(&nmp->nm_mtx);
323191783Srmacklem	    if (sbp != &mp->mnt_stat) {
324191783Srmacklem		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
325191783Srmacklem		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
326191783Srmacklem	    }
327191783Srmacklem	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
328191783Srmacklem	} else if (NFS_ISV4(vp)) {
329191783Srmacklem		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
330191783Srmacklem	}
331191783Srmacklem	vput(vp);
332191783Srmacklem	vfs_unbusy(mp);
333191783Srmacklem	return (error);
334191783Srmacklem}
335191783Srmacklem
336191783Srmacklem/*
337191783Srmacklem * nfs version 3 fsinfo rpc call
338191783Srmacklem */
339191783Srmacklemint
340191783Srmacklemncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
341191783Srmacklem    struct thread *td)
342191783Srmacklem{
343191783Srmacklem	struct nfsfsinfo fs;
344191783Srmacklem	struct nfsvattr nfsva;
345191783Srmacklem	int error, attrflag;
346191783Srmacklem
347191783Srmacklem	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
348191783Srmacklem	if (!error) {
349191783Srmacklem		if (attrflag)
350191783Srmacklem			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
351191783Srmacklem			    1);
352191783Srmacklem		mtx_lock(&nmp->nm_mtx);
353191783Srmacklem		nfscl_loadfsinfo(nmp, &fs);
354191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
355191783Srmacklem	}
356191783Srmacklem	return (error);
357191783Srmacklem}
358191783Srmacklem
359191783Srmacklem/*
360191783Srmacklem * Mount a remote root fs via. nfs. This depends on the info in the
361191783Srmacklem * newnfs_diskless structure that has been filled in properly by some primary
362191783Srmacklem * bootstrap.
363191783Srmacklem * It goes something like this:
364191783Srmacklem * - do enough of "ifconfig" by calling ifioctl() so that the system
365191783Srmacklem *   can talk to the server
366191783Srmacklem * - If newnfs_diskless.mygateway is filled in, use that address as
367191783Srmacklem *   a default gateway.
368191783Srmacklem * - build the rootfs mount point and call mountnfs() to do the rest.
369191783Srmacklem *
370191783Srmacklem * It is assumed to be safe to read, modify, and write the nfsv3_diskless
371191783Srmacklem * structure, as well as other global NFS client variables here, as
372192145Srmacklem * nfs_mountroot() will be called once in the boot before any other NFS
373191783Srmacklem * client activity occurs.
374191783Srmacklem */
375191783Srmacklemint
376192145Srmacklemncl_mountroot(struct mount *mp)
377191783Srmacklem{
378192145Srmacklem	struct thread *td = curthread;
379191783Srmacklem	struct nfsv3_diskless *nd = &newnfsv3_diskless;
380191783Srmacklem	struct socket *so;
381191783Srmacklem	struct vnode *vp;
382191783Srmacklem	struct ifreq ir;
383193066Sjamie	int error;
384191783Srmacklem	u_long l;
385191783Srmacklem	char buf[128];
386191783Srmacklem	char *cp;
387191783Srmacklem
388191783Srmacklem#if defined(BOOTP_NFSROOT) && defined(BOOTP)
389192145Srmacklem	bootpc_init();		/* use bootp to get nfs_diskless filled in */
390191783Srmacklem#elif defined(NFS_ROOT)
391191783Srmacklem	nfs_setup_diskless();
392191783Srmacklem#endif
393191783Srmacklem
394191783Srmacklem	if (newnfs_diskless_valid == 0)
395191783Srmacklem		return (-1);
396191783Srmacklem	if (newnfs_diskless_valid == 1)
397191783Srmacklem		nfs_convert_diskless();
398191783Srmacklem
399191783Srmacklem	/*
400191783Srmacklem	 * XXX splnet, so networks will receive...
401191783Srmacklem	 */
402191783Srmacklem	splnet();
403191783Srmacklem
404191783Srmacklem	/*
405191783Srmacklem	 * Do enough of ifconfig(8) so that the critical net interface can
406191783Srmacklem	 * talk to the server.
407191783Srmacklem	 */
408191783Srmacklem	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
409191783Srmacklem	    td->td_ucred, td);
410191783Srmacklem	if (error)
411192145Srmacklem		panic("nfs_mountroot: socreate(%04x): %d",
412191783Srmacklem			nd->myif.ifra_addr.sa_family, error);
413191783Srmacklem
414191783Srmacklem#if 0 /* XXX Bad idea */
415191783Srmacklem	/*
416191783Srmacklem	 * We might not have been told the right interface, so we pass
417191783Srmacklem	 * over the first ten interfaces of the same kind, until we get
418191783Srmacklem	 * one of them configured.
419191783Srmacklem	 */
420191783Srmacklem
421191783Srmacklem	for (i = strlen(nd->myif.ifra_name) - 1;
422191783Srmacklem		nd->myif.ifra_name[i] >= '0' &&
423191783Srmacklem		nd->myif.ifra_name[i] <= '9';
424191783Srmacklem		nd->myif.ifra_name[i] ++) {
425191783Srmacklem		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
426191783Srmacklem		if(!error)
427191783Srmacklem			break;
428191783Srmacklem	}
429191783Srmacklem#endif
430191783Srmacklem	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
431191783Srmacklem	if (error)
432192145Srmacklem		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
433191783Srmacklem	if ((cp = getenv("boot.netif.mtu")) != NULL) {
434191783Srmacklem		ir.ifr_mtu = strtol(cp, NULL, 10);
435191783Srmacklem		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
436191783Srmacklem		freeenv(cp);
437191783Srmacklem		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
438191783Srmacklem		if (error)
439192145Srmacklem			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
440191783Srmacklem	}
441191783Srmacklem	soclose(so);
442191783Srmacklem
443191783Srmacklem	/*
444191783Srmacklem	 * If the gateway field is filled in, set it as the default route.
445191783Srmacklem	 * Note that pxeboot will set a default route of 0 if the route
446191783Srmacklem	 * is not set by the DHCP server.  Check also for a value of 0
447191783Srmacklem	 * to avoid panicking inappropriately in that situation.
448191783Srmacklem	 */
449191783Srmacklem	if (nd->mygateway.sin_len != 0 &&
450191783Srmacklem	    nd->mygateway.sin_addr.s_addr != 0) {
451191783Srmacklem		struct sockaddr_in mask, sin;
452191783Srmacklem
453191783Srmacklem		bzero((caddr_t)&mask, sizeof(mask));
454191783Srmacklem		sin = mask;
455191783Srmacklem		sin.sin_family = AF_INET;
456191783Srmacklem		sin.sin_len = sizeof(sin);
457192145Srmacklem                /* XXX MRT use table 0 for this sort of thing */
458218757Sbz		CURVNET_SET(TD_TO_VNET(td));
459191783Srmacklem		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
460191783Srmacklem		    (struct sockaddr *)&nd->mygateway,
461191783Srmacklem		    (struct sockaddr *)&mask,
462191783Srmacklem		    RTF_UP | RTF_GATEWAY, NULL);
463218757Sbz		CURVNET_RESTORE();
464191783Srmacklem		if (error)
465192145Srmacklem			panic("nfs_mountroot: RTM_ADD: %d", error);
466191783Srmacklem	}
467191783Srmacklem
468191783Srmacklem	/*
469191783Srmacklem	 * Create the rootfs mount point.
470191783Srmacklem	 */
471191783Srmacklem	nd->root_args.fh = nd->root_fh;
472191783Srmacklem	nd->root_args.fhsize = nd->root_fhsize;
473191783Srmacklem	l = ntohl(nd->root_saddr.sin_addr.s_addr);
474191783Srmacklem	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
475191783Srmacklem		(l >> 24) & 0xff, (l >> 16) & 0xff,
476191783Srmacklem		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
477191783Srmacklem	printf("NFS ROOT: %s\n", buf);
478192145Srmacklem	nd->root_args.hostname = buf;
479191783Srmacklem	if ((error = nfs_mountdiskless(buf,
480191783Srmacklem	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
481191783Srmacklem		return (error);
482191783Srmacklem	}
483191783Srmacklem
484191783Srmacklem	/*
485191783Srmacklem	 * This is not really an nfs issue, but it is much easier to
486191783Srmacklem	 * set hostname here and then let the "/etc/rc.xxx" files
487191783Srmacklem	 * mount the right /var based upon its preset value.
488191783Srmacklem	 */
489193066Sjamie	mtx_lock(&prison0.pr_mtx);
490194118Sjamie	strlcpy(prison0.pr_hostname, nd->my_hostnam,
491194118Sjamie	    sizeof(prison0.pr_hostname));
492193066Sjamie	mtx_unlock(&prison0.pr_mtx);
493191783Srmacklem	inittodr(ntohl(nd->root_time));
494191783Srmacklem	return (0);
495191783Srmacklem}
496191783Srmacklem
497191783Srmacklem/*
498191783Srmacklem * Internal version of mount system call for diskless setup.
499191783Srmacklem */
500191783Srmacklemstatic int
501191783Srmacklemnfs_mountdiskless(char *path,
502191783Srmacklem    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
503191783Srmacklem    struct vnode **vpp, struct mount *mp)
504191783Srmacklem{
505191783Srmacklem	struct sockaddr *nam;
506191783Srmacklem	int error;
507191783Srmacklem
508191783Srmacklem	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
509191783Srmacklem	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
510203303Srmacklem	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
511192145Srmacklem		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
512191783Srmacklem		return (error);
513191783Srmacklem	}
514191783Srmacklem	return (0);
515191783Srmacklem}
516191783Srmacklem
517191783Srmacklemstatic void
518192585Srmacklemnfs_sec_name(char *sec, int *flagsp)
519192585Srmacklem{
520192585Srmacklem	if (!strcmp(sec, "krb5"))
521192585Srmacklem		*flagsp |= NFSMNT_KERB;
522192585Srmacklem	else if (!strcmp(sec, "krb5i"))
523192585Srmacklem		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
524192585Srmacklem	else if (!strcmp(sec, "krb5p"))
525192585Srmacklem		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
526192585Srmacklem}
527192585Srmacklem
528192585Srmacklemstatic void
529191783Srmacklemnfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
530214048Srmacklem    const char *hostname, struct ucred *cred, struct thread *td)
531191783Srmacklem{
532191783Srmacklem	int s;
533191783Srmacklem	int adjsock;
534214048Srmacklem	char *p;
535191783Srmacklem
536191783Srmacklem	s = splnet();
537191783Srmacklem
538191783Srmacklem	/*
539191783Srmacklem	 * Set read-only flag if requested; otherwise, clear it if this is
540191783Srmacklem	 * an update.  If this is not an update, then either the read-only
541191783Srmacklem	 * flag is already clear, or this is a root mount and it was set
542191783Srmacklem	 * intentionally at some previous point.
543191783Srmacklem	 */
544191783Srmacklem	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
545191783Srmacklem		MNT_ILOCK(mp);
546191783Srmacklem		mp->mnt_flag |= MNT_RDONLY;
547191783Srmacklem		MNT_IUNLOCK(mp);
548191783Srmacklem	} else if (mp->mnt_flag & MNT_UPDATE) {
549191783Srmacklem		MNT_ILOCK(mp);
550191783Srmacklem		mp->mnt_flag &= ~MNT_RDONLY;
551191783Srmacklem		MNT_IUNLOCK(mp);
552191783Srmacklem	}
553191783Srmacklem
554191783Srmacklem	/*
555191783Srmacklem	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
556191783Srmacklem	 * no sense in that context.  Also, set up appropriate retransmit
557191783Srmacklem	 * and soft timeout behavior.
558191783Srmacklem	 */
559191783Srmacklem	if (argp->sotype == SOCK_STREAM) {
560191783Srmacklem		nmp->nm_flag &= ~NFSMNT_NOCONN;
561191783Srmacklem		nmp->nm_timeo = NFS_MAXTIMEO;
562220739Srmacklem		if ((argp->flags & NFSMNT_NFSV4) != 0)
563220739Srmacklem			nmp->nm_retry = INT_MAX;
564220739Srmacklem		else
565220739Srmacklem			nmp->nm_retry = NFS_RETRANS_TCP;
566191783Srmacklem	}
567191783Srmacklem
568220739Srmacklem	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
569220739Srmacklem	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
570220739Srmacklem		argp->flags &= ~NFSMNT_RDIRPLUS;
571191783Srmacklem		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
572220739Srmacklem	}
573191783Srmacklem
574220739Srmacklem	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
575220739Srmacklem	if ((argp->flags & NFSMNT_NFSV4) != 0) {
576220739Srmacklem		argp->flags &= ~NFSMNT_RESVPORT;
577220739Srmacklem		nmp->nm_flag &= ~NFSMNT_RESVPORT;
578220739Srmacklem	}
579220739Srmacklem
580220739Srmacklem	/* Re-bind if rsrvd port requested and wasn't on one */
581220739Srmacklem	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
582220739Srmacklem		  && (argp->flags & NFSMNT_RESVPORT);
583191783Srmacklem	/* Also re-bind if we're switching to/from a connected UDP socket */
584220739Srmacklem	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
585191783Srmacklem		    (argp->flags & NFSMNT_NOCONN));
586191783Srmacklem
587191783Srmacklem	/* Update flags atomically.  Don't change the lock bits. */
588191783Srmacklem	nmp->nm_flag = argp->flags | nmp->nm_flag;
589191783Srmacklem	splx(s);
590191783Srmacklem
591191783Srmacklem	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
592191783Srmacklem		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
593191783Srmacklem		if (nmp->nm_timeo < NFS_MINTIMEO)
594191783Srmacklem			nmp->nm_timeo = NFS_MINTIMEO;
595191783Srmacklem		else if (nmp->nm_timeo > NFS_MAXTIMEO)
596191783Srmacklem			nmp->nm_timeo = NFS_MAXTIMEO;
597191783Srmacklem	}
598191783Srmacklem
599191783Srmacklem	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
600191783Srmacklem		nmp->nm_retry = argp->retrans;
601191783Srmacklem		if (nmp->nm_retry > NFS_MAXREXMIT)
602191783Srmacklem			nmp->nm_retry = NFS_MAXREXMIT;
603191783Srmacklem	}
604191783Srmacklem
605191783Srmacklem	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
606191783Srmacklem		nmp->nm_wsize = argp->wsize;
607191783Srmacklem		/* Round down to multiple of blocksize */
608191783Srmacklem		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
609191783Srmacklem		if (nmp->nm_wsize <= 0)
610191783Srmacklem			nmp->nm_wsize = NFS_FABLKSIZE;
611191783Srmacklem	}
612191783Srmacklem
613191783Srmacklem	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
614191783Srmacklem		nmp->nm_rsize = argp->rsize;
615191783Srmacklem		/* Round down to multiple of blocksize */
616191783Srmacklem		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
617191783Srmacklem		if (nmp->nm_rsize <= 0)
618191783Srmacklem			nmp->nm_rsize = NFS_FABLKSIZE;
619191783Srmacklem	}
620191783Srmacklem
621191783Srmacklem	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
622191783Srmacklem		nmp->nm_readdirsize = argp->readdirsize;
623191783Srmacklem	}
624191783Srmacklem
625191783Srmacklem	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
626191783Srmacklem		nmp->nm_acregmin = argp->acregmin;
627191783Srmacklem	else
628191783Srmacklem		nmp->nm_acregmin = NFS_MINATTRTIMO;
629191783Srmacklem	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
630191783Srmacklem		nmp->nm_acregmax = argp->acregmax;
631191783Srmacklem	else
632191783Srmacklem		nmp->nm_acregmax = NFS_MAXATTRTIMO;
633191783Srmacklem	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
634191783Srmacklem		nmp->nm_acdirmin = argp->acdirmin;
635191783Srmacklem	else
636191783Srmacklem		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
637191783Srmacklem	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
638191783Srmacklem		nmp->nm_acdirmax = argp->acdirmax;
639191783Srmacklem	else
640191783Srmacklem		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
641191783Srmacklem	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
642191783Srmacklem		nmp->nm_acdirmin = nmp->nm_acdirmax;
643191783Srmacklem	if (nmp->nm_acregmin > nmp->nm_acregmax)
644191783Srmacklem		nmp->nm_acregmin = nmp->nm_acregmax;
645191783Srmacklem
646191783Srmacklem	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
647191783Srmacklem		if (argp->readahead <= NFS_MAXRAHEAD)
648191783Srmacklem			nmp->nm_readahead = argp->readahead;
649191783Srmacklem		else
650191783Srmacklem			nmp->nm_readahead = NFS_MAXRAHEAD;
651191783Srmacklem	}
652191783Srmacklem	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
653191783Srmacklem		if (argp->wcommitsize < nmp->nm_wsize)
654191783Srmacklem			nmp->nm_wcommitsize = nmp->nm_wsize;
655191783Srmacklem		else
656191783Srmacklem			nmp->nm_wcommitsize = argp->wcommitsize;
657191783Srmacklem	}
658191783Srmacklem
659191783Srmacklem	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
660191783Srmacklem		    (nmp->nm_soproto != argp->proto));
661191783Srmacklem
662191783Srmacklem	if (nmp->nm_client != NULL && adjsock) {
663191783Srmacklem		int haslock = 0, error = 0;
664191783Srmacklem
665191783Srmacklem		if (nmp->nm_sotype == SOCK_STREAM) {
666191783Srmacklem			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
667191783Srmacklem			if (!error)
668191783Srmacklem				haslock = 1;
669191783Srmacklem		}
670191783Srmacklem		if (!error) {
671191783Srmacklem		    newnfs_disconnect(&nmp->nm_sockreq);
672191783Srmacklem		    if (haslock)
673191783Srmacklem			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
674191783Srmacklem		    nmp->nm_sotype = argp->sotype;
675191783Srmacklem		    nmp->nm_soproto = argp->proto;
676191783Srmacklem		    if (nmp->nm_sotype == SOCK_DGRAM)
677191783Srmacklem			while (newnfs_connect(nmp, &nmp->nm_sockreq,
678191783Srmacklem			    cred, td, 0)) {
679191783Srmacklem				printf("newnfs_args: retrying connect\n");
680207170Srmacklem				(void) nfs_catnap(PSOCK, 0, "newnfscon");
681191783Srmacklem			}
682191783Srmacklem		}
683191783Srmacklem	} else {
684191783Srmacklem		nmp->nm_sotype = argp->sotype;
685191783Srmacklem		nmp->nm_soproto = argp->proto;
686191783Srmacklem	}
687214048Srmacklem
688214048Srmacklem	if (hostname != NULL) {
689214048Srmacklem		strlcpy(nmp->nm_hostname, hostname,
690214048Srmacklem		    sizeof(nmp->nm_hostname));
691214048Srmacklem		p = strchr(nmp->nm_hostname, ':');
692214048Srmacklem		if (p != NULL)
693214048Srmacklem			*p = '\0';
694214048Srmacklem	}
695191783Srmacklem}
696191783Srmacklem
/*
 * Names of the mount options this file system accepts.  nfs_mount() hands
 * this list to vfs_filteropt(), so it must remain NULL terminated.
 */
static const char *nfs_opts[] = {
	"from",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
707191783Srmacklem
708191783Srmacklem/*
709191783Srmacklem * VFS Operations.
710191783Srmacklem *
711191783Srmacklem * mount system call
712191783Srmacklem * It seems a bit dumb to copyinstr() the host and path here and then
713191783Srmacklem * bcopy() them in mountnfs(), but I wanted to detect errors before
714191783Srmacklem * doing the sockargs() call because sockargs() allocates an mbuf and
715191783Srmacklem * an error after that means that I have to release the mbuf.
716191783Srmacklem */
717191783Srmacklem/* ARGSUSED */
718191783Srmacklemstatic int
719191990Sattilionfs_mount(struct mount *mp)
720191783Srmacklem{
721191783Srmacklem	struct nfs_args args = {
722191783Srmacklem	    .version = NFS_ARGSVERSION,
723191783Srmacklem	    .addr = NULL,
724191783Srmacklem	    .addrlen = sizeof (struct sockaddr_in),
725191783Srmacklem	    .sotype = SOCK_STREAM,
726191783Srmacklem	    .proto = 0,
727191783Srmacklem	    .fh = NULL,
728191783Srmacklem	    .fhsize = 0,
729220739Srmacklem	    .flags = NFSMNT_RESVPORT,
730191783Srmacklem	    .wsize = NFS_WSIZE,
731191783Srmacklem	    .rsize = NFS_RSIZE,
732191783Srmacklem	    .readdirsize = NFS_READDIRSIZE,
733191783Srmacklem	    .timeo = 10,
734191783Srmacklem	    .retrans = NFS_RETRANS,
735191783Srmacklem	    .readahead = NFS_DEFRAHEAD,
736191783Srmacklem	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
737191783Srmacklem	    .hostname = NULL,
738191783Srmacklem	    /* args version 4 */
739191783Srmacklem	    .acregmin = NFS_MINATTRTIMO,
740191783Srmacklem	    .acregmax = NFS_MAXATTRTIMO,
741191783Srmacklem	    .acdirmin = NFS_MINDIRATTRTIMO,
742191783Srmacklem	    .acdirmax = NFS_MAXDIRATTRTIMO,
743191783Srmacklem	    .dirlen = 0,
744191783Srmacklem	    .krbnamelen = 0,
745192585Srmacklem	    .srvkrbnamelen = 0,
746191783Srmacklem	};
747192585Srmacklem	int error = 0, ret, len;
748192585Srmacklem	struct sockaddr *nam = NULL;
749191783Srmacklem	struct vnode *vp;
750191990Sattilio	struct thread *td;
751191783Srmacklem	char hst[MNAMELEN];
752191783Srmacklem	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
753192585Srmacklem	char *opt, *name, *secname;
754203303Srmacklem	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
755191783Srmacklem
756191783Srmacklem	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
757191783Srmacklem		error = EINVAL;
758191783Srmacklem		goto out;
759191783Srmacklem	}
760191783Srmacklem
761191990Sattilio	td = curthread;
762191783Srmacklem	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
763192145Srmacklem		error = ncl_mountroot(mp);
764191783Srmacklem		goto out;
765191783Srmacklem	}
766191783Srmacklem
767192585Srmacklem	nfscl_init();
768191783Srmacklem
769192585Srmacklem	/* Handle the new style options. */
770192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
771192585Srmacklem		args.flags |= NFSMNT_NOCONN;
772192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
773192585Srmacklem		args.flags |= NFSMNT_NOCONN;
774192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
775192585Srmacklem		args.flags |= NFSMNT_NOLOCKD;
776192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
777192585Srmacklem		args.flags &= ~NFSMNT_NOLOCKD;
778192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
779192585Srmacklem		args.flags |= NFSMNT_INT;
780192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
781192585Srmacklem		args.flags |= NFSMNT_RDIRPLUS;
782192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
783192585Srmacklem		args.flags |= NFSMNT_RESVPORT;
784192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
785192585Srmacklem		args.flags &= ~NFSMNT_RESVPORT;
786192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
787192585Srmacklem		args.flags |= NFSMNT_SOFT;
788192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
789192585Srmacklem		args.flags &= ~NFSMNT_SOFT;
790192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
791192585Srmacklem		args.sotype = SOCK_DGRAM;
792192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
793192585Srmacklem		args.sotype = SOCK_DGRAM;
794192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
795192585Srmacklem		args.sotype = SOCK_STREAM;
796192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
797192585Srmacklem		args.flags |= NFSMNT_NFSV3;
798192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
799192585Srmacklem		args.flags |= NFSMNT_NFSV4;
800192585Srmacklem		args.sotype = SOCK_STREAM;
801191783Srmacklem	}
802192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
803192585Srmacklem		args.flags |= NFSMNT_ALLGSSNAME;
804192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
805192585Srmacklem		if (opt == NULL) {
806192585Srmacklem			vfs_mount_error(mp, "illegal readdirsize");
807192585Srmacklem			error = EINVAL;
808192585Srmacklem			goto out;
809192585Srmacklem		}
810192585Srmacklem		ret = sscanf(opt, "%d", &args.readdirsize);
811192585Srmacklem		if (ret != 1 || args.readdirsize <= 0) {
812192585Srmacklem			vfs_mount_error(mp, "illegal readdirsize: %s",
813192585Srmacklem			    opt);
814192585Srmacklem			error = EINVAL;
815192585Srmacklem			goto out;
816192585Srmacklem		}
817192585Srmacklem		args.flags |= NFSMNT_READDIRSIZE;
818192585Srmacklem	}
819192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
820192585Srmacklem		if (opt == NULL) {
821192585Srmacklem			vfs_mount_error(mp, "illegal readahead");
822192585Srmacklem			error = EINVAL;
823192585Srmacklem			goto out;
824192585Srmacklem		}
825192585Srmacklem		ret = sscanf(opt, "%d", &args.readahead);
826192585Srmacklem		if (ret != 1 || args.readahead <= 0) {
827192585Srmacklem			vfs_mount_error(mp, "illegal readahead: %s",
828192585Srmacklem			    opt);
829192585Srmacklem			error = EINVAL;
830192585Srmacklem			goto out;
831192585Srmacklem		}
832192585Srmacklem		args.flags |= NFSMNT_READAHEAD;
833192585Srmacklem	}
834192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
835192585Srmacklem		if (opt == NULL) {
836192585Srmacklem			vfs_mount_error(mp, "illegal wsize");
837192585Srmacklem			error = EINVAL;
838192585Srmacklem			goto out;
839192585Srmacklem		}
840192585Srmacklem		ret = sscanf(opt, "%d", &args.wsize);
841192585Srmacklem		if (ret != 1 || args.wsize <= 0) {
842192585Srmacklem			vfs_mount_error(mp, "illegal wsize: %s",
843192585Srmacklem			    opt);
844192585Srmacklem			error = EINVAL;
845192585Srmacklem			goto out;
846192585Srmacklem		}
847192585Srmacklem		args.flags |= NFSMNT_WSIZE;
848192585Srmacklem	}
849192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
850192585Srmacklem		if (opt == NULL) {
851192585Srmacklem			vfs_mount_error(mp, "illegal rsize");
852192585Srmacklem			error = EINVAL;
853192585Srmacklem			goto out;
854192585Srmacklem		}
855192585Srmacklem		ret = sscanf(opt, "%d", &args.rsize);
856192585Srmacklem		if (ret != 1 || args.rsize <= 0) {
857192585Srmacklem			vfs_mount_error(mp, "illegal wsize: %s",
858192585Srmacklem			    opt);
859192585Srmacklem			error = EINVAL;
860192585Srmacklem			goto out;
861192585Srmacklem		}
862192585Srmacklem		args.flags |= NFSMNT_RSIZE;
863192585Srmacklem	}
864192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
865192585Srmacklem		if (opt == NULL) {
866192585Srmacklem			vfs_mount_error(mp, "illegal retrans");
867192585Srmacklem			error = EINVAL;
868192585Srmacklem			goto out;
869192585Srmacklem		}
870192585Srmacklem		ret = sscanf(opt, "%d", &args.retrans);
871192585Srmacklem		if (ret != 1 || args.retrans <= 0) {
872192585Srmacklem			vfs_mount_error(mp, "illegal retrans: %s",
873192585Srmacklem			    opt);
874192585Srmacklem			error = EINVAL;
875192585Srmacklem			goto out;
876192585Srmacklem		}
877192585Srmacklem		args.flags |= NFSMNT_RETRANS;
878192585Srmacklem	}
879192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
880192585Srmacklem		ret = sscanf(opt, "%d", &args.acregmin);
881192585Srmacklem		if (ret != 1 || args.acregmin < 0) {
882192585Srmacklem			vfs_mount_error(mp, "illegal acregmin: %s",
883192585Srmacklem			    opt);
884192585Srmacklem			error = EINVAL;
885192585Srmacklem			goto out;
886192585Srmacklem		}
887192585Srmacklem		args.flags |= NFSMNT_ACREGMIN;
888192585Srmacklem	}
889192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
890192585Srmacklem		ret = sscanf(opt, "%d", &args.acregmax);
891192585Srmacklem		if (ret != 1 || args.acregmax < 0) {
892192585Srmacklem			vfs_mount_error(mp, "illegal acregmax: %s",
893192585Srmacklem			    opt);
894192585Srmacklem			error = EINVAL;
895192585Srmacklem			goto out;
896192585Srmacklem		}
897192585Srmacklem		args.flags |= NFSMNT_ACREGMAX;
898192585Srmacklem	}
899192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
900192585Srmacklem		ret = sscanf(opt, "%d", &args.acdirmin);
901192585Srmacklem		if (ret != 1 || args.acdirmin < 0) {
902192585Srmacklem			vfs_mount_error(mp, "illegal acdirmin: %s",
903192585Srmacklem			    opt);
904192585Srmacklem			error = EINVAL;
905192585Srmacklem			goto out;
906192585Srmacklem		}
907192585Srmacklem		args.flags |= NFSMNT_ACDIRMIN;
908192585Srmacklem	}
909192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
910192585Srmacklem		ret = sscanf(opt, "%d", &args.acdirmax);
911192585Srmacklem		if (ret != 1 || args.acdirmax < 0) {
912192585Srmacklem			vfs_mount_error(mp, "illegal acdirmax: %s",
913192585Srmacklem			    opt);
914192585Srmacklem			error = EINVAL;
915192585Srmacklem			goto out;
916192585Srmacklem		}
917192585Srmacklem		args.flags |= NFSMNT_ACDIRMAX;
918192585Srmacklem	}
919192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
920192585Srmacklem		ret = sscanf(opt, "%d", &args.timeo);
921192585Srmacklem		if (ret != 1 || args.timeo <= 0) {
922192585Srmacklem			vfs_mount_error(mp, "illegal timeout: %s",
923192585Srmacklem			    opt);
924192585Srmacklem			error = EINVAL;
925192585Srmacklem			goto out;
926192585Srmacklem		}
927192585Srmacklem		args.flags |= NFSMNT_TIMEO;
928192585Srmacklem	}
929203303Srmacklem	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
930203303Srmacklem	    == 0) {
931203303Srmacklem		ret = sscanf(opt, "%d", &negnametimeo);
932203303Srmacklem		if (ret != 1 || negnametimeo < 0) {
933203303Srmacklem			vfs_mount_error(mp, "illegal negnametimeo: %s",
934203303Srmacklem			    opt);
935203303Srmacklem			error = EINVAL;
936203303Srmacklem			goto out;
937203303Srmacklem		}
938203303Srmacklem	}
939192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "sec",
940192585Srmacklem		(void **) &secname, NULL) == 0)
941192585Srmacklem		nfs_sec_name(secname, &args.flags);
942191783Srmacklem
943191783Srmacklem	if (mp->mnt_flag & MNT_UPDATE) {
944191783Srmacklem		struct nfsmount *nmp = VFSTONFS(mp);
945191783Srmacklem
946191783Srmacklem		if (nmp == NULL) {
947191783Srmacklem			error = EIO;
948191783Srmacklem			goto out;
949191783Srmacklem		}
950191783Srmacklem		/*
951191783Srmacklem		 * When doing an update, we can't change version,
952191783Srmacklem		 * security, switch lockd strategies or change cookie
953191783Srmacklem		 * translation
954191783Srmacklem		 */
955191783Srmacklem		args.flags = (args.flags &
956191783Srmacklem		    ~(NFSMNT_NFSV3 |
957191783Srmacklem		      NFSMNT_NFSV4 |
958191783Srmacklem		      NFSMNT_KERB |
959191783Srmacklem		      NFSMNT_INTEGRITY |
960191783Srmacklem		      NFSMNT_PRIVACY |
961191783Srmacklem		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
962191783Srmacklem		    (nmp->nm_flag &
963191783Srmacklem			(NFSMNT_NFSV3 |
964191783Srmacklem			 NFSMNT_NFSV4 |
965191783Srmacklem			 NFSMNT_KERB |
966191783Srmacklem			 NFSMNT_INTEGRITY |
967191783Srmacklem			 NFSMNT_PRIVACY |
968191783Srmacklem			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
969214048Srmacklem		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
970191783Srmacklem		goto out;
971191783Srmacklem	}
972191783Srmacklem
973191783Srmacklem	/*
974191783Srmacklem	 * Make the nfs_ip_paranoia sysctl serve as the default connection
975191783Srmacklem	 * or no-connection mode for those protocols that support
976191783Srmacklem	 * no-connection mode (the flag will be cleared later for protocols
977191783Srmacklem	 * that do not support no-connection mode).  This will allow a client
978191783Srmacklem	 * to receive replies from a different IP then the request was
979191783Srmacklem	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
980191783Srmacklem	 * not 0.
981191783Srmacklem	 */
982191783Srmacklem	if (nfs_ip_paranoia == 0)
983191783Srmacklem		args.flags |= NFSMNT_NOCONN;
984192585Srmacklem
985192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
986192585Srmacklem	    &args.fhsize) == 0) {
987208234Srmacklem		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
988192585Srmacklem			vfs_mount_error(mp, "Bad file handle");
989191783Srmacklem			error = EINVAL;
990191783Srmacklem			goto out;
991191783Srmacklem		}
992192585Srmacklem		bcopy(args.fh, nfh, args.fhsize);
993191783Srmacklem	} else {
994192585Srmacklem		args.fhsize = 0;
995192585Srmacklem	}
996192585Srmacklem
997192585Srmacklem	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
998192585Srmacklem	    &len);
999192585Srmacklem	if (args.hostname == NULL) {
1000192585Srmacklem		vfs_mount_error(mp, "Invalid hostname");
1001192585Srmacklem		error = EINVAL;
1002192585Srmacklem		goto out;
1003192585Srmacklem	}
1004192585Srmacklem	bcopy(args.hostname, hst, MNAMELEN);
1005192585Srmacklem	hst[MNAMELEN - 1] = '\0';
1006192585Srmacklem
1007192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1008192585Srmacklem		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1009192585Srmacklem	else
1010192585Srmacklem		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1011192585Srmacklem	args.srvkrbnamelen = strlen(srvkrbname);
1012192585Srmacklem
1013192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1014192585Srmacklem		strlcpy(krbname, name, sizeof (krbname));
1015192585Srmacklem	else
1016191783Srmacklem		krbname[0] = '\0';
1017192585Srmacklem	args.krbnamelen = strlen(krbname);
1018192585Srmacklem
1019192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1020192585Srmacklem		strlcpy(dirpath, name, sizeof (dirpath));
1021192585Srmacklem	else
1022191783Srmacklem		dirpath[0] = '\0';
1023192585Srmacklem	args.dirlen = strlen(dirpath);
1024192585Srmacklem
1025192585Srmacklem	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1026192585Srmacklem	    &args.addrlen) == 0) {
1027192585Srmacklem		if (args.addrlen > SOCK_MAXADDRLEN) {
1028192585Srmacklem			error = ENAMETOOLONG;
1029191783Srmacklem			goto out;
1030191783Srmacklem		}
1031192585Srmacklem		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1032192585Srmacklem		bcopy(args.addr, nam, args.addrlen);
1033192585Srmacklem		nam->sa_len = args.addrlen;
1034191783Srmacklem	}
1035192585Srmacklem
1036191783Srmacklem	args.fh = nfh;
1037191783Srmacklem	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1038203303Srmacklem	    &vp, td->td_ucred, td, negnametimeo);
1039191783Srmacklemout:
1040191783Srmacklem	if (!error) {
1041191783Srmacklem		MNT_ILOCK(mp);
1042191783Srmacklem		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1043191783Srmacklem		MNT_IUNLOCK(mp);
1044191783Srmacklem	}
1045191783Srmacklem	return (error);
1046191783Srmacklem}
1047191783Srmacklem
1048191783Srmacklem
1049191783Srmacklem/*
1050191783Srmacklem * VFS Operations.
1051191783Srmacklem *
1052191783Srmacklem * mount system call
1053191783Srmacklem * It seems a bit dumb to copyinstr() the host and path here and then
1054191783Srmacklem * bcopy() them in mountnfs(), but I wanted to detect errors before
1055191783Srmacklem * doing the sockargs() call because sockargs() allocates an mbuf and
1056191783Srmacklem * an error after that means that I have to release the mbuf.
1057191783Srmacklem */
1058191783Srmacklem/* ARGSUSED */
1059191783Srmacklemstatic int
1060191990Sattilionfs_cmount(struct mntarg *ma, void *data, int flags)
1061191783Srmacklem{
1062191783Srmacklem	int error;
1063191783Srmacklem	struct nfs_args args;
1064191783Srmacklem
1065191783Srmacklem	error = copyin(data, &args, sizeof (struct nfs_args));
1066191783Srmacklem	if (error)
1067191783Srmacklem		return error;
1068191783Srmacklem
1069191783Srmacklem	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1070191783Srmacklem
1071191783Srmacklem	error = kernel_mount(ma, flags);
1072191783Srmacklem	return (error);
1073191783Srmacklem}
1074191783Srmacklem
1075191783Srmacklem/*
1076191783Srmacklem * Common code for mount and mountroot
1077191783Srmacklem */
1078191783Srmacklemstatic int
1079191783Srmacklemmountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1080191783Srmacklem    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1081203303Srmacklem    struct vnode **vpp, struct ucred *cred, struct thread *td,
1082203303Srmacklem    int negnametimeo)
1083191783Srmacklem{
1084191783Srmacklem	struct nfsmount *nmp;
1085191783Srmacklem	struct nfsnode *np;
1086195762Srmacklem	int error, trycnt, ret;
1087191783Srmacklem	struct nfsvattr nfsva;
1088191783Srmacklem	static u_int64_t clval = 0;
1089191783Srmacklem
1090191783Srmacklem	if (mp->mnt_flag & MNT_UPDATE) {
1091191783Srmacklem		nmp = VFSTONFS(mp);
1092191783Srmacklem		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1093191783Srmacklem		FREE(nam, M_SONAME);
1094191783Srmacklem		return (0);
1095191783Srmacklem	} else {
1096191783Srmacklem		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1097191783Srmacklem		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1098191783Srmacklem		    M_NEWNFSMNT, M_WAITOK);
1099191783Srmacklem		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1100191783Srmacklem		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1101191783Srmacklem		TAILQ_INIT(&nmp->nm_bufq);
1102191783Srmacklem		if (clval == 0)
1103191783Srmacklem			clval = (u_int64_t)nfsboottime.tv_sec;
1104191783Srmacklem		nmp->nm_clval = clval++;
1105191783Srmacklem		nmp->nm_krbnamelen = argp->krbnamelen;
1106191783Srmacklem		nmp->nm_dirpathlen = argp->dirlen;
1107191783Srmacklem		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1108192675Srmacklem		if (td->td_ucred->cr_uid != (uid_t)0) {
1109191783Srmacklem			/*
1110192675Srmacklem			 * nm_uid is used to get KerberosV credentials for
1111192675Srmacklem			 * the nfsv4 state handling operations if there is
1112192675Srmacklem			 * no host based principal set. Use the uid of
1113192675Srmacklem			 * this user if not root, since they are doing the
1114192675Srmacklem			 * mount. I don't think setting this for root will
1115192675Srmacklem			 * work, since root normally does not have user
1116192675Srmacklem			 * credentials in a credentials cache.
1117191783Srmacklem			 */
1118192675Srmacklem			nmp->nm_uid = td->td_ucred->cr_uid;
1119191783Srmacklem		} else {
1120191783Srmacklem			/*
1121192675Srmacklem			 * Just set to -1, so it won't be used.
1122191783Srmacklem			 */
1123191783Srmacklem			nmp->nm_uid = (uid_t)-1;
1124191783Srmacklem		}
1125191783Srmacklem
1126191783Srmacklem		/* Copy and null terminate all the names */
1127191783Srmacklem		if (nmp->nm_krbnamelen > 0) {
1128191783Srmacklem			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1129191783Srmacklem			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1130191783Srmacklem		}
1131191783Srmacklem		if (nmp->nm_dirpathlen > 0) {
1132191783Srmacklem			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1133191783Srmacklem			    nmp->nm_dirpathlen);
1134191783Srmacklem			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1135191783Srmacklem			    + 1] = '\0';
1136191783Srmacklem		}
1137191783Srmacklem		if (nmp->nm_srvkrbnamelen > 0) {
1138191783Srmacklem			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1139191783Srmacklem			    nmp->nm_srvkrbnamelen);
1140191783Srmacklem			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1141191783Srmacklem			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1142191783Srmacklem		}
1143191783Srmacklem		nmp->nm_sockreq.nr_cred = crhold(cred);
1144191783Srmacklem		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1145191783Srmacklem		mp->mnt_data = nmp;
1146214048Srmacklem		nmp->nm_getinfo = nfs_getnlminfo;
1147216931Srmacklem		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1148191783Srmacklem	}
1149191783Srmacklem	vfs_getnewfsid(mp);
1150191783Srmacklem	nmp->nm_mountp = mp;
1151191783Srmacklem	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1152203303Srmacklem	nmp->nm_negnametimeo = negnametimeo;
1153191783Srmacklem
1154214048Srmacklem	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1155192585Srmacklem
1156191783Srmacklem	/*
1157191783Srmacklem	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1158191783Srmacklem	 * high, depending on whether we end up with negative offsets in
1159191783Srmacklem	 * the client or server somewhere.  2GB-1 may be safer.
1160191783Srmacklem	 *
1161191783Srmacklem	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1162191783Srmacklem	 * that we can handle until we find out otherwise.
1163191783Srmacklem	 * XXX Our "safe" limit on the client is what we can store in our
1164191783Srmacklem	 * buffer cache using signed(!) block numbers.
1165191783Srmacklem	 */
1166191783Srmacklem	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1167191783Srmacklem		nmp->nm_maxfilesize = 0xffffffffLL;
1168191783Srmacklem	else
1169191783Srmacklem		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1170191783Srmacklem
1171191783Srmacklem	nmp->nm_timeo = NFS_TIMEO;
1172191783Srmacklem	nmp->nm_retry = NFS_RETRANS;
1173191783Srmacklem	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1174191783Srmacklem		nmp->nm_wsize = NFS_WSIZE;
1175191783Srmacklem		nmp->nm_rsize = NFS_RSIZE;
1176191783Srmacklem		nmp->nm_readdirsize = NFS_READDIRSIZE;
1177191783Srmacklem	}
1178191783Srmacklem	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1179191783Srmacklem	nmp->nm_numgrps = NFS_MAXGRPS;
1180191783Srmacklem	nmp->nm_readahead = NFS_DEFRAHEAD;
1181191783Srmacklem	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1182191783Srmacklem	if (nmp->nm_tprintf_delay < 0)
1183191783Srmacklem		nmp->nm_tprintf_delay = 0;
1184191783Srmacklem	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1185191783Srmacklem	if (nmp->nm_tprintf_initial_delay < 0)
1186191783Srmacklem		nmp->nm_tprintf_initial_delay = 0;
1187191783Srmacklem	nmp->nm_fhsize = argp->fhsize;
1188191783Srmacklem	if (nmp->nm_fhsize > 0)
1189191783Srmacklem		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1190191783Srmacklem	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1191191783Srmacklem	nmp->nm_nam = nam;
1192191783Srmacklem	/* Set up the sockets and per-host congestion */
1193191783Srmacklem	nmp->nm_sotype = argp->sotype;
1194191783Srmacklem	nmp->nm_soproto = argp->proto;
1195191783Srmacklem	nmp->nm_sockreq.nr_prog = NFS_PROG;
1196191783Srmacklem	if ((argp->flags & NFSMNT_NFSV4))
1197191783Srmacklem		nmp->nm_sockreq.nr_vers = NFS_VER4;
1198191783Srmacklem	else if ((argp->flags & NFSMNT_NFSV3))
1199191783Srmacklem		nmp->nm_sockreq.nr_vers = NFS_VER3;
1200191783Srmacklem	else
1201191783Srmacklem		nmp->nm_sockreq.nr_vers = NFS_VER2;
1202191783Srmacklem
1203191783Srmacklem
1204191783Srmacklem	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1205191783Srmacklem		goto bad;
1206191783Srmacklem
1207191783Srmacklem	/*
1208191783Srmacklem	 * A reference count is needed on the nfsnode representing the
1209191783Srmacklem	 * remote root.  If this object is not persistent, then backward
1210191783Srmacklem	 * traversals of the mount point (i.e. "..") will not work if
1211191783Srmacklem	 * the nfsnode gets flushed out of the cache. Ufs does not have
1212191783Srmacklem	 * this problem, because one can identify root inodes by their
1213191783Srmacklem	 * number == ROOTINO (2).
1214191783Srmacklem	 */
1215191783Srmacklem	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1216191783Srmacklem	    nmp->nm_dirpathlen > 0) {
1217191783Srmacklem		/*
1218191783Srmacklem		 * If the fhsize on the mount point == 0 for V4, the mount
1219191783Srmacklem		 * path needs to be looked up.
1220191783Srmacklem		 */
1221191783Srmacklem		trycnt = 3;
1222191783Srmacklem		do {
1223191783Srmacklem			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1224191783Srmacklem			    cred, td);
1225191783Srmacklem			if (error)
1226207170Srmacklem				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1227191783Srmacklem		} while (error && --trycnt > 0);
1228191783Srmacklem		if (error) {
1229191783Srmacklem			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1230191783Srmacklem			goto bad;
1231191783Srmacklem		}
1232191783Srmacklem	}
1233191783Srmacklem	if (nmp->nm_fhsize > 0) {
1234195762Srmacklem		/*
1235195762Srmacklem		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1236195762Srmacklem		 * non-zero for the root vnode. f_iosize will be set correctly
1237195762Srmacklem		 * by nfs_statfs() before any I/O occurs.
1238195762Srmacklem		 */
1239195762Srmacklem		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1240220732Srmacklem		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1241220732Srmacklem		    LK_EXCLUSIVE);
1242191783Srmacklem		if (error)
1243191783Srmacklem			goto bad;
1244191783Srmacklem		*vpp = NFSTOV(np);
1245191783Srmacklem
1246191783Srmacklem		/*
1247191783Srmacklem		 * Get file attributes and transfer parameters for the
1248191783Srmacklem		 * mountpoint.  This has the side effect of filling in
1249191783Srmacklem		 * (*vpp)->v_type with the correct value.
1250191783Srmacklem		 */
1251191783Srmacklem		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1252191783Srmacklem		    cred, td, &nfsva, NULL);
1253191783Srmacklem		if (ret) {
1254191783Srmacklem			/*
1255191783Srmacklem			 * Just set default values to get things going.
1256191783Srmacklem			 */
1257191783Srmacklem			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1258191783Srmacklem			nfsva.na_vattr.va_type = VDIR;
1259191783Srmacklem			nfsva.na_vattr.va_mode = 0777;
1260191783Srmacklem			nfsva.na_vattr.va_nlink = 100;
1261191783Srmacklem			nfsva.na_vattr.va_uid = (uid_t)0;
1262191783Srmacklem			nfsva.na_vattr.va_gid = (gid_t)0;
1263191783Srmacklem			nfsva.na_vattr.va_fileid = 2;
1264191783Srmacklem			nfsva.na_vattr.va_gen = 1;
1265191783Srmacklem			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1266191783Srmacklem			nfsva.na_vattr.va_size = 512 * 1024;
1267191783Srmacklem		}
1268191783Srmacklem		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1269191783Srmacklem		if (argp->flags & NFSMNT_NFSV3)
1270191783Srmacklem			ncl_fsinfo(nmp, *vpp, cred, td);
1271191783Srmacklem
1272191783Srmacklem		/*
1273191783Srmacklem		 * Lose the lock but keep the ref.
1274191783Srmacklem		 */
1275191783Srmacklem		VOP_UNLOCK(*vpp, 0);
1276191783Srmacklem		return (0);
1277191783Srmacklem	}
1278191783Srmacklem	error = EIO;
1279191783Srmacklem
1280191783Srmacklembad:
1281191783Srmacklem	newnfs_disconnect(&nmp->nm_sockreq);
1282191783Srmacklem	crfree(nmp->nm_sockreq.nr_cred);
1283191783Srmacklem	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1284191783Srmacklem	mtx_destroy(&nmp->nm_mtx);
1285191783Srmacklem	FREE(nmp, M_NEWNFSMNT);
1286191783Srmacklem	FREE(nam, M_SONAME);
1287191783Srmacklem	return (error);
1288191783Srmacklem}
1289191783Srmacklem
1290191783Srmacklem/*
1291191783Srmacklem * unmount system call
1292191783Srmacklem */
1293191783Srmacklemstatic int
1294191990Sattilionfs_unmount(struct mount *mp, int mntflags)
1295191783Srmacklem{
1296191990Sattilio	struct thread *td;
1297191783Srmacklem	struct nfsmount *nmp;
1298191783Srmacklem	int error, flags = 0, trycnt = 0;
1299191783Srmacklem
1300191990Sattilio	td = curthread;
1301191990Sattilio
1302191783Srmacklem	if (mntflags & MNT_FORCE)
1303191783Srmacklem		flags |= FORCECLOSE;
1304191783Srmacklem	nmp = VFSTONFS(mp);
1305191783Srmacklem	/*
1306191783Srmacklem	 * Goes something like this..
1307191783Srmacklem	 * - Call vflush() to clear out vnodes for this filesystem
1308191783Srmacklem	 * - Close the socket
1309191783Srmacklem	 * - Free up the data structures
1310191783Srmacklem	 */
1311191783Srmacklem	/* In the forced case, cancel any outstanding requests. */
1312191783Srmacklem	if (mntflags & MNT_FORCE) {
1313191783Srmacklem		error = newnfs_nmcancelreqs(nmp);
1314191783Srmacklem		if (error)
1315191783Srmacklem			goto out;
1316191783Srmacklem		/* For a forced close, get rid of the renew thread now */
1317191783Srmacklem		nfscl_umount(nmp, td);
1318191783Srmacklem	}
1319191783Srmacklem	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1320191783Srmacklem	do {
1321191783Srmacklem		error = vflush(mp, 1, flags, td);
1322191783Srmacklem		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1323207170Srmacklem			(void) nfs_catnap(PSOCK, error, "newndm");
1324191783Srmacklem	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1325191783Srmacklem	if (error)
1326191783Srmacklem		goto out;
1327191783Srmacklem
1328191783Srmacklem	/*
1329191783Srmacklem	 * We are now committed to the unmount.
1330191783Srmacklem	 */
1331191783Srmacklem	if ((mntflags & MNT_FORCE) == 0)
1332191783Srmacklem		nfscl_umount(nmp, td);
1333191783Srmacklem	newnfs_disconnect(&nmp->nm_sockreq);
1334191783Srmacklem	crfree(nmp->nm_sockreq.nr_cred);
1335191783Srmacklem	FREE(nmp->nm_nam, M_SONAME);
1336191783Srmacklem
1337191783Srmacklem	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1338191783Srmacklem	mtx_destroy(&nmp->nm_mtx);
1339191783Srmacklem	FREE(nmp, M_NEWNFSMNT);
1340191783Srmacklemout:
1341191783Srmacklem	return (error);
1342191783Srmacklem}
1343191783Srmacklem
1344191783Srmacklem/*
1345191783Srmacklem * Return root of a filesystem
1346191783Srmacklem */
1347191783Srmacklemstatic int
1348191990Sattilionfs_root(struct mount *mp, int flags, struct vnode **vpp)
1349191783Srmacklem{
1350191783Srmacklem	struct vnode *vp;
1351191783Srmacklem	struct nfsmount *nmp;
1352191783Srmacklem	struct nfsnode *np;
1353191783Srmacklem	int error;
1354191783Srmacklem
1355191783Srmacklem	nmp = VFSTONFS(mp);
1356220732Srmacklem	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1357191783Srmacklem	if (error)
1358191783Srmacklem		return error;
1359191783Srmacklem	vp = NFSTOV(np);
1360191783Srmacklem	/*
1361191783Srmacklem	 * Get transfer parameters and attributes for root vnode once.
1362191783Srmacklem	 */
1363191783Srmacklem	mtx_lock(&nmp->nm_mtx);
1364191783Srmacklem	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1365191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1366191783Srmacklem		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1367191783Srmacklem	} else
1368191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1369191783Srmacklem	if (vp->v_type == VNON)
1370191783Srmacklem	    vp->v_type = VDIR;
1371191783Srmacklem	vp->v_vflag |= VV_ROOT;
1372191783Srmacklem	*vpp = vp;
1373191783Srmacklem	return (0);
1374191783Srmacklem}
1375191783Srmacklem
1376191783Srmacklem/*
1377191783Srmacklem * Flush out the buffer cache
1378191783Srmacklem */
1379191783Srmacklem/* ARGSUSED */
1380191783Srmacklemstatic int
1381191990Sattilionfs_sync(struct mount *mp, int waitfor)
1382191783Srmacklem{
1383191783Srmacklem	struct vnode *vp, *mvp;
1384191990Sattilio	struct thread *td;
1385191783Srmacklem	int error, allerror = 0;
1386191783Srmacklem
1387191990Sattilio	td = curthread;
1388191990Sattilio
1389191783Srmacklem	/*
1390191783Srmacklem	 * Force stale buffer cache information to be flushed.
1391191783Srmacklem	 */
1392191783Srmacklem	MNT_ILOCK(mp);
1393191783Srmacklemloop:
1394191783Srmacklem	MNT_VNODE_FOREACH(vp, mp, mvp) {
1395191783Srmacklem		VI_LOCK(vp);
1396191783Srmacklem		MNT_IUNLOCK(mp);
1397191783Srmacklem		/* XXX Racy bv_cnt check. */
1398191783Srmacklem		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1399191783Srmacklem		    waitfor == MNT_LAZY) {
1400191783Srmacklem			VI_UNLOCK(vp);
1401191783Srmacklem			MNT_ILOCK(mp);
1402191783Srmacklem			continue;
1403191783Srmacklem		}
1404191783Srmacklem		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1405191783Srmacklem			MNT_ILOCK(mp);
1406191783Srmacklem			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1407191783Srmacklem			goto loop;
1408191783Srmacklem		}
1409191783Srmacklem		error = VOP_FSYNC(vp, waitfor, td);
1410191783Srmacklem		if (error)
1411191783Srmacklem			allerror = error;
1412191783Srmacklem		VOP_UNLOCK(vp, 0);
1413191783Srmacklem		vrele(vp);
1414191783Srmacklem
1415191783Srmacklem		MNT_ILOCK(mp);
1416191783Srmacklem	}
1417191783Srmacklem	MNT_IUNLOCK(mp);
1418191783Srmacklem	return (allerror);
1419191783Srmacklem}
1420191783Srmacklem
1421191783Srmacklemstatic int
1422191783Srmacklemnfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1423191783Srmacklem{
1424191783Srmacklem	struct nfsmount *nmp = VFSTONFS(mp);
1425191783Srmacklem	struct vfsquery vq;
1426191783Srmacklem	int error;
1427191783Srmacklem
1428191783Srmacklem	bzero(&vq, sizeof(vq));
1429191783Srmacklem	switch (op) {
1430191783Srmacklem#if 0
1431191783Srmacklem	case VFS_CTL_NOLOCKS:
1432191783Srmacklem		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1433191783Srmacklem 		if (req->oldptr != NULL) {
1434191783Srmacklem 			error = SYSCTL_OUT(req, &val, sizeof(val));
1435191783Srmacklem 			if (error)
1436191783Srmacklem 				return (error);
1437191783Srmacklem 		}
1438191783Srmacklem 		if (req->newptr != NULL) {
1439191783Srmacklem 			error = SYSCTL_IN(req, &val, sizeof(val));
1440191783Srmacklem 			if (error)
1441191783Srmacklem 				return (error);
1442191783Srmacklem			if (val)
1443191783Srmacklem				nmp->nm_flag |= NFSMNT_NOLOCKS;
1444191783Srmacklem			else
1445191783Srmacklem				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1446191783Srmacklem 		}
1447191783Srmacklem		break;
1448191783Srmacklem#endif
1449191783Srmacklem	case VFS_CTL_QUERY:
1450191783Srmacklem		mtx_lock(&nmp->nm_mtx);
1451191783Srmacklem		if (nmp->nm_state & NFSSTA_TIMEO)
1452191783Srmacklem			vq.vq_flags |= VQ_NOTRESP;
1453191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1454191783Srmacklem#if 0
1455191783Srmacklem		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1456191783Srmacklem		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1457191783Srmacklem			vq.vq_flags |= VQ_NOTRESPLOCK;
1458191783Srmacklem#endif
1459191783Srmacklem		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1460191783Srmacklem		break;
1461191783Srmacklem 	case VFS_CTL_TIMEO:
1462191783Srmacklem 		if (req->oldptr != NULL) {
1463191783Srmacklem 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1464191783Srmacklem 			    sizeof(nmp->nm_tprintf_initial_delay));
1465191783Srmacklem 			if (error)
1466191783Srmacklem 				return (error);
1467191783Srmacklem 		}
1468191783Srmacklem 		if (req->newptr != NULL) {
1469191783Srmacklem			error = vfs_suser(mp, req->td);
1470191783Srmacklem			if (error)
1471191783Srmacklem				return (error);
1472191783Srmacklem 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1473191783Srmacklem 			    sizeof(nmp->nm_tprintf_initial_delay));
1474191783Srmacklem 			if (error)
1475191783Srmacklem 				return (error);
1476191783Srmacklem 			if (nmp->nm_tprintf_initial_delay < 0)
1477191783Srmacklem 				nmp->nm_tprintf_initial_delay = 0;
1478191783Srmacklem 		}
1479191783Srmacklem		break;
1480191783Srmacklem	default:
1481191783Srmacklem		return (ENOTSUP);
1482191783Srmacklem	}
1483191783Srmacklem	return (0);
1484191783Srmacklem}
1485191783Srmacklem
1486214048Srmacklem/*
1487214048Srmacklem * Extract the information needed by the nlm from the nfs vnode.
1488214048Srmacklem */
1489214048Srmacklemstatic void
1490214053Srmacklemnfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1491216931Srmacklem    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1492216931Srmacklem    struct timeval *timeop)
1493214048Srmacklem{
1494214048Srmacklem	struct nfsmount *nmp;
1495214048Srmacklem	struct nfsnode *np = VTONFS(vp);
1496214048Srmacklem
1497214048Srmacklem	nmp = VFSTONFS(vp->v_mount);
1498214048Srmacklem	if (fhlenp != NULL)
1499214053Srmacklem		*fhlenp = (size_t)np->n_fhp->nfh_len;
1500214048Srmacklem	if (fhp != NULL)
1501214048Srmacklem		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1502214048Srmacklem	if (sp != NULL)
1503214048Srmacklem		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1504214048Srmacklem	if (is_v3p != NULL)
1505214048Srmacklem		*is_v3p = NFS_ISV3(vp);
1506214048Srmacklem	if (sizep != NULL)
1507214048Srmacklem		*sizep = np->n_size;
1508216931Srmacklem	if (timeop != NULL) {
1509216931Srmacklem		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1510216931Srmacklem		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1511216931Srmacklem	}
1512214048Srmacklem}
1513214048Srmacklem
1514