nfs_clvfsops.c revision 203303
1214501Srpaulo/*-
2214501Srpaulo * Copyright (c) 1989, 1993, 1995
3214501Srpaulo *	The Regents of the University of California.  All rights reserved.
4214501Srpaulo *
5252726Srpaulo * This code is derived from software contributed to Berkeley by
6252726Srpaulo * Rick Macklem at The University of Guelph.
7214501Srpaulo *
8214501Srpaulo * Redistribution and use in source and binary forms, with or without
9214501Srpaulo * modification, are permitted provided that the following conditions
10214501Srpaulo * are met:
11214501Srpaulo * 1. Redistributions of source code must retain the above copyright
12214501Srpaulo *    notice, this list of conditions and the following disclaimer.
13214501Srpaulo * 2. Redistributions in binary form must reproduce the above copyright
14214501Srpaulo *    notice, this list of conditions and the following disclaimer in the
15214501Srpaulo *    documentation and/or other materials provided with the distribution.
16214501Srpaulo * 4. Neither the name of the University nor the names of its contributors
17214501Srpaulo *    may be used to endorse or promote products derived from this software
18214501Srpaulo *    without specific prior written permission.
19214501Srpaulo *
20214501Srpaulo * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21214501Srpaulo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22214501Srpaulo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23214501Srpaulo * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24214501Srpaulo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25214501Srpaulo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26214501Srpaulo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27214501Srpaulo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28214501Srpaulo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29214501Srpaulo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30214501Srpaulo * SUCH DAMAGE.
31214501Srpaulo *
32214501Srpaulo *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33281806Srpaulo */
34281806Srpaulo
35281806Srpaulo#include <sys/cdefs.h>
36214501Srpaulo__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 203303 2010-01-31 19:12:24Z rmacklem $");
37214501Srpaulo
38214501Srpaulo
39214501Srpaulo#include "opt_bootp.h"
40214501Srpaulo#include "opt_nfsroot.h"
41214501Srpaulo
42214501Srpaulo#include <sys/param.h>
43214501Srpaulo#include <sys/systm.h>
44214501Srpaulo#include <sys/kernel.h>
45214501Srpaulo#include <sys/bio.h>
46214501Srpaulo#include <sys/buf.h>
47214501Srpaulo#include <sys/clock.h>
48214501Srpaulo#include <sys/jail.h>
49214501Srpaulo#include <sys/lock.h>
50214501Srpaulo#include <sys/malloc.h>
51214501Srpaulo#include <sys/mbuf.h>
52214501Srpaulo#include <sys/module.h>
53214501Srpaulo#include <sys/mount.h>
54214501Srpaulo#include <sys/proc.h>
55214501Srpaulo#include <sys/socket.h>
56214501Srpaulo#include <sys/socketvar.h>
57214501Srpaulo#include <sys/sockio.h>
58214501Srpaulo#include <sys/sysctl.h>
59214501Srpaulo#include <sys/vnode.h>
60214501Srpaulo#include <sys/signalvar.h>
61214501Srpaulo
62214501Srpaulo#include <vm/vm.h>
63214501Srpaulo#include <vm/vm_extern.h>
64214501Srpaulo#include <vm/uma.h>
65214501Srpaulo
66214501Srpaulo#include <net/if.h>
67214501Srpaulo#include <net/route.h>
68214501Srpaulo#include <netinet/in.h>
69214501Srpaulo
70214501Srpaulo#include <fs/nfs/nfsport.h>
71214501Srpaulo#include <fs/nfsclient/nfsnode.h>
72214501Srpaulo#include <fs/nfsclient/nfsmount.h>
73214501Srpaulo#include <fs/nfsclient/nfs.h>
74214501Srpaulo#include <fs/nfsclient/nfsdiskless.h>
75214501Srpaulo
76214501Srpauloextern int nfscl_ticks;
77214501Srpauloextern struct timeval nfsboottime;
78214501Srpauloextern struct nfsstats	newnfsstats;
79214501Srpaulo
80214501SrpauloMALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
81214501SrpauloMALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
82214501Srpaulo
83214501SrpauloSYSCTL_DECL(_vfs_newnfs);
84214501SrpauloSYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
85214501Srpaulo	&newnfsstats, nfsstats, "S,nfsstats");
86214501Srpaulostatic int nfs_ip_paranoia = 1;
87214501SrpauloSYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
88214501Srpaulo    &nfs_ip_paranoia, 0, "");
89214501Srpaulostatic int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
90214501SrpauloSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
91214501Srpaulo        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
92214501Srpaulo/* how long between console messages "nfs server foo not responding" */
93214501Srpaulostatic int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
94214501SrpauloSYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
95214501Srpaulo        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96214501Srpaulo
97214501Srpaulostatic void	nfs_sec_name(char *, int *);
98214501Srpaulostatic void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
99214501Srpaulo		    struct nfs_args *argp, struct ucred *, struct thread *);
100214501Srpaulostatic int	mountnfs(struct nfs_args *, struct mount *,
101214501Srpaulo		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
102214501Srpaulo		    struct vnode **, struct ucred *, struct thread *, int);
103214501Srpaulostatic vfs_mount_t nfs_mount;
104214501Srpaulostatic vfs_cmount_t nfs_cmount;
105214501Srpaulostatic vfs_unmount_t nfs_unmount;
106214501Srpaulostatic vfs_root_t nfs_root;
107214501Srpaulostatic vfs_statfs_t nfs_statfs;
108214501Srpaulostatic vfs_sync_t nfs_sync;
109214501Srpaulostatic vfs_sysctl_t nfs_sysctl;
110214501Srpaulo
111214501Srpaulo/*
112214501Srpaulo * nfs vfs operations.
113214501Srpaulo */
114214501Srpaulostatic struct vfsops nfs_vfsops = {
115214501Srpaulo	.vfs_init =		ncl_init,
116214501Srpaulo	.vfs_mount =		nfs_mount,
117214501Srpaulo	.vfs_cmount =		nfs_cmount,
118214501Srpaulo	.vfs_root =		nfs_root,
119214501Srpaulo	.vfs_statfs =		nfs_statfs,
120214501Srpaulo	.vfs_sync =		nfs_sync,
121214501Srpaulo	.vfs_uninit =		ncl_uninit,
122214501Srpaulo	.vfs_unmount =		nfs_unmount,
123214501Srpaulo	.vfs_sysctl =		nfs_sysctl,
124214501Srpaulo};
125214501SrpauloVFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
126214501Srpaulo
127214501Srpaulo/* So that loader and kldload(2) can find us, wherever we are.. */
128214501SrpauloMODULE_VERSION(newnfs, 1);
129214501Srpaulo
130214501Srpaulo/*
131214501Srpaulo * This structure must be filled in by a primary bootstrap or bootstrap
132214501Srpaulo * server for a diskless/dataless machine. It is initialized below just
133214501Srpaulo * to ensure that it is allocated to initialized data (.data not .bss).
134214501Srpaulo */
135214501Srpaulostruct nfs_diskless newnfs_diskless = { { { 0 } } };
136214501Srpaulostruct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
137214501Srpauloint newnfs_diskless_valid = 0;
138214501Srpaulo
139214501SrpauloSYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
140214501Srpaulo    &newnfs_diskless_valid, 0,
141214501Srpaulo    "Has the diskless struct been filled correctly");
142214501Srpaulo
143214501SrpauloSYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
144214501Srpaulo    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
145214501Srpaulo
146214501SrpauloSYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
147214501Srpaulo    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
148214501Srpaulo    "%Ssockaddr_in", "Diskless root nfs address");
149214501Srpaulo
150214501Srpaulo
151214501Srpaulovoid		newnfsargs_ntoh(struct nfs_args *);
152214501Srpaulostatic int	nfs_mountdiskless(char *,
153214501Srpaulo		    struct sockaddr_in *, struct nfs_args *,
154214501Srpaulo		    struct thread *, struct vnode **, struct mount *);
155214501Srpaulostatic void	nfs_convert_diskless(void);
156214501Srpaulostatic void	nfs_convert_oargs(struct nfs_args *args,
157214501Srpaulo		    struct onfs_args *oargs);
158214501Srpaulo
159214501Srpauloint
160214501Srpaulonewnfs_iosize(struct nfsmount *nmp)
161214501Srpaulo{
162214501Srpaulo	int iosize, maxio;
163252726Srpaulo
164214501Srpaulo	/* First, set the upper limit for iosize */
165214501Srpaulo	if (nmp->nm_flag & NFSMNT_NFSV4) {
166214501Srpaulo		maxio = NFS_MAXBSIZE;
167214501Srpaulo	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
168214501Srpaulo		if (nmp->nm_sotype == SOCK_DGRAM)
169214501Srpaulo			maxio = NFS_MAXDGRAMDATA;
170214501Srpaulo		else
171214501Srpaulo			maxio = NFS_MAXBSIZE;
172214501Srpaulo	} else {
173214501Srpaulo		maxio = NFS_V2MAXDATA;
174214501Srpaulo	}
175214501Srpaulo	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
176214501Srpaulo		nmp->nm_rsize = maxio;
177214501Srpaulo	if (nmp->nm_rsize > MAXBSIZE)
178214501Srpaulo		nmp->nm_rsize = MAXBSIZE;
179281806Srpaulo	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
180281806Srpaulo		nmp->nm_readdirsize = maxio;
181281806Srpaulo	if (nmp->nm_readdirsize > nmp->nm_rsize)
182281806Srpaulo		nmp->nm_readdirsize = nmp->nm_rsize;
183281806Srpaulo	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
184214501Srpaulo		nmp->nm_wsize = maxio;
185214501Srpaulo	if (nmp->nm_wsize > MAXBSIZE)
186214501Srpaulo		nmp->nm_wsize = MAXBSIZE;
187
188	/*
189	 * Calculate the size used for io buffers.  Use the larger
190	 * of the two sizes to minimise nfs requests but make sure
191	 * that it is at least one VM page to avoid wasting buffer
192	 * space.
193	 */
194	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
195	iosize = imax(iosize, PAGE_SIZE);
196	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
197	return (iosize);
198}
199
200static void
201nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
202{
203
204	args->version = NFS_ARGSVERSION;
205	args->addr = oargs->addr;
206	args->addrlen = oargs->addrlen;
207	args->sotype = oargs->sotype;
208	args->proto = oargs->proto;
209	args->fh = oargs->fh;
210	args->fhsize = oargs->fhsize;
211	args->flags = oargs->flags;
212	args->wsize = oargs->wsize;
213	args->rsize = oargs->rsize;
214	args->readdirsize = oargs->readdirsize;
215	args->timeo = oargs->timeo;
216	args->retrans = oargs->retrans;
217	args->readahead = oargs->readahead;
218	args->hostname = oargs->hostname;
219}
220
221static void
222nfs_convert_diskless(void)
223{
224
225	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
226	    sizeof (struct ifaliasreq));
227	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
228	    sizeof (struct sockaddr_in));
229	nfs_convert_oargs(&newnfsv3_diskless.root_args,
230	    &newnfs_diskless.root_args);
231	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
232		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
233		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
234		    NFSX_MYFH);
235	} else {
236		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
237		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238		    NFSX_V2FH);
239	}
240	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
241	    sizeof(struct sockaddr_in));
242	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
243	    MNAMELEN);
244	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
245	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
246	    MAXHOSTNAMELEN);
247	newnfs_diskless_valid = 3;
248}
249
250/*
251 * nfs statfs call
252 */
253static int
254nfs_statfs(struct mount *mp, struct statfs *sbp)
255{
256	struct vnode *vp;
257	struct thread *td;
258	struct nfsmount *nmp = VFSTONFS(mp);
259	struct nfsvattr nfsva;
260	struct nfsfsinfo fs;
261	struct nfsstatfs sb;
262	int error = 0, attrflag, gotfsinfo = 0, ret;
263	struct nfsnode *np;
264
265	td = curthread;
266
267	error = vfs_busy(mp, MBF_NOWAIT);
268	if (error)
269		return (error);
270	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
271	if (error) {
272		vfs_unbusy(mp);
273		return (error);
274	}
275	vp = NFSTOV(np);
276	mtx_lock(&nmp->nm_mtx);
277	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
278		mtx_unlock(&nmp->nm_mtx);
279		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
280		    &attrflag, NULL);
281		if (!error)
282			gotfsinfo = 1;
283	} else
284		mtx_unlock(&nmp->nm_mtx);
285	if (!error)
286		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
287		    &attrflag, NULL);
288	if (attrflag == 0) {
289		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
290		    td->td_ucred, td, &nfsva, NULL);
291		if (ret) {
292			/*
293			 * Just set default values to get things going.
294			 */
295			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
296			nfsva.na_vattr.va_type = VDIR;
297			nfsva.na_vattr.va_mode = 0777;
298			nfsva.na_vattr.va_nlink = 100;
299			nfsva.na_vattr.va_uid = (uid_t)0;
300			nfsva.na_vattr.va_gid = (gid_t)0;
301			nfsva.na_vattr.va_fileid = 2;
302			nfsva.na_vattr.va_gen = 1;
303			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
304			nfsva.na_vattr.va_size = 512 * 1024;
305		}
306	}
307	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
308	if (!error) {
309	    mtx_lock(&nmp->nm_mtx);
310	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
311		nfscl_loadfsinfo(nmp, &fs);
312	    nfscl_loadsbinfo(nmp, &sb, sbp);
313	    sbp->f_flags = nmp->nm_flag;
314	    sbp->f_iosize = newnfs_iosize(nmp);
315	    mtx_unlock(&nmp->nm_mtx);
316	    if (sbp != &mp->mnt_stat) {
317		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
318		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
319	    }
320	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
321	} else if (NFS_ISV4(vp)) {
322		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
323	}
324	vput(vp);
325	vfs_unbusy(mp);
326	return (error);
327}
328
329/*
330 * nfs version 3 fsinfo rpc call
331 */
332int
333ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
334    struct thread *td)
335{
336	struct nfsfsinfo fs;
337	struct nfsvattr nfsva;
338	int error, attrflag;
339
340	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
341	if (!error) {
342		if (attrflag)
343			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
344			    1);
345		mtx_lock(&nmp->nm_mtx);
346		nfscl_loadfsinfo(nmp, &fs);
347		mtx_unlock(&nmp->nm_mtx);
348	}
349	return (error);
350}
351
352/*
353 * Mount a remote root fs via. nfs. This depends on the info in the
354 * newnfs_diskless structure that has been filled in properly by some primary
355 * bootstrap.
356 * It goes something like this:
357 * - do enough of "ifconfig" by calling ifioctl() so that the system
358 *   can talk to the server
359 * - If newnfs_diskless.mygateway is filled in, use that address as
360 *   a default gateway.
361 * - build the rootfs mount point and call mountnfs() to do the rest.
362 *
363 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
364 * structure, as well as other global NFS client variables here, as
365 * nfs_mountroot() will be called once in the boot before any other NFS
366 * client activity occurs.
367 */
368int
369ncl_mountroot(struct mount *mp)
370{
371	struct thread *td = curthread;
372	struct nfsv3_diskless *nd = &newnfsv3_diskless;
373	struct socket *so;
374	struct vnode *vp;
375	struct ifreq ir;
376	int error;
377	u_long l;
378	char buf[128];
379	char *cp;
380
381#if defined(BOOTP_NFSROOT) && defined(BOOTP)
382	bootpc_init();		/* use bootp to get nfs_diskless filled in */
383#elif defined(NFS_ROOT)
384	nfs_setup_diskless();
385#endif
386
387	if (newnfs_diskless_valid == 0)
388		return (-1);
389	if (newnfs_diskless_valid == 1)
390		nfs_convert_diskless();
391
392	/*
393	 * XXX splnet, so networks will receive...
394	 */
395	splnet();
396
397	/*
398	 * Do enough of ifconfig(8) so that the critical net interface can
399	 * talk to the server.
400	 */
401	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
402	    td->td_ucred, td);
403	if (error)
404		panic("nfs_mountroot: socreate(%04x): %d",
405			nd->myif.ifra_addr.sa_family, error);
406
407#if 0 /* XXX Bad idea */
408	/*
409	 * We might not have been told the right interface, so we pass
410	 * over the first ten interfaces of the same kind, until we get
411	 * one of them configured.
412	 */
413
414	for (i = strlen(nd->myif.ifra_name) - 1;
415		nd->myif.ifra_name[i] >= '0' &&
416		nd->myif.ifra_name[i] <= '9';
417		nd->myif.ifra_name[i] ++) {
418		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
419		if(!error)
420			break;
421	}
422#endif
423	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
424	if (error)
425		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
426	if ((cp = getenv("boot.netif.mtu")) != NULL) {
427		ir.ifr_mtu = strtol(cp, NULL, 10);
428		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
429		freeenv(cp);
430		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
431		if (error)
432			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
433	}
434	soclose(so);
435
436	/*
437	 * If the gateway field is filled in, set it as the default route.
438	 * Note that pxeboot will set a default route of 0 if the route
439	 * is not set by the DHCP server.  Check also for a value of 0
440	 * to avoid panicking inappropriately in that situation.
441	 */
442	if (nd->mygateway.sin_len != 0 &&
443	    nd->mygateway.sin_addr.s_addr != 0) {
444		struct sockaddr_in mask, sin;
445
446		bzero((caddr_t)&mask, sizeof(mask));
447		sin = mask;
448		sin.sin_family = AF_INET;
449		sin.sin_len = sizeof(sin);
450                /* XXX MRT use table 0 for this sort of thing */
451		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
452		    (struct sockaddr *)&nd->mygateway,
453		    (struct sockaddr *)&mask,
454		    RTF_UP | RTF_GATEWAY, NULL);
455		if (error)
456			panic("nfs_mountroot: RTM_ADD: %d", error);
457	}
458
459	/*
460	 * Create the rootfs mount point.
461	 */
462	nd->root_args.fh = nd->root_fh;
463	nd->root_args.fhsize = nd->root_fhsize;
464	l = ntohl(nd->root_saddr.sin_addr.s_addr);
465	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
466		(l >> 24) & 0xff, (l >> 16) & 0xff,
467		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
468	printf("NFS ROOT: %s\n", buf);
469	nd->root_args.hostname = buf;
470	if ((error = nfs_mountdiskless(buf,
471	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
472		return (error);
473	}
474
475	/*
476	 * This is not really an nfs issue, but it is much easier to
477	 * set hostname here and then let the "/etc/rc.xxx" files
478	 * mount the right /var based upon its preset value.
479	 */
480	mtx_lock(&prison0.pr_mtx);
481	strlcpy(prison0.pr_hostname, nd->my_hostnam,
482	    sizeof(prison0.pr_hostname));
483	mtx_unlock(&prison0.pr_mtx);
484	inittodr(ntohl(nd->root_time));
485	return (0);
486}
487
488/*
489 * Internal version of mount system call for diskless setup.
490 */
491static int
492nfs_mountdiskless(char *path,
493    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
494    struct vnode **vpp, struct mount *mp)
495{
496	struct sockaddr *nam;
497	int error;
498
499	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
500	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
501	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
502		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
503		return (error);
504	}
505	return (0);
506}
507
508static void
509nfs_sec_name(char *sec, int *flagsp)
510{
511	if (!strcmp(sec, "krb5"))
512		*flagsp |= NFSMNT_KERB;
513	else if (!strcmp(sec, "krb5i"))
514		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
515	else if (!strcmp(sec, "krb5p"))
516		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
517}
518
519static void
520nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
521    struct ucred *cred, struct thread *td)
522{
523	int s;
524	int adjsock;
525
526	s = splnet();
527
528	/*
529	 * Set read-only flag if requested; otherwise, clear it if this is
530	 * an update.  If this is not an update, then either the read-only
531	 * flag is already clear, or this is a root mount and it was set
532	 * intentionally at some previous point.
533	 */
534	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
535		MNT_ILOCK(mp);
536		mp->mnt_flag |= MNT_RDONLY;
537		MNT_IUNLOCK(mp);
538	} else if (mp->mnt_flag & MNT_UPDATE) {
539		MNT_ILOCK(mp);
540		mp->mnt_flag &= ~MNT_RDONLY;
541		MNT_IUNLOCK(mp);
542	}
543
544	/*
545	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
546	 * no sense in that context.  Also, set up appropriate retransmit
547	 * and soft timeout behavior.
548	 */
549	if (argp->sotype == SOCK_STREAM) {
550		nmp->nm_flag &= ~NFSMNT_NOCONN;
551		nmp->nm_timeo = NFS_MAXTIMEO;
552	}
553
554	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
555	if ((argp->flags & NFSMNT_NFSV3) == 0)
556		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
557
558	/* Also re-bind if we're switching to/from a connected UDP socket */
559	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
560		    (argp->flags & NFSMNT_NOCONN));
561
562	/* Update flags atomically.  Don't change the lock bits. */
563	nmp->nm_flag = argp->flags | nmp->nm_flag;
564	splx(s);
565
566	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
567		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
568		if (nmp->nm_timeo < NFS_MINTIMEO)
569			nmp->nm_timeo = NFS_MINTIMEO;
570		else if (nmp->nm_timeo > NFS_MAXTIMEO)
571			nmp->nm_timeo = NFS_MAXTIMEO;
572	}
573
574	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
575		nmp->nm_retry = argp->retrans;
576		if (nmp->nm_retry > NFS_MAXREXMIT)
577			nmp->nm_retry = NFS_MAXREXMIT;
578	}
579
580	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
581		nmp->nm_wsize = argp->wsize;
582		/* Round down to multiple of blocksize */
583		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
584		if (nmp->nm_wsize <= 0)
585			nmp->nm_wsize = NFS_FABLKSIZE;
586	}
587
588	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
589		nmp->nm_rsize = argp->rsize;
590		/* Round down to multiple of blocksize */
591		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
592		if (nmp->nm_rsize <= 0)
593			nmp->nm_rsize = NFS_FABLKSIZE;
594	}
595
596	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
597		nmp->nm_readdirsize = argp->readdirsize;
598	}
599
600	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
601		nmp->nm_acregmin = argp->acregmin;
602	else
603		nmp->nm_acregmin = NFS_MINATTRTIMO;
604	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
605		nmp->nm_acregmax = argp->acregmax;
606	else
607		nmp->nm_acregmax = NFS_MAXATTRTIMO;
608	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
609		nmp->nm_acdirmin = argp->acdirmin;
610	else
611		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
612	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
613		nmp->nm_acdirmax = argp->acdirmax;
614	else
615		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
616	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
617		nmp->nm_acdirmin = nmp->nm_acdirmax;
618	if (nmp->nm_acregmin > nmp->nm_acregmax)
619		nmp->nm_acregmin = nmp->nm_acregmax;
620
621	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
622		if (argp->readahead <= NFS_MAXRAHEAD)
623			nmp->nm_readahead = argp->readahead;
624		else
625			nmp->nm_readahead = NFS_MAXRAHEAD;
626	}
627	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
628		if (argp->wcommitsize < nmp->nm_wsize)
629			nmp->nm_wcommitsize = nmp->nm_wsize;
630		else
631			nmp->nm_wcommitsize = argp->wcommitsize;
632	}
633
634	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
635		    (nmp->nm_soproto != argp->proto));
636
637	if (nmp->nm_client != NULL && adjsock) {
638		int haslock = 0, error = 0;
639
640		if (nmp->nm_sotype == SOCK_STREAM) {
641			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
642			if (!error)
643				haslock = 1;
644		}
645		if (!error) {
646		    newnfs_disconnect(&nmp->nm_sockreq);
647		    if (haslock)
648			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
649		    nmp->nm_sotype = argp->sotype;
650		    nmp->nm_soproto = argp->proto;
651		    if (nmp->nm_sotype == SOCK_DGRAM)
652			while (newnfs_connect(nmp, &nmp->nm_sockreq,
653			    cred, td, 0)) {
654				printf("newnfs_args: retrying connect\n");
655				(void) nfs_catnap(PSOCK, "newnfscon");
656			}
657		}
658	} else {
659		nmp->nm_sotype = argp->sotype;
660		nmp->nm_soproto = argp->proto;
661	}
662}
663
664static const char *nfs_opts[] = { "from",
665    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
666    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
667    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
668    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
669    "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport",
670    "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
671    "principal", "nfsv4", "gssname", "allgssname", "dirpath",
672    "negnametimeo",
673    NULL };
674
675/*
676 * VFS Operations.
677 *
678 * mount system call
679 * It seems a bit dumb to copyinstr() the host and path here and then
680 * bcopy() them in mountnfs(), but I wanted to detect errors before
681 * doing the sockargs() call because sockargs() allocates an mbuf and
682 * an error after that means that I have to release the mbuf.
683 */
684/* ARGSUSED */
685static int
686nfs_mount(struct mount *mp)
687{
688	struct nfs_args args = {
689	    .version = NFS_ARGSVERSION,
690	    .addr = NULL,
691	    .addrlen = sizeof (struct sockaddr_in),
692	    .sotype = SOCK_STREAM,
693	    .proto = 0,
694	    .fh = NULL,
695	    .fhsize = 0,
696	    .flags = 0,
697	    .wsize = NFS_WSIZE,
698	    .rsize = NFS_RSIZE,
699	    .readdirsize = NFS_READDIRSIZE,
700	    .timeo = 10,
701	    .retrans = NFS_RETRANS,
702	    .readahead = NFS_DEFRAHEAD,
703	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
704	    .hostname = NULL,
705	    /* args version 4 */
706	    .acregmin = NFS_MINATTRTIMO,
707	    .acregmax = NFS_MAXATTRTIMO,
708	    .acdirmin = NFS_MINDIRATTRTIMO,
709	    .acdirmax = NFS_MAXDIRATTRTIMO,
710	    .dirlen = 0,
711	    .krbnamelen = 0,
712	    .srvkrbnamelen = 0,
713	};
714	int error = 0, ret, len;
715	struct sockaddr *nam = NULL;
716	struct vnode *vp;
717	struct thread *td;
718	char hst[MNAMELEN];
719	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
720	char *opt, *name, *secname;
721	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
722
723	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
724		error = EINVAL;
725		goto out;
726	}
727
728	td = curthread;
729	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
730		error = ncl_mountroot(mp);
731		goto out;
732	}
733
734	nfscl_init();
735
736	/* Handle the new style options. */
737	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
738		args.flags |= NFSMNT_NOCONN;
739	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
740		args.flags |= NFSMNT_NOCONN;
741	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
742		args.flags |= NFSMNT_NOLOCKD;
743	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
744		args.flags &= ~NFSMNT_NOLOCKD;
745	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
746		args.flags |= NFSMNT_INT;
747	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
748		args.flags |= NFSMNT_RDIRPLUS;
749	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
750		args.flags |= NFSMNT_RESVPORT;
751	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
752		args.flags &= ~NFSMNT_RESVPORT;
753	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
754		args.flags |= NFSMNT_SOFT;
755	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
756		args.flags &= ~NFSMNT_SOFT;
757	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
758		args.sotype = SOCK_DGRAM;
759	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
760		args.sotype = SOCK_DGRAM;
761	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
762		args.sotype = SOCK_STREAM;
763	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
764		args.flags |= NFSMNT_NFSV3;
765	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
766		args.flags |= NFSMNT_NFSV4;
767		args.sotype = SOCK_STREAM;
768	}
769	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
770		args.flags |= NFSMNT_ALLGSSNAME;
771	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
772		if (opt == NULL) {
773			vfs_mount_error(mp, "illegal readdirsize");
774			error = EINVAL;
775			goto out;
776		}
777		ret = sscanf(opt, "%d", &args.readdirsize);
778		if (ret != 1 || args.readdirsize <= 0) {
779			vfs_mount_error(mp, "illegal readdirsize: %s",
780			    opt);
781			error = EINVAL;
782			goto out;
783		}
784		args.flags |= NFSMNT_READDIRSIZE;
785	}
786	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
787		if (opt == NULL) {
788			vfs_mount_error(mp, "illegal readahead");
789			error = EINVAL;
790			goto out;
791		}
792		ret = sscanf(opt, "%d", &args.readahead);
793		if (ret != 1 || args.readahead <= 0) {
794			vfs_mount_error(mp, "illegal readahead: %s",
795			    opt);
796			error = EINVAL;
797			goto out;
798		}
799		args.flags |= NFSMNT_READAHEAD;
800	}
801	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
802		if (opt == NULL) {
803			vfs_mount_error(mp, "illegal wsize");
804			error = EINVAL;
805			goto out;
806		}
807		ret = sscanf(opt, "%d", &args.wsize);
808		if (ret != 1 || args.wsize <= 0) {
809			vfs_mount_error(mp, "illegal wsize: %s",
810			    opt);
811			error = EINVAL;
812			goto out;
813		}
814		args.flags |= NFSMNT_WSIZE;
815	}
816	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
817		if (opt == NULL) {
818			vfs_mount_error(mp, "illegal rsize");
819			error = EINVAL;
820			goto out;
821		}
822		ret = sscanf(opt, "%d", &args.rsize);
823		if (ret != 1 || args.rsize <= 0) {
824			vfs_mount_error(mp, "illegal wsize: %s",
825			    opt);
826			error = EINVAL;
827			goto out;
828		}
829		args.flags |= NFSMNT_RSIZE;
830	}
831	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
832		if (opt == NULL) {
833			vfs_mount_error(mp, "illegal retrans");
834			error = EINVAL;
835			goto out;
836		}
837		ret = sscanf(opt, "%d", &args.retrans);
838		if (ret != 1 || args.retrans <= 0) {
839			vfs_mount_error(mp, "illegal retrans: %s",
840			    opt);
841			error = EINVAL;
842			goto out;
843		}
844		args.flags |= NFSMNT_RETRANS;
845	}
846	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
847		ret = sscanf(opt, "%d", &args.acregmin);
848		if (ret != 1 || args.acregmin < 0) {
849			vfs_mount_error(mp, "illegal acregmin: %s",
850			    opt);
851			error = EINVAL;
852			goto out;
853		}
854		args.flags |= NFSMNT_ACREGMIN;
855	}
856	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
857		ret = sscanf(opt, "%d", &args.acregmax);
858		if (ret != 1 || args.acregmax < 0) {
859			vfs_mount_error(mp, "illegal acregmax: %s",
860			    opt);
861			error = EINVAL;
862			goto out;
863		}
864		args.flags |= NFSMNT_ACREGMAX;
865	}
866	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
867		ret = sscanf(opt, "%d", &args.acdirmin);
868		if (ret != 1 || args.acdirmin < 0) {
869			vfs_mount_error(mp, "illegal acdirmin: %s",
870			    opt);
871			error = EINVAL;
872			goto out;
873		}
874		args.flags |= NFSMNT_ACDIRMIN;
875	}
876	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
877		ret = sscanf(opt, "%d", &args.acdirmax);
878		if (ret != 1 || args.acdirmax < 0) {
879			vfs_mount_error(mp, "illegal acdirmax: %s",
880			    opt);
881			error = EINVAL;
882			goto out;
883		}
884		args.flags |= NFSMNT_ACDIRMAX;
885	}
886	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
887		ret = sscanf(opt, "%d", &args.timeo);
888		if (ret != 1 || args.timeo <= 0) {
889			vfs_mount_error(mp, "illegal timeout: %s",
890			    opt);
891			error = EINVAL;
892			goto out;
893		}
894		args.flags |= NFSMNT_TIMEO;
895	}
896	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
897	    == 0) {
898		ret = sscanf(opt, "%d", &negnametimeo);
899		if (ret != 1 || negnametimeo < 0) {
900			vfs_mount_error(mp, "illegal negnametimeo: %s",
901			    opt);
902			error = EINVAL;
903			goto out;
904		}
905	}
906	if (vfs_getopt(mp->mnt_optnew, "sec",
907		(void **) &secname, NULL) == 0)
908		nfs_sec_name(secname, &args.flags);
909
910	if (mp->mnt_flag & MNT_UPDATE) {
911		struct nfsmount *nmp = VFSTONFS(mp);
912
913		if (nmp == NULL) {
914			error = EIO;
915			goto out;
916		}
917		/*
918		 * When doing an update, we can't change version,
919		 * security, switch lockd strategies or change cookie
920		 * translation
921		 */
922		args.flags = (args.flags &
923		    ~(NFSMNT_NFSV3 |
924		      NFSMNT_NFSV4 |
925		      NFSMNT_KERB |
926		      NFSMNT_INTEGRITY |
927		      NFSMNT_PRIVACY |
928		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
929		    (nmp->nm_flag &
930			(NFSMNT_NFSV3 |
931			 NFSMNT_NFSV4 |
932			 NFSMNT_KERB |
933			 NFSMNT_INTEGRITY |
934			 NFSMNT_PRIVACY |
935			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
936		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
937		goto out;
938	}
939
940	/*
941	 * Make the nfs_ip_paranoia sysctl serve as the default connection
942	 * or no-connection mode for those protocols that support
943	 * no-connection mode (the flag will be cleared later for protocols
944	 * that do not support no-connection mode).  This will allow a client
945	 * to receive replies from a different IP then the request was
946	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
947	 * not 0.
948	 */
949	if (nfs_ip_paranoia == 0)
950		args.flags |= NFSMNT_NOCONN;
951
952	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
953	    &args.fhsize) == 0) {
954		if (args.fhsize > NFSX_FHMAX) {
955			vfs_mount_error(mp, "Bad file handle");
956			error = EINVAL;
957			goto out;
958		}
959		bcopy(args.fh, nfh, args.fhsize);
960	} else {
961		args.fhsize = 0;
962	}
963
964	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
965	    &len);
966	if (args.hostname == NULL) {
967		vfs_mount_error(mp, "Invalid hostname");
968		error = EINVAL;
969		goto out;
970	}
971	bcopy(args.hostname, hst, MNAMELEN);
972	hst[MNAMELEN - 1] = '\0';
973
974	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
975		strlcpy(srvkrbname, name, sizeof (srvkrbname));
976	else
977		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
978	args.srvkrbnamelen = strlen(srvkrbname);
979
980	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
981		strlcpy(krbname, name, sizeof (krbname));
982	else
983		krbname[0] = '\0';
984	args.krbnamelen = strlen(krbname);
985
986	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
987		strlcpy(dirpath, name, sizeof (dirpath));
988	else
989		dirpath[0] = '\0';
990	args.dirlen = strlen(dirpath);
991
992	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
993	    &args.addrlen) == 0) {
994		if (args.addrlen > SOCK_MAXADDRLEN) {
995			error = ENAMETOOLONG;
996			goto out;
997		}
998		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
999		bcopy(args.addr, nam, args.addrlen);
1000		nam->sa_len = args.addrlen;
1001	}
1002
1003	args.fh = nfh;
1004	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1005	    &vp, td->td_ucred, td, negnametimeo);
1006out:
1007	if (!error) {
1008		MNT_ILOCK(mp);
1009		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1010		MNT_IUNLOCK(mp);
1011	}
1012	return (error);
1013}
1014
1015
1016/*
1017 * VFS Operations.
1018 *
1019 * mount system call
1020 * It seems a bit dumb to copyinstr() the host and path here and then
1021 * bcopy() them in mountnfs(), but I wanted to detect errors before
1022 * doing the sockargs() call because sockargs() allocates an mbuf and
1023 * an error after that means that I have to release the mbuf.
1024 */
1025/* ARGSUSED */
1026static int
1027nfs_cmount(struct mntarg *ma, void *data, int flags)
1028{
1029	int error;
1030	struct nfs_args args;
1031
1032	error = copyin(data, &args, sizeof (struct nfs_args));
1033	if (error)
1034		return error;
1035
1036	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1037
1038	error = kernel_mount(ma, flags);
1039	return (error);
1040}
1041
1042/*
1043 * Common code for mount and mountroot
1044 */
1045static int
1046mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1047    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1048    struct vnode **vpp, struct ucred *cred, struct thread *td,
1049    int negnametimeo)
1050{
1051	struct nfsmount *nmp;
1052	struct nfsnode *np;
1053	int error, trycnt, ret;
1054	struct nfsvattr nfsva;
1055	static u_int64_t clval = 0;
1056
1057	if (mp->mnt_flag & MNT_UPDATE) {
1058		nmp = VFSTONFS(mp);
1059		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1060		FREE(nam, M_SONAME);
1061		return (0);
1062	} else {
1063		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1064		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1065		    M_NEWNFSMNT, M_WAITOK);
1066		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1067		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1068		TAILQ_INIT(&nmp->nm_bufq);
1069		if (clval == 0)
1070			clval = (u_int64_t)nfsboottime.tv_sec;
1071		nmp->nm_clval = clval++;
1072		nmp->nm_krbnamelen = argp->krbnamelen;
1073		nmp->nm_dirpathlen = argp->dirlen;
1074		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1075		if (td->td_ucred->cr_uid != (uid_t)0) {
1076			/*
1077			 * nm_uid is used to get KerberosV credentials for
1078			 * the nfsv4 state handling operations if there is
1079			 * no host based principal set. Use the uid of
1080			 * this user if not root, since they are doing the
1081			 * mount. I don't think setting this for root will
1082			 * work, since root normally does not have user
1083			 * credentials in a credentials cache.
1084			 */
1085			nmp->nm_uid = td->td_ucred->cr_uid;
1086		} else {
1087			/*
1088			 * Just set to -1, so it won't be used.
1089			 */
1090			nmp->nm_uid = (uid_t)-1;
1091		}
1092
1093		/* Copy and null terminate all the names */
1094		if (nmp->nm_krbnamelen > 0) {
1095			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1096			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1097		}
1098		if (nmp->nm_dirpathlen > 0) {
1099			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1100			    nmp->nm_dirpathlen);
1101			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1102			    + 1] = '\0';
1103		}
1104		if (nmp->nm_srvkrbnamelen > 0) {
1105			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1106			    nmp->nm_srvkrbnamelen);
1107			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1108			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1109		}
1110		nmp->nm_sockreq.nr_cred = crhold(cred);
1111		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1112		mp->mnt_data = nmp;
1113	}
1114	vfs_getnewfsid(mp);
1115	nmp->nm_mountp = mp;
1116	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1117	nmp->nm_negnametimeo = negnametimeo;
1118
1119	nfs_decode_args(mp, nmp, argp, cred, td);
1120
1121	/*
1122	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1123	 * high, depending on whether we end up with negative offsets in
1124	 * the client or server somewhere.  2GB-1 may be safer.
1125	 *
1126	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1127	 * that we can handle until we find out otherwise.
1128	 * XXX Our "safe" limit on the client is what we can store in our
1129	 * buffer cache using signed(!) block numbers.
1130	 */
1131	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1132		nmp->nm_maxfilesize = 0xffffffffLL;
1133	else
1134		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1135
1136	nmp->nm_timeo = NFS_TIMEO;
1137	nmp->nm_retry = NFS_RETRANS;
1138	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1139		nmp->nm_wsize = NFS_WSIZE;
1140		nmp->nm_rsize = NFS_RSIZE;
1141		nmp->nm_readdirsize = NFS_READDIRSIZE;
1142	}
1143	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1144	nmp->nm_numgrps = NFS_MAXGRPS;
1145	nmp->nm_readahead = NFS_DEFRAHEAD;
1146	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1147	if (nmp->nm_tprintf_delay < 0)
1148		nmp->nm_tprintf_delay = 0;
1149	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1150	if (nmp->nm_tprintf_initial_delay < 0)
1151		nmp->nm_tprintf_initial_delay = 0;
1152	nmp->nm_fhsize = argp->fhsize;
1153	if (nmp->nm_fhsize > 0)
1154		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1155	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1156	nmp->nm_nam = nam;
1157	/* Set up the sockets and per-host congestion */
1158	nmp->nm_sotype = argp->sotype;
1159	nmp->nm_soproto = argp->proto;
1160	nmp->nm_sockreq.nr_prog = NFS_PROG;
1161	if ((argp->flags & NFSMNT_NFSV4))
1162		nmp->nm_sockreq.nr_vers = NFS_VER4;
1163	else if ((argp->flags & NFSMNT_NFSV3))
1164		nmp->nm_sockreq.nr_vers = NFS_VER3;
1165	else
1166		nmp->nm_sockreq.nr_vers = NFS_VER2;
1167
1168
1169	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1170		goto bad;
1171
1172	/*
1173	 * A reference count is needed on the nfsnode representing the
1174	 * remote root.  If this object is not persistent, then backward
1175	 * traversals of the mount point (i.e. "..") will not work if
1176	 * the nfsnode gets flushed out of the cache. Ufs does not have
1177	 * this problem, because one can identify root inodes by their
1178	 * number == ROOTINO (2).
1179	 */
1180	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1181	    nmp->nm_dirpathlen > 0) {
1182		/*
1183		 * If the fhsize on the mount point == 0 for V4, the mount
1184		 * path needs to be looked up.
1185		 */
1186		trycnt = 3;
1187		do {
1188			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1189			    cred, td);
1190			if (error)
1191				(void) nfs_catnap(PZERO, "nfsgetdirp");
1192		} while (error && --trycnt > 0);
1193		if (error) {
1194			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1195			goto bad;
1196		}
1197	}
1198	if (nmp->nm_fhsize > 0) {
1199		/*
1200		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1201		 * non-zero for the root vnode. f_iosize will be set correctly
1202		 * by nfs_statfs() before any I/O occurs.
1203		 */
1204		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1205		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1206		if (error)
1207			goto bad;
1208		*vpp = NFSTOV(np);
1209
1210		/*
1211		 * Get file attributes and transfer parameters for the
1212		 * mountpoint.  This has the side effect of filling in
1213		 * (*vpp)->v_type with the correct value.
1214		 */
1215		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1216		    cred, td, &nfsva, NULL);
1217		if (ret) {
1218			/*
1219			 * Just set default values to get things going.
1220			 */
1221			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1222			nfsva.na_vattr.va_type = VDIR;
1223			nfsva.na_vattr.va_mode = 0777;
1224			nfsva.na_vattr.va_nlink = 100;
1225			nfsva.na_vattr.va_uid = (uid_t)0;
1226			nfsva.na_vattr.va_gid = (gid_t)0;
1227			nfsva.na_vattr.va_fileid = 2;
1228			nfsva.na_vattr.va_gen = 1;
1229			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1230			nfsva.na_vattr.va_size = 512 * 1024;
1231		}
1232		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1233		if (argp->flags & NFSMNT_NFSV3)
1234			ncl_fsinfo(nmp, *vpp, cred, td);
1235
1236		/*
1237		 * Lose the lock but keep the ref.
1238		 */
1239		VOP_UNLOCK(*vpp, 0);
1240		return (0);
1241	}
1242	error = EIO;
1243
1244bad:
1245	newnfs_disconnect(&nmp->nm_sockreq);
1246	crfree(nmp->nm_sockreq.nr_cred);
1247	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1248	mtx_destroy(&nmp->nm_mtx);
1249	FREE(nmp, M_NEWNFSMNT);
1250	FREE(nam, M_SONAME);
1251	return (error);
1252}
1253
1254/*
1255 * unmount system call
1256 */
1257static int
1258nfs_unmount(struct mount *mp, int mntflags)
1259{
1260	struct thread *td;
1261	struct nfsmount *nmp;
1262	int error, flags = 0, trycnt = 0;
1263
1264	td = curthread;
1265
1266	if (mntflags & MNT_FORCE)
1267		flags |= FORCECLOSE;
1268	nmp = VFSTONFS(mp);
1269	/*
1270	 * Goes something like this..
1271	 * - Call vflush() to clear out vnodes for this filesystem
1272	 * - Close the socket
1273	 * - Free up the data structures
1274	 */
1275	/* In the forced case, cancel any outstanding requests. */
1276	if (mntflags & MNT_FORCE) {
1277		error = newnfs_nmcancelreqs(nmp);
1278		if (error)
1279			goto out;
1280		/* For a forced close, get rid of the renew thread now */
1281		nfscl_umount(nmp, td);
1282	}
1283	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1284	do {
1285		error = vflush(mp, 1, flags, td);
1286		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1287			(void) nfs_catnap(PSOCK, "newndm");
1288	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1289	if (error)
1290		goto out;
1291
1292	/*
1293	 * We are now committed to the unmount.
1294	 */
1295	if ((mntflags & MNT_FORCE) == 0)
1296		nfscl_umount(nmp, td);
1297	newnfs_disconnect(&nmp->nm_sockreq);
1298	crfree(nmp->nm_sockreq.nr_cred);
1299	FREE(nmp->nm_nam, M_SONAME);
1300
1301	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1302	mtx_destroy(&nmp->nm_mtx);
1303	FREE(nmp, M_NEWNFSMNT);
1304out:
1305	return (error);
1306}
1307
1308/*
1309 * Return root of a filesystem
1310 */
1311static int
1312nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1313{
1314	struct vnode *vp;
1315	struct nfsmount *nmp;
1316	struct nfsnode *np;
1317	int error;
1318
1319	nmp = VFSTONFS(mp);
1320	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1321	if (error)
1322		return error;
1323	vp = NFSTOV(np);
1324	/*
1325	 * Get transfer parameters and attributes for root vnode once.
1326	 */
1327	mtx_lock(&nmp->nm_mtx);
1328	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1329		mtx_unlock(&nmp->nm_mtx);
1330		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1331	} else
1332		mtx_unlock(&nmp->nm_mtx);
1333	if (vp->v_type == VNON)
1334	    vp->v_type = VDIR;
1335	vp->v_vflag |= VV_ROOT;
1336	*vpp = vp;
1337	return (0);
1338}
1339
1340/*
1341 * Flush out the buffer cache
1342 */
1343/* ARGSUSED */
1344static int
1345nfs_sync(struct mount *mp, int waitfor)
1346{
1347	struct vnode *vp, *mvp;
1348	struct thread *td;
1349	int error, allerror = 0;
1350
1351	td = curthread;
1352
1353	/*
1354	 * Force stale buffer cache information to be flushed.
1355	 */
1356	MNT_ILOCK(mp);
1357loop:
1358	MNT_VNODE_FOREACH(vp, mp, mvp) {
1359		VI_LOCK(vp);
1360		MNT_IUNLOCK(mp);
1361		/* XXX Racy bv_cnt check. */
1362		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1363		    waitfor == MNT_LAZY) {
1364			VI_UNLOCK(vp);
1365			MNT_ILOCK(mp);
1366			continue;
1367		}
1368		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1369			MNT_ILOCK(mp);
1370			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1371			goto loop;
1372		}
1373		error = VOP_FSYNC(vp, waitfor, td);
1374		if (error)
1375			allerror = error;
1376		VOP_UNLOCK(vp, 0);
1377		vrele(vp);
1378
1379		MNT_ILOCK(mp);
1380	}
1381	MNT_IUNLOCK(mp);
1382	return (allerror);
1383}
1384
1385static int
1386nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1387{
1388	struct nfsmount *nmp = VFSTONFS(mp);
1389	struct vfsquery vq;
1390	int error;
1391
1392	bzero(&vq, sizeof(vq));
1393	switch (op) {
1394#if 0
1395	case VFS_CTL_NOLOCKS:
1396		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1397 		if (req->oldptr != NULL) {
1398 			error = SYSCTL_OUT(req, &val, sizeof(val));
1399 			if (error)
1400 				return (error);
1401 		}
1402 		if (req->newptr != NULL) {
1403 			error = SYSCTL_IN(req, &val, sizeof(val));
1404 			if (error)
1405 				return (error);
1406			if (val)
1407				nmp->nm_flag |= NFSMNT_NOLOCKS;
1408			else
1409				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1410 		}
1411		break;
1412#endif
1413	case VFS_CTL_QUERY:
1414		mtx_lock(&nmp->nm_mtx);
1415		if (nmp->nm_state & NFSSTA_TIMEO)
1416			vq.vq_flags |= VQ_NOTRESP;
1417		mtx_unlock(&nmp->nm_mtx);
1418#if 0
1419		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1420		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1421			vq.vq_flags |= VQ_NOTRESPLOCK;
1422#endif
1423		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1424		break;
1425 	case VFS_CTL_TIMEO:
1426 		if (req->oldptr != NULL) {
1427 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1428 			    sizeof(nmp->nm_tprintf_initial_delay));
1429 			if (error)
1430 				return (error);
1431 		}
1432 		if (req->newptr != NULL) {
1433			error = vfs_suser(mp, req->td);
1434			if (error)
1435				return (error);
1436 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1437 			    sizeof(nmp->nm_tprintf_initial_delay));
1438 			if (error)
1439 				return (error);
1440 			if (nmp->nm_tprintf_initial_delay < 0)
1441 				nmp->nm_tprintf_initial_delay = 0;
1442 		}
1443		break;
1444	default:
1445		return (ENOTSUP);
1446	}
1447	return (0);
1448}
1449
1450