nfs_clvfsops.c revision 192585
165557Sjasone/*-
2132637Simp * Copyright (c) 1989, 1993, 1995
365557Sjasone *	The Regents of the University of California.  All rights reserved.
4132637Simp *
5132637Simp * This code is derived from software contributed to Berkeley by
6132637Simp * Rick Macklem at The University of Guelph.
7132637Simp *
8132637Simp * Redistribution and use in source and binary forms, with or without
9132637Simp * modification, are permitted provided that the following conditions
10132637Simp * are met:
11132637Simp * 1. Redistributions of source code must retain the above copyright
12132637Simp *    notice, this list of conditions and the following disclaimer.
13132637Simp * 2. Redistributions in binary form must reproduce the above copyright
14132637Simp *    notice, this list of conditions and the following disclaimer in the
15132637Simp *    documentation and/or other materials provided with the distribution.
16132637Simp * 4. Neither the name of the University nor the names of its contributors
17132637Simp *    may be used to endorse or promote products derived from this software
18132637Simp *    without specific prior written permission.
19132637Simp *
20132637Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21132637Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22132637Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23132637Simp * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2465557Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2565557Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26116182Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27116182Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28116182Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2965557Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3065557Sjasone * SUCH DAMAGE.
3165557Sjasone *
3276440Sjhb *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
3376166Smarkm */
3476166Smarkm
3576166Smarkm#include <sys/cdefs.h>
3676440Sjhb__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 192585 2009-05-22 15:08:12Z rmacklem $");
37104964Sjeff
3865557Sjasone
39134591Sjulian#include "opt_bootp.h"
40134591Sjulian#include "opt_nfsroot.h"
41134591Sjulian
4265557Sjasone#include <sys/param.h>
4365557Sjasone#include <sys/systm.h>
4465557Sjasone#include <sys/kernel.h>
4565557Sjasone#include <sys/bio.h>
4665557Sjasone#include <sys/buf.h>
47121238Speter#include <sys/clock.h>
4872222Sjhb#include <sys/lock.h>
4972222Sjhb#include <sys/malloc.h>
5065557Sjasone#include <sys/mbuf.h>
5165557Sjasone#include <sys/module.h>
5265557Sjasone#include <sys/mount.h>
5365557Sjasone#include <sys/proc.h>
5476078Sjhb#include <sys/socket.h>
5587702Sjhb#include <sys/socketvar.h>
5676078Sjhb#include <sys/sockio.h>
5776078Sjhb#include <sys/sysctl.h>
5899072Sjulian#include <sys/vnode.h>
5965557Sjasone#include <sys/signalvar.h>
6065557Sjasone
6176078Sjhb#include <vm/vm.h>
6287702Sjhb#include <vm/vm_extern.h>
63173004Sjulian#include <vm/uma.h>
64173035Sjulian
65173004Sjulian#include <net/if.h>
66173004Sjulian#include <net/route.h>
67173004Sjulian#include <netinet/in.h>
6865557Sjasone
69173035Sjulian#include <fs/nfs/nfsport.h>
70173035Sjulian#include <fs/nfsclient/nfsnode.h>
71173004Sjulian#include <fs/nfsclient/nfsmount.h>
7265557Sjasone#include <fs/nfsclient/nfs.h>
73173004Sjulian#include <fs/nfsclient/nfsdiskless.h>
7465557Sjasone
/* Objects declared here but defined in other NFS client source files. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;

/* malloc(9) types defined by this file. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

/* Statistics and tunables exported through the _vfs_newnfs sysctl node. */
SYSCTL_DECL(_vfs_newnfs);
SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
	&newnfsstats, nfsstats, "S,nfsstats");
/*
 * Default connection mode for new mounts: when this is 0, nfs_mount()
 * adds NFSMNT_NOCONN to the mount flags.  The default of 1 is "paranoid".
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): presumably the delay before the first "not responding"
 * console message; the variable is not read in this file -- confirm
 * against its users.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
95
/*
 * Forward declarations: file-local helpers first, then the VFS entry
 * points that are installed in nfs_vfsops below.
 */
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, struct ucred *, struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
		    struct vnode **, struct ucred *, struct thread *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
109
/*
 * nfs vfs operations.
 *
 * Operation vector for this file system type.  ncl_init and ncl_uninit
 * are not defined in this file; the remaining entries are the static
 * functions declared above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
/* Register the vector under the name "newnfs", flagged VFCF_NETWORK. */
VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(newnfs, 1);
128
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * newnfs_diskless_valid records the state of that information as used
 * by ncl_mountroot(): 0 means nothing usable (the root mount is refused),
 * 1 means newnfs_diskless is filled in and must first be converted with
 * nfs_convert_diskless(), which then sets the value to 3.
 */
struct nfs_diskless newnfs_diskless = { { { 0 } } };
struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
int newnfs_diskless_valid = 0;

/* Read-only sysctl views of the diskless boot information. */
SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &newnfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/* Prototypes for the diskless-boot helpers defined further down. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
157
158int
159newnfs_iosize(struct nfsmount *nmp)
160{
161	int iosize, maxio;
162
163	/* First, set the upper limit for iosize */
164	if (nmp->nm_flag & NFSMNT_NFSV4) {
165		maxio = NFS_MAXBSIZE;
166	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
167		if (nmp->nm_sotype == SOCK_DGRAM)
168			maxio = NFS_MAXDGRAMDATA;
169		else
170			maxio = NFS_MAXBSIZE;
171	} else {
172		maxio = NFS_V2MAXDATA;
173	}
174	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
175		nmp->nm_rsize = maxio;
176	if (nmp->nm_rsize > MAXBSIZE)
177		nmp->nm_rsize = MAXBSIZE;
178	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
179		nmp->nm_readdirsize = maxio;
180	if (nmp->nm_readdirsize > nmp->nm_rsize)
181		nmp->nm_readdirsize = nmp->nm_rsize;
182	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
183		nmp->nm_wsize = maxio;
184	if (nmp->nm_wsize > MAXBSIZE)
185		nmp->nm_wsize = MAXBSIZE;
186
187	/*
188	 * Calculate the size used for io buffers.  Use the larger
189	 * of the two sizes to minimise nfs requests but make sure
190	 * that it is at least one VM page to avoid wasting buffer
191	 * space.
192	 */
193	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
194	iosize = imax(iosize, PAGE_SIZE);
195	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
196	return (iosize);
197}
198
199static void
200nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
201{
202
203	args->version = NFS_ARGSVERSION;
204	args->addr = oargs->addr;
205	args->addrlen = oargs->addrlen;
206	args->sotype = oargs->sotype;
207	args->proto = oargs->proto;
208	args->fh = oargs->fh;
209	args->fhsize = oargs->fhsize;
210	args->flags = oargs->flags;
211	args->wsize = oargs->wsize;
212	args->rsize = oargs->rsize;
213	args->readdirsize = oargs->readdirsize;
214	args->timeo = oargs->timeo;
215	args->retrans = oargs->retrans;
216	args->readahead = oargs->readahead;
217	args->hostname = oargs->hostname;
218}
219
220static void
221nfs_convert_diskless(void)
222{
223
224	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
225	    sizeof (struct ifaliasreq));
226	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
227	    sizeof (struct sockaddr_in));
228	nfs_convert_oargs(&newnfsv3_diskless.root_args,
229	    &newnfs_diskless.root_args);
230	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
231		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
232		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
233		    NFSX_MYFH);
234	} else {
235		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
236		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
237		    NFSX_V2FH);
238	}
239	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
240	    sizeof(struct sockaddr_in));
241	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
242	    MNAMELEN);
243	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
244	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
245	    MAXHOSTNAMELEN);
246	newnfs_diskless_valid = 3;
247}
248
249/*
250 * nfs statfs call
251 */
252static int
253nfs_statfs(struct mount *mp, struct statfs *sbp)
254{
255	struct vnode *vp;
256	struct thread *td;
257	struct nfsmount *nmp = VFSTONFS(mp);
258	struct nfsvattr nfsva;
259	struct nfsfsinfo fs;
260	struct nfsstatfs sb;
261	int error = 0, attrflag, gotfsinfo = 0, ret;
262	struct nfsnode *np;
263
264	td = curthread;
265
266	error = vfs_busy(mp, MBF_NOWAIT);
267	if (error)
268		return (error);
269	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
270	if (error) {
271		vfs_unbusy(mp);
272		return (error);
273	}
274	vp = NFSTOV(np);
275	mtx_lock(&nmp->nm_mtx);
276	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
277		mtx_unlock(&nmp->nm_mtx);
278		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
279		    &attrflag, NULL);
280		if (!error)
281			gotfsinfo = 1;
282	} else
283		mtx_unlock(&nmp->nm_mtx);
284	if (!error)
285		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
286		    &attrflag, NULL);
287	if (attrflag == 0) {
288		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
289		    td->td_ucred, td, &nfsva, NULL);
290		if (ret) {
291			/*
292			 * Just set default values to get things going.
293			 */
294			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
295			nfsva.na_vattr.va_type = VDIR;
296			nfsva.na_vattr.va_mode = 0777;
297			nfsva.na_vattr.va_nlink = 100;
298			nfsva.na_vattr.va_uid = (uid_t)0;
299			nfsva.na_vattr.va_gid = (gid_t)0;
300			nfsva.na_vattr.va_fileid = 2;
301			nfsva.na_vattr.va_gen = 1;
302			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
303			nfsva.na_vattr.va_size = 512 * 1024;
304		}
305	}
306	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
307	if (!error) {
308	    mtx_lock(&nmp->nm_mtx);
309	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
310		nfscl_loadfsinfo(nmp, &fs);
311	    nfscl_loadsbinfo(nmp, &sb, sbp);
312	    sbp->f_flags = nmp->nm_flag;
313	    sbp->f_iosize = newnfs_iosize(nmp);
314	    mtx_unlock(&nmp->nm_mtx);
315	    if (sbp != &mp->mnt_stat) {
316		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
317		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
318	    }
319	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
320	} else if (NFS_ISV4(vp)) {
321		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
322	}
323	vput(vp);
324	vfs_unbusy(mp);
325	return (error);
326}
327
328/*
329 * nfs version 3 fsinfo rpc call
330 */
331int
332ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
333    struct thread *td)
334{
335	struct nfsfsinfo fs;
336	struct nfsvattr nfsva;
337	int error, attrflag;
338
339	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
340	if (!error) {
341		if (attrflag)
342			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
343			    1);
344		mtx_lock(&nmp->nm_mtx);
345		nfscl_loadfsinfo(nmp, &fs);
346		mtx_unlock(&nmp->nm_mtx);
347	}
348	return (error);
349}
350
351/*
352 * Mount a remote root fs via. nfs. This depends on the info in the
353 * newnfs_diskless structure that has been filled in properly by some primary
354 * bootstrap.
355 * It goes something like this:
356 * - do enough of "ifconfig" by calling ifioctl() so that the system
357 *   can talk to the server
358 * - If newnfs_diskless.mygateway is filled in, use that address as
359 *   a default gateway.
360 * - build the rootfs mount point and call mountnfs() to do the rest.
361 *
362 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
363 * structure, as well as other global NFS client variables here, as
364 * nfs_mountroot() will be called once in the boot before any other NFS
365 * client activity occurs.
366 */
367int
368ncl_mountroot(struct mount *mp)
369{
370	struct thread *td = curthread;
371	INIT_VPROCG(TD_TO_VPROCG(td));
372	struct nfsv3_diskless *nd = &newnfsv3_diskless;
373	struct socket *so;
374	struct vnode *vp;
375	struct ifreq ir;
376	int error, i;
377	u_long l;
378	char buf[128];
379	char *cp;
380
381#if defined(BOOTP_NFSROOT) && defined(BOOTP)
382	bootpc_init();		/* use bootp to get nfs_diskless filled in */
383#elif defined(NFS_ROOT)
384	nfs_setup_diskless();
385#endif
386
387	if (newnfs_diskless_valid == 0)
388		return (-1);
389	if (newnfs_diskless_valid == 1)
390		nfs_convert_diskless();
391
392	/*
393	 * XXX splnet, so networks will receive...
394	 */
395	splnet();
396
397	/*
398	 * Do enough of ifconfig(8) so that the critical net interface can
399	 * talk to the server.
400	 */
401	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
402	    td->td_ucred, td);
403	if (error)
404		panic("nfs_mountroot: socreate(%04x): %d",
405			nd->myif.ifra_addr.sa_family, error);
406
407#if 0 /* XXX Bad idea */
408	/*
409	 * We might not have been told the right interface, so we pass
410	 * over the first ten interfaces of the same kind, until we get
411	 * one of them configured.
412	 */
413
414	for (i = strlen(nd->myif.ifra_name) - 1;
415		nd->myif.ifra_name[i] >= '0' &&
416		nd->myif.ifra_name[i] <= '9';
417		nd->myif.ifra_name[i] ++) {
418		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
419		if(!error)
420			break;
421	}
422#endif
423	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
424	if (error)
425		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
426	if ((cp = getenv("boot.netif.mtu")) != NULL) {
427		ir.ifr_mtu = strtol(cp, NULL, 10);
428		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
429		freeenv(cp);
430		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
431		if (error)
432			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
433	}
434	soclose(so);
435
436	/*
437	 * If the gateway field is filled in, set it as the default route.
438	 * Note that pxeboot will set a default route of 0 if the route
439	 * is not set by the DHCP server.  Check also for a value of 0
440	 * to avoid panicking inappropriately in that situation.
441	 */
442	if (nd->mygateway.sin_len != 0 &&
443	    nd->mygateway.sin_addr.s_addr != 0) {
444		struct sockaddr_in mask, sin;
445
446		bzero((caddr_t)&mask, sizeof(mask));
447		sin = mask;
448		sin.sin_family = AF_INET;
449		sin.sin_len = sizeof(sin);
450                /* XXX MRT use table 0 for this sort of thing */
451		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
452		    (struct sockaddr *)&nd->mygateway,
453		    (struct sockaddr *)&mask,
454		    RTF_UP | RTF_GATEWAY, NULL);
455		if (error)
456			panic("nfs_mountroot: RTM_ADD: %d", error);
457	}
458
459	/*
460	 * Create the rootfs mount point.
461	 */
462	nd->root_args.fh = nd->root_fh;
463	nd->root_args.fhsize = nd->root_fhsize;
464	l = ntohl(nd->root_saddr.sin_addr.s_addr);
465	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
466		(l >> 24) & 0xff, (l >> 16) & 0xff,
467		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
468	printf("NFS ROOT: %s\n", buf);
469	nd->root_args.hostname = buf;
470	if ((error = nfs_mountdiskless(buf,
471	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
472		return (error);
473	}
474
475	/*
476	 * This is not really an nfs issue, but it is much easier to
477	 * set hostname here and then let the "/etc/rc.xxx" files
478	 * mount the right /var based upon its preset value.
479	 */
480	mtx_lock(&hostname_mtx);
481	bcopy(nd->my_hostnam, V_hostname, MAXHOSTNAMELEN);
482	V_hostname[MAXHOSTNAMELEN - 1] = '\0';
483	for (i = 0; i < MAXHOSTNAMELEN; i++)
484		if (V_hostname[i] == '\0')
485			break;
486	mtx_unlock(&hostname_mtx);
487	inittodr(ntohl(nd->root_time));
488	return (0);
489}
490
491/*
492 * Internal version of mount system call for diskless setup.
493 */
494static int
495nfs_mountdiskless(char *path,
496    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
497    struct vnode **vpp, struct mount *mp)
498{
499	struct sockaddr *nam;
500	int error;
501
502	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
503	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
504	    td->td_ucred, td)) != 0) {
505		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
506		return (error);
507	}
508	return (0);
509}
510
511static void
512nfs_sec_name(char *sec, int *flagsp)
513{
514	if (!strcmp(sec, "krb5"))
515		*flagsp |= NFSMNT_KERB;
516	else if (!strcmp(sec, "krb5i"))
517		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
518	else if (!strcmp(sec, "krb5p"))
519		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
520}
521
522static void
523nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
524    struct ucred *cred, struct thread *td)
525{
526	int s;
527	int adjsock;
528
529	s = splnet();
530
531	/*
532	 * Set read-only flag if requested; otherwise, clear it if this is
533	 * an update.  If this is not an update, then either the read-only
534	 * flag is already clear, or this is a root mount and it was set
535	 * intentionally at some previous point.
536	 */
537	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
538		MNT_ILOCK(mp);
539		mp->mnt_flag |= MNT_RDONLY;
540		MNT_IUNLOCK(mp);
541	} else if (mp->mnt_flag & MNT_UPDATE) {
542		MNT_ILOCK(mp);
543		mp->mnt_flag &= ~MNT_RDONLY;
544		MNT_IUNLOCK(mp);
545	}
546
547	/*
548	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
549	 * no sense in that context.  Also, set up appropriate retransmit
550	 * and soft timeout behavior.
551	 */
552	if (argp->sotype == SOCK_STREAM) {
553		nmp->nm_flag &= ~NFSMNT_NOCONN;
554		nmp->nm_timeo = NFS_MAXTIMEO;
555	}
556
557	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
558	if ((argp->flags & NFSMNT_NFSV3) == 0)
559		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
560
561	/* Also re-bind if we're switching to/from a connected UDP socket */
562	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
563		    (argp->flags & NFSMNT_NOCONN));
564
565	/* Update flags atomically.  Don't change the lock bits. */
566	nmp->nm_flag = argp->flags | nmp->nm_flag;
567	splx(s);
568
569	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
570		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
571		if (nmp->nm_timeo < NFS_MINTIMEO)
572			nmp->nm_timeo = NFS_MINTIMEO;
573		else if (nmp->nm_timeo > NFS_MAXTIMEO)
574			nmp->nm_timeo = NFS_MAXTIMEO;
575	}
576
577	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
578		nmp->nm_retry = argp->retrans;
579		if (nmp->nm_retry > NFS_MAXREXMIT)
580			nmp->nm_retry = NFS_MAXREXMIT;
581	}
582
583	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
584		nmp->nm_wsize = argp->wsize;
585		/* Round down to multiple of blocksize */
586		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
587		if (nmp->nm_wsize <= 0)
588			nmp->nm_wsize = NFS_FABLKSIZE;
589	}
590
591	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
592		nmp->nm_rsize = argp->rsize;
593		/* Round down to multiple of blocksize */
594		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
595		if (nmp->nm_rsize <= 0)
596			nmp->nm_rsize = NFS_FABLKSIZE;
597	}
598
599	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
600		nmp->nm_readdirsize = argp->readdirsize;
601	}
602
603	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
604		nmp->nm_acregmin = argp->acregmin;
605	else
606		nmp->nm_acregmin = NFS_MINATTRTIMO;
607	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
608		nmp->nm_acregmax = argp->acregmax;
609	else
610		nmp->nm_acregmax = NFS_MAXATTRTIMO;
611	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
612		nmp->nm_acdirmin = argp->acdirmin;
613	else
614		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
615	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
616		nmp->nm_acdirmax = argp->acdirmax;
617	else
618		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
619	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
620		nmp->nm_acdirmin = nmp->nm_acdirmax;
621	if (nmp->nm_acregmin > nmp->nm_acregmax)
622		nmp->nm_acregmin = nmp->nm_acregmax;
623
624	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
625		if (argp->readahead <= NFS_MAXRAHEAD)
626			nmp->nm_readahead = argp->readahead;
627		else
628			nmp->nm_readahead = NFS_MAXRAHEAD;
629	}
630	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
631		if (argp->wcommitsize < nmp->nm_wsize)
632			nmp->nm_wcommitsize = nmp->nm_wsize;
633		else
634			nmp->nm_wcommitsize = argp->wcommitsize;
635	}
636
637	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
638		    (nmp->nm_soproto != argp->proto));
639
640	if (nmp->nm_client != NULL && adjsock) {
641		int haslock = 0, error = 0;
642
643		if (nmp->nm_sotype == SOCK_STREAM) {
644			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
645			if (!error)
646				haslock = 1;
647		}
648		if (!error) {
649		    newnfs_disconnect(&nmp->nm_sockreq);
650		    if (haslock)
651			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
652		    nmp->nm_sotype = argp->sotype;
653		    nmp->nm_soproto = argp->proto;
654		    if (nmp->nm_sotype == SOCK_DGRAM)
655			while (newnfs_connect(nmp, &nmp->nm_sockreq,
656			    cred, td, 0)) {
657				printf("newnfs_args: retrying connect\n");
658				(void) nfs_catnap(PSOCK, "newnfscon");
659			}
660		}
661	} else {
662		nmp->nm_sotype = argp->sotype;
663		nmp->nm_soproto = argp->proto;
664	}
665}
666
/*
 * NULL-terminated list of mount option names that nfs_mount() passes to
 * vfs_filteropt() when validating the options of a mount request.
 */
static const char *nfs_opts[] = {
	"from",
	"noatime",
	"noexec",
	"suiddir",
	"nosuid",
	"nosymfollow",
	"union",
	"noclusterr",
	"noclusterw",
	"multilabel",
	"acls",
	"force",
	"update",
	"async",
	"noconn",
	"nolockd",
	"conn",
	"lockd",
	"intr",
	"rdirplus",
	"readdirsize",
	"soft",
	"hard",
	"mntudp",
	"tcp",
	"udp",
	"wsize",
	"rsize",
	"retrans",
	"acregmin",
	"acregmax",
	"acdirmin",
	"acdirmax",
	"resvport",
	"readahead",
	"hostname",
	"timeout",
	"addr",
	"fh",
	"nfsv3",
	"sec",
	"principal",
	"nfsv4",
	"gssname",
	"allgssname",
	"dirpath",
	NULL
};
676
677/*
678 * VFS Operations.
679 *
680 * mount system call
681 * It seems a bit dumb to copyinstr() the host and path here and then
682 * bcopy() them in mountnfs(), but I wanted to detect errors before
683 * doing the sockargs() call because sockargs() allocates an mbuf and
684 * an error after that means that I have to release the mbuf.
685 */
686/* ARGSUSED */
687static int
688nfs_mount(struct mount *mp)
689{
690	struct nfs_args args = {
691	    .version = NFS_ARGSVERSION,
692	    .addr = NULL,
693	    .addrlen = sizeof (struct sockaddr_in),
694	    .sotype = SOCK_STREAM,
695	    .proto = 0,
696	    .fh = NULL,
697	    .fhsize = 0,
698	    .flags = 0,
699	    .wsize = NFS_WSIZE,
700	    .rsize = NFS_RSIZE,
701	    .readdirsize = NFS_READDIRSIZE,
702	    .timeo = 10,
703	    .retrans = NFS_RETRANS,
704	    .readahead = NFS_DEFRAHEAD,
705	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
706	    .hostname = NULL,
707	    /* args version 4 */
708	    .acregmin = NFS_MINATTRTIMO,
709	    .acregmax = NFS_MAXATTRTIMO,
710	    .acdirmin = NFS_MINDIRATTRTIMO,
711	    .acdirmax = NFS_MAXDIRATTRTIMO,
712	    .dirlen = 0,
713	    .krbnamelen = 0,
714	    .srvkrbnamelen = 0,
715	};
716	int error = 0, ret, len;
717	struct sockaddr *nam = NULL;
718	struct vnode *vp;
719	struct thread *td;
720	char hst[MNAMELEN];
721	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
722	char *opt, *name, *secname;
723
724	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
725		error = EINVAL;
726		goto out;
727	}
728
729	td = curthread;
730	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
731		error = ncl_mountroot(mp);
732		goto out;
733	}
734
735	nfscl_init();
736
737	/* Handle the new style options. */
738	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
739		args.flags |= NFSMNT_NOCONN;
740	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
741		args.flags |= NFSMNT_NOCONN;
742	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
743		args.flags |= NFSMNT_NOLOCKD;
744	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
745		args.flags &= ~NFSMNT_NOLOCKD;
746	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
747		args.flags |= NFSMNT_INT;
748	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
749		args.flags |= NFSMNT_RDIRPLUS;
750	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
751		args.flags |= NFSMNT_RESVPORT;
752	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
753		args.flags &= ~NFSMNT_RESVPORT;
754	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
755		args.flags |= NFSMNT_SOFT;
756	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
757		args.flags &= ~NFSMNT_SOFT;
758	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
759		args.sotype = SOCK_DGRAM;
760	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
761		args.sotype = SOCK_DGRAM;
762	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
763		args.sotype = SOCK_STREAM;
764	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
765		args.flags |= NFSMNT_NFSV3;
766	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
767		args.flags |= NFSMNT_NFSV4;
768		args.sotype = SOCK_STREAM;
769	}
770	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
771		args.flags |= NFSMNT_ALLGSSNAME;
772	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
773		if (opt == NULL) {
774			vfs_mount_error(mp, "illegal readdirsize");
775			error = EINVAL;
776			goto out;
777		}
778		ret = sscanf(opt, "%d", &args.readdirsize);
779		if (ret != 1 || args.readdirsize <= 0) {
780			vfs_mount_error(mp, "illegal readdirsize: %s",
781			    opt);
782			error = EINVAL;
783			goto out;
784		}
785		args.flags |= NFSMNT_READDIRSIZE;
786	}
787	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
788		if (opt == NULL) {
789			vfs_mount_error(mp, "illegal readahead");
790			error = EINVAL;
791			goto out;
792		}
793		ret = sscanf(opt, "%d", &args.readahead);
794		if (ret != 1 || args.readahead <= 0) {
795			vfs_mount_error(mp, "illegal readahead: %s",
796			    opt);
797			error = EINVAL;
798			goto out;
799		}
800		args.flags |= NFSMNT_READAHEAD;
801	}
802	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
803		if (opt == NULL) {
804			vfs_mount_error(mp, "illegal wsize");
805			error = EINVAL;
806			goto out;
807		}
808		ret = sscanf(opt, "%d", &args.wsize);
809		if (ret != 1 || args.wsize <= 0) {
810			vfs_mount_error(mp, "illegal wsize: %s",
811			    opt);
812			error = EINVAL;
813			goto out;
814		}
815		args.flags |= NFSMNT_WSIZE;
816	}
817	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
818		if (opt == NULL) {
819			vfs_mount_error(mp, "illegal rsize");
820			error = EINVAL;
821			goto out;
822		}
823		ret = sscanf(opt, "%d", &args.rsize);
824		if (ret != 1 || args.rsize <= 0) {
825			vfs_mount_error(mp, "illegal wsize: %s",
826			    opt);
827			error = EINVAL;
828			goto out;
829		}
830		args.flags |= NFSMNT_RSIZE;
831	}
832	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
833		if (opt == NULL) {
834			vfs_mount_error(mp, "illegal retrans");
835			error = EINVAL;
836			goto out;
837		}
838		ret = sscanf(opt, "%d", &args.retrans);
839		if (ret != 1 || args.retrans <= 0) {
840			vfs_mount_error(mp, "illegal retrans: %s",
841			    opt);
842			error = EINVAL;
843			goto out;
844		}
845		args.flags |= NFSMNT_RETRANS;
846	}
847	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
848		ret = sscanf(opt, "%d", &args.acregmin);
849		if (ret != 1 || args.acregmin < 0) {
850			vfs_mount_error(mp, "illegal acregmin: %s",
851			    opt);
852			error = EINVAL;
853			goto out;
854		}
855		args.flags |= NFSMNT_ACREGMIN;
856	}
857	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
858		ret = sscanf(opt, "%d", &args.acregmax);
859		if (ret != 1 || args.acregmax < 0) {
860			vfs_mount_error(mp, "illegal acregmax: %s",
861			    opt);
862			error = EINVAL;
863			goto out;
864		}
865		args.flags |= NFSMNT_ACREGMAX;
866	}
867	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
868		ret = sscanf(opt, "%d", &args.acdirmin);
869		if (ret != 1 || args.acdirmin < 0) {
870			vfs_mount_error(mp, "illegal acdirmin: %s",
871			    opt);
872			error = EINVAL;
873			goto out;
874		}
875		args.flags |= NFSMNT_ACDIRMIN;
876	}
877	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
878		ret = sscanf(opt, "%d", &args.acdirmax);
879		if (ret != 1 || args.acdirmax < 0) {
880			vfs_mount_error(mp, "illegal acdirmax: %s",
881			    opt);
882			error = EINVAL;
883			goto out;
884		}
885		args.flags |= NFSMNT_ACDIRMAX;
886	}
887	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
888		ret = sscanf(opt, "%d", &args.timeo);
889		if (ret != 1 || args.timeo <= 0) {
890			vfs_mount_error(mp, "illegal timeout: %s",
891			    opt);
892			error = EINVAL;
893			goto out;
894		}
895		args.flags |= NFSMNT_TIMEO;
896	}
897	if (vfs_getopt(mp->mnt_optnew, "sec",
898		(void **) &secname, NULL) == 0)
899		nfs_sec_name(secname, &args.flags);
900
901	if (mp->mnt_flag & MNT_UPDATE) {
902		struct nfsmount *nmp = VFSTONFS(mp);
903
904		if (nmp == NULL) {
905			error = EIO;
906			goto out;
907		}
908		/*
909		 * When doing an update, we can't change version,
910		 * security, switch lockd strategies or change cookie
911		 * translation
912		 */
913		args.flags = (args.flags &
914		    ~(NFSMNT_NFSV3 |
915		      NFSMNT_NFSV4 |
916		      NFSMNT_KERB |
917		      NFSMNT_INTEGRITY |
918		      NFSMNT_PRIVACY |
919		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
920		    (nmp->nm_flag &
921			(NFSMNT_NFSV3 |
922			 NFSMNT_NFSV4 |
923			 NFSMNT_KERB |
924			 NFSMNT_INTEGRITY |
925			 NFSMNT_PRIVACY |
926			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
927		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
928		goto out;
929	}
930
931	/*
932	 * Make the nfs_ip_paranoia sysctl serve as the default connection
933	 * or no-connection mode for those protocols that support
934	 * no-connection mode (the flag will be cleared later for protocols
935	 * that do not support no-connection mode).  This will allow a client
936	 * to receive replies from a different IP then the request was
937	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
938	 * not 0.
939	 */
940	if (nfs_ip_paranoia == 0)
941		args.flags |= NFSMNT_NOCONN;
942
943	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
944	    &args.fhsize) == 0) {
945		if (args.fhsize > NFSX_FHMAX) {
946			vfs_mount_error(mp, "Bad file handle");
947			error = EINVAL;
948			goto out;
949		}
950		bcopy(args.fh, nfh, args.fhsize);
951	} else {
952		args.fhsize = 0;
953	}
954
955	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
956	    &len);
957	if (args.hostname == NULL) {
958		vfs_mount_error(mp, "Invalid hostname");
959		error = EINVAL;
960		goto out;
961	}
962	bcopy(args.hostname, hst, MNAMELEN);
963	hst[MNAMELEN - 1] = '\0';
964
965	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
966		strlcpy(srvkrbname, name, sizeof (srvkrbname));
967	else
968		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
969	args.srvkrbnamelen = strlen(srvkrbname);
970
971	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
972		strlcpy(krbname, name, sizeof (krbname));
973	else
974		krbname[0] = '\0';
975	args.krbnamelen = strlen(krbname);
976
977	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
978		strlcpy(dirpath, name, sizeof (dirpath));
979	else
980		dirpath[0] = '\0';
981	args.dirlen = strlen(dirpath);
982
983	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
984	    &args.addrlen) == 0) {
985		if (args.addrlen > SOCK_MAXADDRLEN) {
986			error = ENAMETOOLONG;
987			goto out;
988		}
989		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
990		bcopy(args.addr, nam, args.addrlen);
991		nam->sa_len = args.addrlen;
992	}
993
994	args.fh = nfh;
995	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
996	    &vp, td->td_ucred, td);
997out:
998	if (!error) {
999		MNT_ILOCK(mp);
1000		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1001		MNT_IUNLOCK(mp);
1002	}
1003	return (error);
1004}
1005
1006
1007/*
1008 * VFS Operations.
1009 *
1010 * mount system call
1011 * It seems a bit dumb to copyinstr() the host and path here and then
1012 * bcopy() them in mountnfs(), but I wanted to detect errors before
1013 * doing the sockargs() call because sockargs() allocates an mbuf and
1014 * an error after that means that I have to release the mbuf.
1015 */
1016/* ARGSUSED */
1017static int
1018nfs_cmount(struct mntarg *ma, void *data, int flags)
1019{
1020	int error;
1021	struct nfs_args args;
1022
1023	error = copyin(data, &args, sizeof (struct nfs_args));
1024	if (error)
1025		return error;
1026
1027	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1028
1029	error = kernel_mount(ma, flags);
1030	return (error);
1031}
1032
1033/*
1034 * Common code for mount and mountroot
1035 */
1036static int
1037mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1038    char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1039    struct vnode **vpp, struct ucred *cred, struct thread *td)
1040{
1041	struct nfsmount *nmp;
1042	struct nfsnode *np;
1043	int error, trycnt, ret, clearintr;
1044	struct nfsvattr nfsva;
1045	static u_int64_t clval = 0;
1046
1047	if (mp->mnt_flag & MNT_UPDATE) {
1048		nmp = VFSTONFS(mp);
1049		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1050		FREE(nam, M_SONAME);
1051		return (0);
1052	} else {
1053		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1054		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1055		    M_NEWNFSMNT, M_WAITOK);
1056		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1057		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1058		TAILQ_INIT(&nmp->nm_bufq);
1059		if (clval == 0)
1060			clval = (u_int64_t)nfsboottime.tv_sec;
1061		nmp->nm_clval = clval++;
1062		nmp->nm_krbnamelen = argp->krbnamelen;
1063		nmp->nm_dirpathlen = argp->dirlen;
1064		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1065		if (nmp->nm_dirpathlen > 0) {
1066			/*
1067			 * Since we will be doing dirpath as root,
1068			 * set nm_uid to the real uid doing the mount,
1069			 * since that is normally the user with a valid TGT.
1070			 */
1071			nmp->nm_uid = td->td_ucred->cr_ruid;
1072		} else {
1073			/*
1074			 * Just set to -1, so the first Op
1075			 * will set it later, to the uid of
1076			 * the process doing that (usually
1077			 * from a first open in the mount
1078			 * point).
1079			 */
1080			nmp->nm_uid = (uid_t)-1;
1081		}
1082
1083		/* Copy and null terminate all the names */
1084		if (nmp->nm_krbnamelen > 0) {
1085			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1086			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1087		}
1088		if (nmp->nm_dirpathlen > 0) {
1089			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1090			    nmp->nm_dirpathlen);
1091			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1092			    + 1] = '\0';
1093		}
1094		if (nmp->nm_srvkrbnamelen > 0) {
1095			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1096			    nmp->nm_srvkrbnamelen);
1097			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1098			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1099		}
1100		nmp->nm_sockreq.nr_cred = crhold(cred);
1101		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1102		mp->mnt_data = nmp;
1103	}
1104	vfs_getnewfsid(mp);
1105	nmp->nm_mountp = mp;
1106	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1107
1108	nfs_decode_args(mp, nmp, argp, cred, td);
1109
1110	/*
1111	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1112	 * high, depending on whether we end up with negative offsets in
1113	 * the client or server somewhere.  2GB-1 may be safer.
1114	 *
1115	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1116	 * that we can handle until we find out otherwise.
1117	 * XXX Our "safe" limit on the client is what we can store in our
1118	 * buffer cache using signed(!) block numbers.
1119	 */
1120	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1121		nmp->nm_maxfilesize = 0xffffffffLL;
1122	else
1123		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1124
1125	nmp->nm_timeo = NFS_TIMEO;
1126	nmp->nm_retry = NFS_RETRANS;
1127	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1128		nmp->nm_wsize = NFS_WSIZE;
1129		nmp->nm_rsize = NFS_RSIZE;
1130		nmp->nm_readdirsize = NFS_READDIRSIZE;
1131	}
1132	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1133	nmp->nm_numgrps = NFS_MAXGRPS;
1134	nmp->nm_readahead = NFS_DEFRAHEAD;
1135	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1136	if (nmp->nm_tprintf_delay < 0)
1137		nmp->nm_tprintf_delay = 0;
1138	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1139	if (nmp->nm_tprintf_initial_delay < 0)
1140		nmp->nm_tprintf_initial_delay = 0;
1141	nmp->nm_fhsize = argp->fhsize;
1142	if (nmp->nm_fhsize > 0)
1143		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1144	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1145	nmp->nm_nam = nam;
1146	/* Set up the sockets and per-host congestion */
1147	nmp->nm_sotype = argp->sotype;
1148	nmp->nm_soproto = argp->proto;
1149	nmp->nm_sockreq.nr_prog = NFS_PROG;
1150	if ((argp->flags & NFSMNT_NFSV4))
1151		nmp->nm_sockreq.nr_vers = NFS_VER4;
1152	else if ((argp->flags & NFSMNT_NFSV3))
1153		nmp->nm_sockreq.nr_vers = NFS_VER3;
1154	else
1155		nmp->nm_sockreq.nr_vers = NFS_VER2;
1156
1157
1158	/*
1159	 * For Connection based sockets (TCP,...) do the connect here,
1160	 * but make it interruptible, even for non-interuptible mounts.
1161	 */
1162	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
1163		nmp->nm_flag |= NFSMNT_INT;
1164		clearintr = 1;
1165	} else {
1166		clearintr = 0;
1167	}
1168	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1169		goto bad;
1170	if (clearintr)
1171		nmp->nm_flag &= ~NFSMNT_INT;
1172
1173	/*
1174	 * A reference count is needed on the nfsnode representing the
1175	 * remote root.  If this object is not persistent, then backward
1176	 * traversals of the mount point (i.e. "..") will not work if
1177	 * the nfsnode gets flushed out of the cache. Ufs does not have
1178	 * this problem, because one can identify root inodes by their
1179	 * number == ROOTINO (2).
1180	 */
1181	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1182	    nmp->nm_dirpathlen > 0) {
1183		/*
1184		 * If the fhsize on the mount point == 0 for V4, the mount
1185		 * path needs to be looked up.
1186		 */
1187		trycnt = 3;
1188		do {
1189			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1190			    cred, td);
1191			if (error)
1192				(void) nfs_catnap(PZERO, "nfsgetdirp");
1193		} while (error && --trycnt > 0);
1194		if (error) {
1195			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1196			goto bad;
1197		}
1198	}
1199	if (nmp->nm_fhsize > 0) {
1200		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1201		if (error)
1202			goto bad;
1203		*vpp = NFSTOV(np);
1204
1205		/*
1206		 * Get file attributes and transfer parameters for the
1207		 * mountpoint.  This has the side effect of filling in
1208		 * (*vpp)->v_type with the correct value.
1209		 */
1210		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1211		    cred, td, &nfsva, NULL);
1212		if (ret) {
1213			/*
1214			 * Just set default values to get things going.
1215			 */
1216			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1217			nfsva.na_vattr.va_type = VDIR;
1218			nfsva.na_vattr.va_mode = 0777;
1219			nfsva.na_vattr.va_nlink = 100;
1220			nfsva.na_vattr.va_uid = (uid_t)0;
1221			nfsva.na_vattr.va_gid = (gid_t)0;
1222			nfsva.na_vattr.va_fileid = 2;
1223			nfsva.na_vattr.va_gen = 1;
1224			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1225			nfsva.na_vattr.va_size = 512 * 1024;
1226		}
1227		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1228		if (argp->flags & NFSMNT_NFSV3)
1229			ncl_fsinfo(nmp, *vpp, cred, td);
1230
1231		/*
1232		 * Lose the lock but keep the ref.
1233		 */
1234		VOP_UNLOCK(*vpp, 0);
1235		return (0);
1236	}
1237	error = EIO;
1238
1239bad:
1240	newnfs_disconnect(&nmp->nm_sockreq);
1241	crfree(nmp->nm_sockreq.nr_cred);
1242	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1243	mtx_destroy(&nmp->nm_mtx);
1244	FREE(nmp, M_NEWNFSMNT);
1245	FREE(nam, M_SONAME);
1246	return (error);
1247}
1248
1249/*
1250 * unmount system call
1251 */
1252static int
1253nfs_unmount(struct mount *mp, int mntflags)
1254{
1255	struct thread *td;
1256	struct nfsmount *nmp;
1257	int error, flags = 0, trycnt = 0;
1258
1259	td = curthread;
1260
1261	if (mntflags & MNT_FORCE)
1262		flags |= FORCECLOSE;
1263	nmp = VFSTONFS(mp);
1264	/*
1265	 * Goes something like this..
1266	 * - Call vflush() to clear out vnodes for this filesystem
1267	 * - Close the socket
1268	 * - Free up the data structures
1269	 */
1270	/* In the forced case, cancel any outstanding requests. */
1271	if (mntflags & MNT_FORCE) {
1272		error = newnfs_nmcancelreqs(nmp);
1273		if (error)
1274			goto out;
1275		/* For a forced close, get rid of the renew thread now */
1276		nfscl_umount(nmp, td);
1277	}
1278	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1279	do {
1280		error = vflush(mp, 1, flags, td);
1281		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1282			(void) nfs_catnap(PSOCK, "newndm");
1283	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1284	if (error)
1285		goto out;
1286
1287	/*
1288	 * We are now committed to the unmount.
1289	 */
1290	if ((mntflags & MNT_FORCE) == 0)
1291		nfscl_umount(nmp, td);
1292	newnfs_disconnect(&nmp->nm_sockreq);
1293	crfree(nmp->nm_sockreq.nr_cred);
1294	FREE(nmp->nm_nam, M_SONAME);
1295
1296	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1297	mtx_destroy(&nmp->nm_mtx);
1298	FREE(nmp, M_NEWNFSMNT);
1299out:
1300	return (error);
1301}
1302
1303/*
1304 * Return root of a filesystem
1305 */
1306static int
1307nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1308{
1309	struct vnode *vp;
1310	struct nfsmount *nmp;
1311	struct nfsnode *np;
1312	int error;
1313
1314	nmp = VFSTONFS(mp);
1315	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1316	if (error)
1317		return error;
1318	vp = NFSTOV(np);
1319	/*
1320	 * Get transfer parameters and attributes for root vnode once.
1321	 */
1322	mtx_lock(&nmp->nm_mtx);
1323	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1324		mtx_unlock(&nmp->nm_mtx);
1325		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1326	} else
1327		mtx_unlock(&nmp->nm_mtx);
1328	if (vp->v_type == VNON)
1329	    vp->v_type = VDIR;
1330	vp->v_vflag |= VV_ROOT;
1331	*vpp = vp;
1332	return (0);
1333}
1334
1335/*
1336 * Flush out the buffer cache
1337 */
1338/* ARGSUSED */
1339static int
1340nfs_sync(struct mount *mp, int waitfor)
1341{
1342	struct vnode *vp, *mvp;
1343	struct thread *td;
1344	int error, allerror = 0;
1345
1346	td = curthread;
1347
1348	/*
1349	 * Force stale buffer cache information to be flushed.
1350	 */
1351	MNT_ILOCK(mp);
1352loop:
1353	MNT_VNODE_FOREACH(vp, mp, mvp) {
1354		VI_LOCK(vp);
1355		MNT_IUNLOCK(mp);
1356		/* XXX Racy bv_cnt check. */
1357		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1358		    waitfor == MNT_LAZY) {
1359			VI_UNLOCK(vp);
1360			MNT_ILOCK(mp);
1361			continue;
1362		}
1363		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1364			MNT_ILOCK(mp);
1365			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1366			goto loop;
1367		}
1368		error = VOP_FSYNC(vp, waitfor, td);
1369		if (error)
1370			allerror = error;
1371		VOP_UNLOCK(vp, 0);
1372		vrele(vp);
1373
1374		MNT_ILOCK(mp);
1375	}
1376	MNT_IUNLOCK(mp);
1377	return (allerror);
1378}
1379
1380static int
1381nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1382{
1383	struct nfsmount *nmp = VFSTONFS(mp);
1384	struct vfsquery vq;
1385	int error;
1386
1387	bzero(&vq, sizeof(vq));
1388	switch (op) {
1389#if 0
1390	case VFS_CTL_NOLOCKS:
1391		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1392 		if (req->oldptr != NULL) {
1393 			error = SYSCTL_OUT(req, &val, sizeof(val));
1394 			if (error)
1395 				return (error);
1396 		}
1397 		if (req->newptr != NULL) {
1398 			error = SYSCTL_IN(req, &val, sizeof(val));
1399 			if (error)
1400 				return (error);
1401			if (val)
1402				nmp->nm_flag |= NFSMNT_NOLOCKS;
1403			else
1404				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1405 		}
1406		break;
1407#endif
1408	case VFS_CTL_QUERY:
1409		mtx_lock(&nmp->nm_mtx);
1410		if (nmp->nm_state & NFSSTA_TIMEO)
1411			vq.vq_flags |= VQ_NOTRESP;
1412		mtx_unlock(&nmp->nm_mtx);
1413#if 0
1414		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1415		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1416			vq.vq_flags |= VQ_NOTRESPLOCK;
1417#endif
1418		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1419		break;
1420 	case VFS_CTL_TIMEO:
1421 		if (req->oldptr != NULL) {
1422 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1423 			    sizeof(nmp->nm_tprintf_initial_delay));
1424 			if (error)
1425 				return (error);
1426 		}
1427 		if (req->newptr != NULL) {
1428			error = vfs_suser(mp, req->td);
1429			if (error)
1430				return (error);
1431 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1432 			    sizeof(nmp->nm_tprintf_initial_delay));
1433 			if (error)
1434 				return (error);
1435 			if (nmp->nm_tprintf_initial_delay < 0)
1436 				nmp->nm_tprintf_initial_delay = 0;
1437 		}
1438		break;
1439	default:
1440		return (ENOTSUP);
1441	}
1442	return (0);
1443}
1444
1445