Deleted Added
full compact
nfs_vfsops.c (176374) nfs_vfsops.c (176559)
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vfsops.c 176374 2008-02-17 22:32:08Z yar $");
36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vfsops.c 176559 2008-02-25 18:45:57Z attilio $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/mount.h>
53#include <sys/proc.h>
54#include <sys/socket.h>
55#include <sys/socketvar.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/vnode.h>
59#include <sys/signalvar.h>
60
61#include <vm/vm.h>
62#include <vm/vm_extern.h>
63#include <vm/uma.h>
64
65#include <net/if.h>
66#include <net/route.h>
67#include <netinet/in.h>
68
69#include <rpc/rpcclnt.h>
70
71#include <nfs/rpcv2.h>
72#include <nfs/nfsproto.h>
73#include <nfsclient/nfs.h>
74#include <nfsclient/nfsnode.h>
75#include <nfsclient/nfsmount.h>
76#include <nfs/xdr_subs.h>
77#include <nfsclient/nfsm_subs.h>
78#include <nfsclient/nfsdiskless.h>
79
/* Kernel malloc types used for the NFS client's dynamically allocated data. */
80MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
81MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
82MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
83MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
84MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
85
/* Zone that mountnfs() allocates struct nfsmount from; nfs_unmount() frees to it. */
86uma_zone_t nfsmount_zone;
87
/* Client-wide counters; rpccnt[] is incremented for each RPC issued in this file. */
88struct nfsstats nfsstats;
89
/* The vfs.nfs sysctl subtree and the tunables published beneath it. */
90SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
91SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
92 &nfsstats, nfsstats, "S,nfsstats");
/* When this is 0, nfs_mount() forces NFSMNT_NOCONN on new mounts. */
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95 &nfs_ip_paranoia, 0, "");
96#ifdef NFS_DEBUG
97int nfs_debug;
98SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
99#endif
/*
 * Defaults for the per-mount "not responding" message delays.  mountnfs()
 * copies both values into each new nfsmount, clamping negatives to 0.
 */
100static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
102 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
103/* how long between console messages "nfs server foo not responding" */
104static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
105SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
106 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
107
/* Forward declarations for the file-local helpers and VFS entry points. */
108static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
109 struct nfs_args *argp);
110static int mountnfs(struct nfs_args *, struct mount *,
111 struct sockaddr *, char *, struct vnode **,
112 struct ucred *cred);
113static vfs_mount_t nfs_mount;
114static vfs_cmount_t nfs_cmount;
115static vfs_unmount_t nfs_unmount;
116static vfs_root_t nfs_root;
117static vfs_statfs_t nfs_statfs;
118static vfs_sync_t nfs_sync;
119static vfs_sysctl_t nfs_sysctl;
120
121/*
122 * nfs vfs operations.
 *
 * Dispatch table binding the VFS entry points to the NFS client
 * implementations; VFS_SET() below registers it under the name "nfs"
 * with the VFCF_NETWORK flag.
123 */
124static struct vfsops nfs_vfsops = {
125 .vfs_init = nfs_init,
126 .vfs_mount = nfs_mount,
127 .vfs_cmount = nfs_cmount,
128 .vfs_root = nfs_root,
129 .vfs_statfs = nfs_statfs,
130 .vfs_sync = nfs_sync,
131 .vfs_uninit = nfs_uninit,
132 .vfs_unmount = nfs_unmount,
133 .vfs_sysctl = nfs_sysctl,
134};
135VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
136
137/* So that loader and kldload(2) can find us, wherever we are.. */
138MODULE_VERSION(nfs, 1);
139
/*
 * RPC operations vector; mountnfs() stores its address in nm_rpcops of
 * every new mount.  The initialiser is positional, so the entries must
 * stay in the member order of struct nfs_rpcops (declared elsewhere).
 */
140static struct nfs_rpcops nfs_rpcops = {
141 nfs_readrpc,
142 nfs_writerpc,
143 nfs_writebp,
144 nfs_readlinkrpc,
145 nfs_invaldir,
146 nfs_commit,
147};
148
149/*
150 * This structure must be filled in by a primary bootstrap or bootstrap
151 * server for a diskless/dataless machine. It is initialized below just
152 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * nfs_diskless_valid selects which structure nfs_mountroot() uses:
 *   0 - nothing was filled in; nfs_mountroot() returns -1.
 *   1 - only the old-format nfs_diskless is filled in;
 *       nfs_convert_diskless() copies it into nfsv3_diskless.
 *   3 - value set by nfs_convert_diskless() once nfsv3_diskless is
 *       populated.
153 */
154struct nfs_diskless nfs_diskless = { { { 0 } } };
155struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
156int nfs_diskless_valid = 0;
157
/* Read-only sysctls exposing the diskless boot state under vfs.nfs. */
158SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
159 &nfs_diskless_valid, 0, "");
160
161SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
162 nfsv3_diskless.root_hostnam, 0, "");
163
164SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
165 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
166 "%Ssockaddr_in", "");
167
168
/* NOTE(review): nfsargs_ntoh() is only declared here; its definition is not in this part of the file. */
169void nfsargs_ntoh(struct nfs_args *);
/* Forward declarations for the diskless-boot helpers defined below. */
170static int nfs_mountdiskless(char *,
171 struct sockaddr_in *, struct nfs_args *,
172 struct thread *, struct vnode **, struct mount *);
173static void nfs_convert_diskless(void);
174static void nfs_convert_oargs(struct nfs_args *args,
175 struct onfs_args *oargs);
176
177int
178nfs_iosize(struct nfsmount *nmp)
179{
180 int iosize;
181
182 /*
183 * Calculate the size used for io buffers. Use the larger
184 * of the two sizes to minimise nfs requests but make sure
185 * that it is at least one VM page to avoid wasting buffer
186 * space.
187 */
188 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
189 iosize = imax(iosize, PAGE_SIZE);
190 return (iosize);
191}
192
193static void
194nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
195{
196
197 args->version = NFS_ARGSVERSION;
198 args->addr = oargs->addr;
199 args->addrlen = oargs->addrlen;
200 args->sotype = oargs->sotype;
201 args->proto = oargs->proto;
202 args->fh = oargs->fh;
203 args->fhsize = oargs->fhsize;
204 args->flags = oargs->flags;
205 args->wsize = oargs->wsize;
206 args->rsize = oargs->rsize;
207 args->readdirsize = oargs->readdirsize;
208 args->timeo = oargs->timeo;
209 args->retrans = oargs->retrans;
210 args->maxgrouplist = oargs->maxgrouplist;
211 args->readahead = oargs->readahead;
212 args->deadthresh = oargs->deadthresh;
213 args->hostname = oargs->hostname;
214}
215
216static void
217nfs_convert_diskless(void)
218{
219
220 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
221 sizeof(struct ifaliasreq));
222 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
223 sizeof(struct sockaddr_in));
224 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
225 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
226 nfsv3_diskless.root_fhsize = NFSX_V3FH;
227 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
228 } else {
229 nfsv3_diskless.root_fhsize = NFSX_V2FH;
230 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
231 }
232 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
233 sizeof(struct sockaddr_in));
234 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
235 nfsv3_diskless.root_time = nfs_diskless.root_time;
236 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
237 MAXHOSTNAMELEN);
238 nfs_diskless_valid = 3;
239}
240
241/*
242 * nfs statfs call
 *
 * Fill *sbp with file system statistics obtained from the server with an
 * NFSPROC_FSSTAT RPC issued against the mount's root file handle.  On a
 * v3 mount that has not yet recorded NFSSTA_GOTFSINFO, nfs_fsinfo() is
 * called first.  The mount is held busy and the root vnode is obtained
 * with LK_EXCLUSIVE for the duration; both are released before returning.
 * Returns 0 on success or an error number.
 *
 * NOTE(review): the nfsm_* macros appear to operate on the locals
 * mreq/mrep/md/mb/bpos/dpos/error and to jump to the nfsmout label on
 * failure -- confirm against nfsm_subs.h before renaming any of them.
243 */
244static int
245nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
246{
247 struct vnode *vp;
248 struct nfs_statfs *sfp;
249 caddr_t bpos, dpos;
250 struct nfsmount *nmp = VFSTONFS(mp);
251 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
252 struct mbuf *mreq, *mrep, *md, *mb;
253 struct nfsnode *np;
254 u_quad_t tquad;
255
256#ifndef nolint
257 sfp = NULL;
258#endif
 /* Fail immediately (LK_NOWAIT) rather than sleep if the mount cannot be busied. */
259 error = vfs_busy(mp, LK_NOWAIT, NULL, td);
260 if (error)
261 return (error);
 /* Look up the nfsnode for the mount's root file handle. */
262 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
263 if (error) {
264 vfs_unbusy(mp, td);
265 return (error);
266 }
267 vp = NFSTOV(np);
 /* nm_state is read under nm_mtx; the mutex is dropped before any RPC is issued. */
268 mtx_lock(&nmp->nm_mtx);
269 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
270 mtx_unlock(&nmp->nm_mtx);
 /* Best effort: the result of nfs_fsinfo() is deliberately ignored. */
271 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
272 } else
273 mtx_unlock(&nmp->nm_mtx);
274 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
275 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
276 mb = mreq;
277 bpos = mtod(mb, caddr_t);
278 nfsm_fhtom(vp, v3);
279 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
280 if (v3)
281 nfsm_postop_attr(vp, retattr);
282 if (error) {
283 if (mrep != NULL)
284 m_freem(mrep);
285 goto nfsmout;
286 }
287 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
288 mtx_lock(&nmp->nm_mtx);
289 sbp->f_iosize = nfs_iosize(nmp);
290 mtx_unlock(&nmp->nm_mtx);
291 if (v3) {
 /* v3 replies carry 64-bit byte counts; report them in NFS_FABLKSIZE units. */
292 sbp->f_bsize = NFS_FABLKSIZE;
293 tquad = fxdr_hyper(&sfp->sf_tbytes);
294 sbp->f_blocks = tquad / NFS_FABLKSIZE;
295 tquad = fxdr_hyper(&sfp->sf_fbytes);
296 sbp->f_bfree = tquad / NFS_FABLKSIZE;
297 tquad = fxdr_hyper(&sfp->sf_abytes);
298 sbp->f_bavail = tquad / NFS_FABLKSIZE;
 /* Only nfsuquad[1] of each 64-bit file count is used, masked to 31 bits. */
299 sbp->f_files = (fxdr_unsigned(int32_t,
300 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
301 sbp->f_ffree = (fxdr_unsigned(int32_t,
302 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
303 } else {
 /* Non-v3 replies are copied as-is; file counts are reported as 0. */
304 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
305 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
306 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
307 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
308 sbp->f_files = 0;
309 sbp->f_ffree = 0;
310 }
311 m_freem(mrep);
312nfsmout:
 /* Common exit: drop the root vnode and the busy hold on the mount. */
313 vput(vp);
314 vfs_unbusy(mp, td);
315 return (error);
316}
317
318/*
319 * nfs version 3 fsinfo rpc call
 *
 * Issue NFSPROC_FSINFO against vp and, on success, lower the mount's
 * write, read and readdir transfer sizes and its maximum file size to
 * what the server reports.  The values are only ever reduced, never
 * raised.  Also refreshes the cached f_iosize and sets NFSSTA_GOTFSINFO.
 * All nmp updates are made under nm_mtx.  Returns 0 or an error number.
 *
 * NOTE(review): the nfsm_* macros appear to rely on the locals
 * mreq/mrep/md/mb/bpos/dpos/error and to jump to nfsmout on failure --
 * confirm against nfsm_subs.h before renaming any of them.
320 */
321int
322nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
323 struct thread *td)
324{
325 struct nfsv3_fsinfo *fsp;
326 u_int32_t pref, max;
327 caddr_t bpos, dpos;
328 int error = 0, retattr;
329 struct mbuf *mreq, *mrep, *md, *mb;
330 u_int64_t maxfsize;
331
332 nfsstats.rpccnt[NFSPROC_FSINFO]++;
333 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
334 mb = mreq;
335 bpos = mtod(mb, caddr_t);
336 nfsm_fhtom(vp, 1);
337 nfsm_request(vp, NFSPROC_FSINFO, td, cred);
338 nfsm_postop_attr(vp, retattr);
339 if (!error) {
340 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
 /* Write size: adopt the preferred size (rounded up to NFS_FABLKSIZE) if smaller. */
341 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
342 mtx_lock(&nmp->nm_mtx);
343 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
344 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
345 ~(NFS_FABLKSIZE - 1);
 /* Then cap at the server maximum, rounded down unless that would give 0. */
346 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
347 if (max < nmp->nm_wsize && max > 0) {
348 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
349 if (nmp->nm_wsize == 0)
350 nmp->nm_wsize = max;
351 }
 /* Read size: same treatment as the write size. */
352 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
353 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
354 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
355 ~(NFS_FABLKSIZE - 1);
356 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
357 if (max < nmp->nm_rsize && max > 0) {
358 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
359 if (nmp->nm_rsize == 0)
360 nmp->nm_rsize = max;
361 }
 /* Readdir size: rounded to NFS_DIRBLKSIZ. */
362 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
363 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
364 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
365 ~(NFS_DIRBLKSIZ - 1);
 /* max still holds fs_rtmax here, so readdir is capped by the read maximum. */
366 if (max < nmp->nm_readdirsize && max > 0) {
367 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
368 if (nmp->nm_readdirsize == 0)
369 nmp->nm_readdirsize = max;
370 }
 /* A reported maximum file size of 0 is ignored. */
371 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
372 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
373 nmp->nm_maxfilesize = maxfsize;
374 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
375 nmp->nm_state |= NFSSTA_GOTFSINFO;
376 mtx_unlock(&nmp->nm_mtx);
377 }
378 m_freem(mrep);
379nfsmout:
380 return (error);
381}
382
383/*
384 * Mount a remote root fs via. nfs. This depends on the info in the
385 * nfs_diskless structure that has been filled in properly by some primary
386 * bootstrap.
387 * It goes something like this:
388 * - do enough of "ifconfig" by calling ifioctl() so that the system
389 * can talk to the server
390 * - If nfs_diskless.mygateway is filled in, use that address as
391 * a default gateway.
392 * - build the rootfs mount point and call mountnfs() to do the rest.
393 *
394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
395 * structure, as well as other global NFS client variables here, as
396 * nfs_mountroot() will be called once in the boot before any other NFS
397 * client activity occurs.
398 */
399int
400nfs_mountroot(struct mount *mp, struct thread *td)
401{
402 struct nfsv3_diskless *nd = &nfsv3_diskless;
403 struct socket *so;
404 struct vnode *vp;
405 struct ifreq ir;
406 int error, i;
407 u_long l;
408 char buf[128];
409 char *cp;
410
411#if defined(BOOTP_NFSROOT) && defined(BOOTP)
412 bootpc_init(); /* use bootp to get nfs_diskless filled in */
413#elif defined(NFS_ROOT)
414 nfs_setup_diskless();
415#endif
416
417 if (nfs_diskless_valid == 0)
418 return (-1);
419 if (nfs_diskless_valid == 1)
420 nfs_convert_diskless();
421
422 /*
423 * XXX splnet, so networks will receive...
424 */
425 splnet();
426
427 /*
428 * Do enough of ifconfig(8) so that the critical net interface can
429 * talk to the server.
430 */
431 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
432 td->td_ucred, td);
433 if (error)
434 panic("nfs_mountroot: socreate(%04x): %d",
435 nd->myif.ifra_addr.sa_family, error);
436
437#if 0 /* XXX Bad idea */
438 /*
439 * We might not have been told the right interface, so we pass
440 * over the first ten interfaces of the same kind, until we get
441 * one of them configured.
442 */
443
444 for (i = strlen(nd->myif.ifra_name) - 1;
445 nd->myif.ifra_name[i] >= '0' &&
446 nd->myif.ifra_name[i] <= '9';
447 nd->myif.ifra_name[i] ++) {
448 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
449 if(!error)
450 break;
451 }
452#endif
453 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
454 if (error)
455 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
456 if ((cp = getenv("boot.netif.mtu")) != NULL) {
457 ir.ifr_mtu = strtol(cp, NULL, 10);
458 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
459 freeenv(cp);
460 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
461 if (error)
462 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
463 }
464 soclose(so);
465
466 /*
467 * If the gateway field is filled in, set it as the default route.
468 * Note that pxeboot will set a default route of 0 if the route
469 * is not set by the DHCP server. Check also for a value of 0
470 * to avoid panicking inappropriately in that situation.
471 */
472 if (nd->mygateway.sin_len != 0 &&
473 nd->mygateway.sin_addr.s_addr != 0) {
474 struct sockaddr_in mask, sin;
475
476 bzero((caddr_t)&mask, sizeof(mask));
477 sin = mask;
478 sin.sin_family = AF_INET;
479 sin.sin_len = sizeof(sin);
480 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
481 (struct sockaddr *)&nd->mygateway,
482 (struct sockaddr *)&mask,
483 RTF_UP | RTF_GATEWAY, NULL);
484 if (error)
485 panic("nfs_mountroot: RTM_ADD: %d", error);
486 }
487
488 /*
489 * Create the rootfs mount point.
490 */
491 nd->root_args.fh = nd->root_fh;
492 nd->root_args.fhsize = nd->root_fhsize;
493 l = ntohl(nd->root_saddr.sin_addr.s_addr);
494 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
495 (l >> 24) & 0xff, (l >> 16) & 0xff,
496 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
497 printf("NFS ROOT: %s\n", buf);
498 if ((error = nfs_mountdiskless(buf,
499 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500 return (error);
501 }
502
503 /*
504 * This is not really an nfs issue, but it is much easier to
505 * set hostname here and then let the "/etc/rc.xxx" files
506 * mount the right /var based upon its preset value.
507 */
508 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
509 hostname[MAXHOSTNAMELEN - 1] = '\0';
510 for (i = 0; i < MAXHOSTNAMELEN; i++)
511 if (hostname[i] == '\0')
512 break;
513 inittodr(ntohl(nd->root_time));
514 return (0);
515}
516
517/*
518 * Internal version of mount system call for diskless setup.
519 */
520static int
521nfs_mountdiskless(char *path,
522 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
523 struct vnode **vpp, struct mount *mp)
524{
525 struct sockaddr *nam;
526 int error;
527
528 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
529 if ((error = mountnfs(args, mp, nam, path, vpp,
530 td->td_ucred)) != 0) {
531 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
532 return (error);
533 }
534 return (0);
535}
536
/*
 * Apply the mount arguments in *argp to the mount point and its nfsmount.
 *
 * Called from mountnfs() for a new mount and from nfs_mount() for an
 * update.  It sets or clears MNT_RDONLY, merges the argument flags into
 * nm_flag, and validates and clamps the timeout, retransmit, transfer
 * size, attribute cache, group list, read-ahead, commit size and dead
 * threshold settings.  If a socket already exists and a transport-related
 * setting changed, it is disconnected (and, for SOCK_DGRAM, reconnected).
 * The function returns nothing; out-of-range values are clamped rather
 * than rejected.
 */
537static void
538nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
539{
540 int s;
541 int adjsock;
542 int maxio;
543
544 s = splnet();
545
546 /*
547 * Set read-only flag if requested; otherwise, clear it if this is
548 * an update. If this is not an update, then either the read-only
549 * flag is already clear, or this is a root mount and it was set
550 * intentionally at some previous point.
551 */
552 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
553 MNT_ILOCK(mp);
554 mp->mnt_flag |= MNT_RDONLY;
555 MNT_IUNLOCK(mp);
556 } else if (mp->mnt_flag & MNT_UPDATE) {
557 MNT_ILOCK(mp);
558 mp->mnt_flag &= ~MNT_RDONLY;
559 MNT_IUNLOCK(mp);
560 }
561
562 /*
563 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
564 * no sense in that context. Also, set up appropriate retransmit
565 * and soft timeout behavior.
566 */
567 if (argp->sotype == SOCK_STREAM) {
568 nmp->nm_flag &= ~NFSMNT_NOCONN;
569 nmp->nm_flag |= NFSMNT_DUMBTIMR;
570 nmp->nm_timeo = NFS_MAXTIMEO;
571 nmp->nm_retry = NFS_RETRANS_TCP;
572 }
573
574 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
575 if ((argp->flags & NFSMNT_NFSV3) == 0)
576 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
577
578 /* Re-bind if rsrvd port requested and wasn't on one */
579 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
580 && (argp->flags & NFSMNT_RESVPORT);
581 /* Also re-bind if we're switching to/from a connected UDP socket */
582 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
583 (argp->flags & NFSMNT_NOCONN));
584
585 /* Update flags atomically. Don't change the lock bits. */
 /*
  * NOTE(review): this ORs argp->flags into nm_flag, so a bit cleared from
  * nm_flag above is set again if it is also present in argp->flags --
  * confirm that this is intended.
  */
586 nmp->nm_flag = argp->flags | nmp->nm_flag;
587 splx(s);
588
 /* Convert the requested timeout to NFS_HZ ticks and clamp to [NFS_MINTIMEO, NFS_MAXTIMEO]. */
589 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
590 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
591 if (nmp->nm_timeo < NFS_MINTIMEO)
592 nmp->nm_timeo = NFS_MINTIMEO;
593 else if (nmp->nm_timeo > NFS_MAXTIMEO)
594 nmp->nm_timeo = NFS_MAXTIMEO;
595 }
596
 /* A retransmit count of 1 or less is ignored; larger values are capped at NFS_MAXREXMIT. */
597 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
598 nmp->nm_retry = argp->retrans;
599 if (nmp->nm_retry > NFS_MAXREXMIT)
600 nmp->nm_retry = NFS_MAXREXMIT;
601 }
602
 /* Upper bound on transfer sizes depends on protocol version and socket type. */
603 if (argp->flags & NFSMNT_NFSV3) {
604 if (argp->sotype == SOCK_DGRAM)
605 maxio = NFS_MAXDGRAMDATA;
606 else
607 maxio = NFS_MAXDATA;
608 } else
609 maxio = NFS_V2MAXDATA;
610
611 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
612 nmp->nm_wsize = argp->wsize;
613 /* Round down to multiple of blocksize */
614 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
615 if (nmp->nm_wsize <= 0)
616 nmp->nm_wsize = NFS_FABLKSIZE;
617 }
 /* The caps below apply even when no explicit size was requested. */
618 if (nmp->nm_wsize > maxio)
619 nmp->nm_wsize = maxio;
620 if (nmp->nm_wsize > MAXBSIZE)
621 nmp->nm_wsize = MAXBSIZE;
622
623 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
624 nmp->nm_rsize = argp->rsize;
625 /* Round down to multiple of blocksize */
626 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
627 if (nmp->nm_rsize <= 0)
628 nmp->nm_rsize = NFS_FABLKSIZE;
629 }
630 if (nmp->nm_rsize > maxio)
631 nmp->nm_rsize = maxio;
632 if (nmp->nm_rsize > MAXBSIZE)
633 nmp->nm_rsize = MAXBSIZE;
634
635 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
636 nmp->nm_readdirsize = argp->readdirsize;
637 }
 /* The readdir size may not exceed maxio or the (already clamped) read size. */
638 if (nmp->nm_readdirsize > maxio)
639 nmp->nm_readdirsize = maxio;
640 if (nmp->nm_readdirsize > nmp->nm_rsize)
641 nmp->nm_readdirsize = nmp->nm_rsize;
642
 /*
  * Attribute cache timeouts: each one is reset to its default unless the
  * matching flag is set with a non-negative value.  Afterwards each
  * minimum is pulled down so that it never exceeds its maximum.
  */
643 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
644 nmp->nm_acregmin = argp->acregmin;
645 else
646 nmp->nm_acregmin = NFS_MINATTRTIMO;
647 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
648 nmp->nm_acregmax = argp->acregmax;
649 else
650 nmp->nm_acregmax = NFS_MAXATTRTIMO;
651 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
652 nmp->nm_acdirmin = argp->acdirmin;
653 else
654 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
655 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
656 nmp->nm_acdirmax = argp->acdirmax;
657 else
658 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
659 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
660 nmp->nm_acdirmin = nmp->nm_acdirmax;
661 if (nmp->nm_acregmin > nmp->nm_acregmax)
662 nmp->nm_acregmin = nmp->nm_acregmax;
663
 /* The remaining limits keep their current value unless explicitly requested. */
664 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
665 if (argp->maxgrouplist <= NFS_MAXGRPS)
666 nmp->nm_numgrps = argp->maxgrouplist;
667 else
668 nmp->nm_numgrps = NFS_MAXGRPS;
669 }
670 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
671 if (argp->readahead <= NFS_MAXRAHEAD)
672 nmp->nm_readahead = argp->readahead;
673 else
674 nmp->nm_readahead = NFS_MAXRAHEAD;
675 }
 /* The commit size is never allowed to drop below the write size. */
676 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
677 if (argp->wcommitsize < nmp->nm_wsize)
678 nmp->nm_wcommitsize = nmp->nm_wsize;
679 else
680 nmp->nm_wcommitsize = argp->wcommitsize;
681 }
682 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
683 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
684 nmp->nm_deadthresh = argp->deadthresh;
685 else
686 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
687 }
688
 /* A change of socket type or protocol also requires a new socket. */
689 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
690 (nmp->nm_soproto != argp->proto));
691 nmp->nm_sotype = argp->sotype;
692 nmp->nm_soproto = argp->proto;
693
 /*
  * Drop the existing socket if anything above invalidated it.  Only
  * SOCK_DGRAM sockets are reconnected here, retrying until nfs_connect()
  * succeeds.
  */
694 if (nmp->nm_so && adjsock) {
695 nfs_safedisconnect(nmp);
696 if (nmp->nm_sotype == SOCK_DGRAM)
697 while (nfs_connect(nmp, NULL)) {
698 printf("nfs_args: retrying connect\n");
699 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
700 }
701 }
702}
703
/*
 * NULL-terminated list of option names passed to vfs_filteropt() by
 * nfs_mount(), which fails with EINVAL when that call returns non-zero.
 */
704static const char *nfs_opts[] = { "from", "nfs_args",
705 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
706 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
707 "async", NULL };
708
709/*
710 * VFS Operations.
711 *
712 * mount system call
713 * It seems a bit dumb to copyinstr() the host and path here and then
714 * bcopy() them in mountnfs(), but I wanted to detect errors before
715 * doing the sockargs() call because sockargs() allocates an mbuf and
716 * an error after that means that I have to release the mbuf.
 *
 * (The address is now fetched with getsockaddr() rather than sockargs(),
 * but the ordering requirement described above is kept.)
 *
 * Three cases are handled:
 *  - initial root mount (MNT_ROOTFS without MNT_UPDATE): nfs_mountroot();
 *  - update of an existing mount: the new arguments are applied with
 *    nfs_decode_args(), keeping the current NFSMNT_NFSV3 and
 *    NFSMNT_NOLOCKD settings;
 *  - ordinary new mount: the file handle, host name and server address
 *    are copied in from user space and passed to mountnfs().
 * On success the mount is flagged MNTK_MPSAFE and MNTK_LOOKUP_SHARED.
 * Returns 0 or an error number.
717 */
718/* ARGSUSED */
719static int
720nfs_mount(struct mount *mp, struct thread *td)
721{
722 int error;
723 struct nfs_args args;
724 struct sockaddr *nam;
725 struct vnode *vp;
726 char hst[MNAMELEN];
727 size_t len;
728 u_char nfh[NFSX_V3FHMAX];
729
 /* Reject the mount if the option list fails the nfs_opts filter. */
730 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
731 error = EINVAL;
732 goto out;
733 }
734
735 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
736 error = nfs_mountroot(mp, td);
737 goto out;
738 }
739
 /* The argument block arrives as the "nfs_args" mount option (see nfs_cmount()). */
740 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
741 if (error)
742 goto out;
743
744 if (args.version != NFS_ARGSVERSION) {
745 error = EPROGMISMATCH;
746 goto out;
747 }
748
749 if (mp->mnt_flag & MNT_UPDATE) {
750 struct nfsmount *nmp = VFSTONFS(mp);
751
752 if (nmp == NULL) {
753 error = EIO;
754 goto out;
755 }
756 /*
757 * When doing an update, we can't change from or to
758 * v3, switch lockd strategies or change cookie translation
759 */
760 args.flags = (args.flags &
761 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
762 (nmp->nm_flag &
763 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
764 nfs_decode_args(mp, nmp, &args);
 /* error is still 0 from vfs_copyopt(), so an update reports success. */
765 goto out;
766 }
767
768 /*
769 * Make the nfs_ip_paranoia sysctl serve as the default connection
770 * or no-connection mode for those protocols that support
771 * no-connection mode (the flag will be cleared later for protocols
772 * that do not support no-connection mode). This will allow a client
773 * to receive replies from a different IP then the request was
774 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
775 * not 0.
776 */
777 if (nfs_ip_paranoia == 0)
778 args.flags |= NFSMNT_NOCONN;
 /* Bound the user-supplied file handle size before copying into nfh[]. */
779 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
780 error = EINVAL;
781 goto out;
782 }
783 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
784 if (error)
785 goto out;
786 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
787 if (error)
788 goto out;
 /* Zero the remainder of hst[] after the copied name. */
789 bzero(&hst[len], MNAMELEN - len);
790 /* sockargs() call must be after above copyin() calls */
791 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
792 if (error)
793 goto out;
 /* From here on args.fh points at the kernel copy of the file handle. */
794 args.fh = nfh;
795 error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
796out:
797 if (!error) {
798 MNT_ILOCK(mp);
799 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
800 MNT_IUNLOCK(mp);
801 }
802 return (error);
803}
804
805
806/*
807 * VFS Operations.
808 *
809 * mount system call
810 * It seems a bit dumb to copyinstr() the host and path here and then
811 * bcopy() them in mountnfs(), but I wanted to detect errors before
812 * doing the sockargs() call because sockargs() allocates an mbuf and
813 * an error after that means that I have to release the mbuf.
814 */
815/* ARGSUSED */
816static int
817nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
818{
819 int error;
820 struct nfs_args args;
821
822 error = copyin(data, &args, sizeof (struct nfs_args));
823 if (error)
824 return error;
825
826 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
827
828 error = kernel_mount(ma, flags);
829 return (error);
830}
831
832/*
833 * Common code for mount and mountroot
834 */
835static int
836mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
837 char *hst, struct vnode **vpp, struct ucred *cred)
838{
839 struct nfsmount *nmp;
840 struct nfsnode *np;
841 int error;
842 struct vattr attrs;
843
844 if (mp->mnt_flag & MNT_UPDATE) {
845 nmp = VFSTONFS(mp);
846 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
847 FREE(nam, M_SONAME);
848 return (0);
849 } else {
850 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
851 bzero((caddr_t)nmp, sizeof (struct nfsmount));
852 TAILQ_INIT(&nmp->nm_bufq);
853 mp->mnt_data = nmp;
854 }
855 vfs_getnewfsid(mp);
856 nmp->nm_mountp = mp;
857 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
858
859 /*
860 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
861 * high, depending on whether we end up with negative offsets in
862 * the client or server somewhere. 2GB-1 may be safer.
863 *
864 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
865 * that we can handle until we find out otherwise.
866 * XXX Our "safe" limit on the client is what we can store in our
867 * buffer cache using signed(!) block numbers.
868 */
869 if ((argp->flags & NFSMNT_NFSV3) == 0)
870 nmp->nm_maxfilesize = 0xffffffffLL;
871 else
872 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
873
874 nmp->nm_timeo = NFS_TIMEO;
875 nmp->nm_retry = NFS_RETRANS;
876 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
877 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
878 } else {
879 nmp->nm_wsize = NFS_WSIZE;
880 nmp->nm_rsize = NFS_RSIZE;
881 }
882 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
883 nmp->nm_readdirsize = NFS_READDIRSIZE;
884 nmp->nm_numgrps = NFS_MAXGRPS;
885 nmp->nm_readahead = NFS_DEFRAHEAD;
886 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
887 nmp->nm_tprintf_delay = nfs_tprintf_delay;
888 if (nmp->nm_tprintf_delay < 0)
889 nmp->nm_tprintf_delay = 0;
890 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
891 if (nmp->nm_tprintf_initial_delay < 0)
892 nmp->nm_tprintf_initial_delay = 0;
893 nmp->nm_fhsize = argp->fhsize;
894 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
895 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
896 nmp->nm_nam = nam;
897 /* Set up the sockets and per-host congestion */
898 nmp->nm_sotype = argp->sotype;
899 nmp->nm_soproto = argp->proto;
900 nmp->nm_rpcops = &nfs_rpcops;
901
902 nfs_decode_args(mp, nmp, argp);
903
904 /*
905 * For Connection based sockets (TCP,...) defer the connect until
906 * the first request, in case the server is not responding.
907 */
908 if (nmp->nm_sotype == SOCK_DGRAM &&
909 (error = nfs_connect(nmp, NULL)))
910 goto bad;
911
912 /*
913 * This is silly, but it has to be set so that vinifod() works.
914 * We do not want to do an nfs_statfs() here since we can get
915 * stuck on a dead server and we are holding a lock on the mount
916 * point.
917 */
918 mtx_lock(&nmp->nm_mtx);
919 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
920 mtx_unlock(&nmp->nm_mtx);
921 /*
922 * A reference count is needed on the nfsnode representing the
923 * remote root. If this object is not persistent, then backward
924 * traversals of the mount point (i.e. "..") will not work if
925 * the nfsnode gets flushed out of the cache. Ufs does not have
926 * this problem, because one can identify root inodes by their
927 * number == ROOTINO (2).
928 */
929 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
930 if (error)
931 goto bad;
932 *vpp = NFSTOV(np);
933
934 /*
935 * Get file attributes and transfer parameters for the
936 * mountpoint. This has the side effect of filling in
937 * (*vpp)->v_type with the correct value.
938 */
939 if (argp->flags & NFSMNT_NFSV3)
940 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
941 else
942 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread);
943
944 /*
945 * Lose the lock but keep the ref.
946 */
947 VOP_UNLOCK(*vpp, 0);
948
949 return (0);
950bad:
951 nfs_disconnect(nmp);
952 mtx_destroy(&nmp->nm_mtx);
953 uma_zfree(nfsmount_zone, nmp);
954 FREE(nam, M_SONAME);
955 return (error);
956}
957
958/*
959 * unmount system call
960 */
961static int
962nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
963{
964 struct nfsmount *nmp;
965 int error, flags = 0;
966
967 if (mntflags & MNT_FORCE)
968 flags |= FORCECLOSE;
969 nmp = VFSTONFS(mp);
970 /*
971 * Goes something like this..
972 * - Call vflush() to clear out vnodes for this filesystem
973 * - Close the socket
974 * - Free up the data structures
975 */
976 /* In the forced case, cancel any outstanding requests. */
977 if (flags & FORCECLOSE) {
978 error = nfs_nmcancelreqs(nmp);
979 if (error)
980 goto out;
981 }
982 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
983 error = vflush(mp, 1, flags, td);
984 if (error)
985 goto out;
986
987 /*
988 * We are now committed to the unmount.
989 */
990 nfs_disconnect(nmp);
991 FREE(nmp->nm_nam, M_SONAME);
992
993 mtx_destroy(&nmp->nm_mtx);
994 uma_zfree(nfsmount_zone, nmp);
995out:
996 return (error);
997}
998
999/*
1000 * Return root of a filesystem
1001 */
1002static int
1003nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
1004{
1005 struct vnode *vp;
1006 struct nfsmount *nmp;
1007 struct nfsnode *np;
1008 int error;
1009
1010 nmp = VFSTONFS(mp);
1011 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1012 if (error)
1013 return error;
1014 vp = NFSTOV(np);
1015 /*
1016 * Get transfer parameters and attributes for root vnode once.
1017 */
1018 mtx_lock(&nmp->nm_mtx);
1019 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1020 (nmp->nm_flag & NFSMNT_NFSV3)) {
1021 mtx_unlock(&nmp->nm_mtx);
1022 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1023 } else
1024 mtx_unlock(&nmp->nm_mtx);
1025 if (vp->v_type == VNON)
1026 vp->v_type = VDIR;
1027 vp->v_vflag |= VV_ROOT;
1028 *vpp = vp;
1029 return (0);
1030}
1031
1032/*
1033 * Flush out the buffer cache
1034 */
1035/* ARGSUSED */
1036static int
1037nfs_sync(struct mount *mp, int waitfor, struct thread *td)
1038{
1039 struct vnode *vp, *mvp;
1040 int error, allerror = 0;
1041
1042 /*
1043 * Force stale buffer cache information to be flushed.
1044 */
1045 MNT_ILOCK(mp);
1046loop:
1047 MNT_VNODE_FOREACH(vp, mp, mvp) {
1048 VI_LOCK(vp);
1049 MNT_IUNLOCK(mp);
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/mount.h>
53#include <sys/proc.h>
54#include <sys/socket.h>
55#include <sys/socketvar.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/vnode.h>
59#include <sys/signalvar.h>
60
61#include <vm/vm.h>
62#include <vm/vm_extern.h>
63#include <vm/uma.h>
64
65#include <net/if.h>
66#include <net/route.h>
67#include <netinet/in.h>
68
69#include <rpc/rpcclnt.h>
70
71#include <nfs/rpcv2.h>
72#include <nfs/nfsproto.h>
73#include <nfsclient/nfs.h>
74#include <nfsclient/nfsnode.h>
75#include <nfsclient/nfsmount.h>
76#include <nfs/xdr_subs.h>
77#include <nfsclient/nfsm_subs.h>
78#include <nfsclient/nfsdiskless.h>
79
80MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
81MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
82MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
83MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
84MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
85
86uma_zone_t nfsmount_zone;
87
88struct nfsstats nfsstats;
89
90SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
91SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
92 &nfsstats, nfsstats, "S,nfsstats");
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95 &nfs_ip_paranoia, 0, "");
96#ifdef NFS_DEBUG
97int nfs_debug;
98SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
99#endif
100static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
102 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
103/* how long between console messages "nfs server foo not responding" */
104static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
105SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
106 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
107
108static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
109 struct nfs_args *argp);
110static int mountnfs(struct nfs_args *, struct mount *,
111 struct sockaddr *, char *, struct vnode **,
112 struct ucred *cred);
113static vfs_mount_t nfs_mount;
114static vfs_cmount_t nfs_cmount;
115static vfs_unmount_t nfs_unmount;
116static vfs_root_t nfs_root;
117static vfs_statfs_t nfs_statfs;
118static vfs_sync_t nfs_sync;
119static vfs_sysctl_t nfs_sysctl;
120
121/*
122 * nfs vfs operations.
123 */
124static struct vfsops nfs_vfsops = {
125 .vfs_init = nfs_init,
126 .vfs_mount = nfs_mount,
127 .vfs_cmount = nfs_cmount,
128 .vfs_root = nfs_root,
129 .vfs_statfs = nfs_statfs,
130 .vfs_sync = nfs_sync,
131 .vfs_uninit = nfs_uninit,
132 .vfs_unmount = nfs_unmount,
133 .vfs_sysctl = nfs_sysctl,
134};
135VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
136
137/* So that loader and kldload(2) can find us, wherever we are.. */
138MODULE_VERSION(nfs, 1);
139
140static struct nfs_rpcops nfs_rpcops = {
141 nfs_readrpc,
142 nfs_writerpc,
143 nfs_writebp,
144 nfs_readlinkrpc,
145 nfs_invaldir,
146 nfs_commit,
147};
148
149/*
150 * This structure must be filled in by a primary bootstrap or bootstrap
151 * server for a diskless/dataless machine. It is initialized below just
152 * to ensure that it is allocated to initialized data (.data not .bss).
153 */
154struct nfs_diskless nfs_diskless = { { { 0 } } };
155struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
156int nfs_diskless_valid = 0;
157
158SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
159 &nfs_diskless_valid, 0, "");
160
161SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
162 nfsv3_diskless.root_hostnam, 0, "");
163
164SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
165 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
166 "%Ssockaddr_in", "");
167
168
169void nfsargs_ntoh(struct nfs_args *);
170static int nfs_mountdiskless(char *,
171 struct sockaddr_in *, struct nfs_args *,
172 struct thread *, struct vnode **, struct mount *);
173static void nfs_convert_diskless(void);
174static void nfs_convert_oargs(struct nfs_args *args,
175 struct onfs_args *oargs);
176
177int
178nfs_iosize(struct nfsmount *nmp)
179{
180 int iosize;
181
182 /*
183 * Calculate the size used for io buffers. Use the larger
184 * of the two sizes to minimise nfs requests but make sure
185 * that it is at least one VM page to avoid wasting buffer
186 * space.
187 */
188 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
189 iosize = imax(iosize, PAGE_SIZE);
190 return (iosize);
191}
192
193static void
194nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
195{
196
197 args->version = NFS_ARGSVERSION;
198 args->addr = oargs->addr;
199 args->addrlen = oargs->addrlen;
200 args->sotype = oargs->sotype;
201 args->proto = oargs->proto;
202 args->fh = oargs->fh;
203 args->fhsize = oargs->fhsize;
204 args->flags = oargs->flags;
205 args->wsize = oargs->wsize;
206 args->rsize = oargs->rsize;
207 args->readdirsize = oargs->readdirsize;
208 args->timeo = oargs->timeo;
209 args->retrans = oargs->retrans;
210 args->maxgrouplist = oargs->maxgrouplist;
211 args->readahead = oargs->readahead;
212 args->deadthresh = oargs->deadthresh;
213 args->hostname = oargs->hostname;
214}
215
216static void
217nfs_convert_diskless(void)
218{
219
220 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
221 sizeof(struct ifaliasreq));
222 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
223 sizeof(struct sockaddr_in));
224 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
225 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
226 nfsv3_diskless.root_fhsize = NFSX_V3FH;
227 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
228 } else {
229 nfsv3_diskless.root_fhsize = NFSX_V2FH;
230 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
231 }
232 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
233 sizeof(struct sockaddr_in));
234 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
235 nfsv3_diskless.root_time = nfs_diskless.root_time;
236 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
237 MAXHOSTNAMELEN);
238 nfs_diskless_valid = 3;
239}
240
241/*
242 * nfs statfs call
243 */
244static int
245nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
246{
247 struct vnode *vp;
248 struct nfs_statfs *sfp;
249 caddr_t bpos, dpos;
250 struct nfsmount *nmp = VFSTONFS(mp);
251 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
252 struct mbuf *mreq, *mrep, *md, *mb;
253 struct nfsnode *np;
254 u_quad_t tquad;
255
256#ifndef nolint
257 sfp = NULL;
258#endif
259 error = vfs_busy(mp, LK_NOWAIT, NULL, td);
260 if (error)
261 return (error);
262 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
263 if (error) {
264 vfs_unbusy(mp, td);
265 return (error);
266 }
267 vp = NFSTOV(np);
268 mtx_lock(&nmp->nm_mtx);
269 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
270 mtx_unlock(&nmp->nm_mtx);
271 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
272 } else
273 mtx_unlock(&nmp->nm_mtx);
274 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
275 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
276 mb = mreq;
277 bpos = mtod(mb, caddr_t);
278 nfsm_fhtom(vp, v3);
279 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
280 if (v3)
281 nfsm_postop_attr(vp, retattr);
282 if (error) {
283 if (mrep != NULL)
284 m_freem(mrep);
285 goto nfsmout;
286 }
287 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
288 mtx_lock(&nmp->nm_mtx);
289 sbp->f_iosize = nfs_iosize(nmp);
290 mtx_unlock(&nmp->nm_mtx);
291 if (v3) {
292 sbp->f_bsize = NFS_FABLKSIZE;
293 tquad = fxdr_hyper(&sfp->sf_tbytes);
294 sbp->f_blocks = tquad / NFS_FABLKSIZE;
295 tquad = fxdr_hyper(&sfp->sf_fbytes);
296 sbp->f_bfree = tquad / NFS_FABLKSIZE;
297 tquad = fxdr_hyper(&sfp->sf_abytes);
298 sbp->f_bavail = tquad / NFS_FABLKSIZE;
299 sbp->f_files = (fxdr_unsigned(int32_t,
300 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
301 sbp->f_ffree = (fxdr_unsigned(int32_t,
302 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
303 } else {
304 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
305 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
306 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
307 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
308 sbp->f_files = 0;
309 sbp->f_ffree = 0;
310 }
311 m_freem(mrep);
312nfsmout:
313 vput(vp);
314 vfs_unbusy(mp, td);
315 return (error);
316}
317
318/*
319 * nfs version 3 fsinfo rpc call
320 */
321int
322nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
323 struct thread *td)
324{
325 struct nfsv3_fsinfo *fsp;
326 u_int32_t pref, max;
327 caddr_t bpos, dpos;
328 int error = 0, retattr;
329 struct mbuf *mreq, *mrep, *md, *mb;
330 u_int64_t maxfsize;
331
332 nfsstats.rpccnt[NFSPROC_FSINFO]++;
333 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
334 mb = mreq;
335 bpos = mtod(mb, caddr_t);
336 nfsm_fhtom(vp, 1);
337 nfsm_request(vp, NFSPROC_FSINFO, td, cred);
338 nfsm_postop_attr(vp, retattr);
339 if (!error) {
340 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
341 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
342 mtx_lock(&nmp->nm_mtx);
343 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
344 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
345 ~(NFS_FABLKSIZE - 1);
346 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
347 if (max < nmp->nm_wsize && max > 0) {
348 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
349 if (nmp->nm_wsize == 0)
350 nmp->nm_wsize = max;
351 }
352 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
353 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
354 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
355 ~(NFS_FABLKSIZE - 1);
356 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
357 if (max < nmp->nm_rsize && max > 0) {
358 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
359 if (nmp->nm_rsize == 0)
360 nmp->nm_rsize = max;
361 }
362 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
363 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
364 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
365 ~(NFS_DIRBLKSIZ - 1);
366 if (max < nmp->nm_readdirsize && max > 0) {
367 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
368 if (nmp->nm_readdirsize == 0)
369 nmp->nm_readdirsize = max;
370 }
371 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
372 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
373 nmp->nm_maxfilesize = maxfsize;
374 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
375 nmp->nm_state |= NFSSTA_GOTFSINFO;
376 mtx_unlock(&nmp->nm_mtx);
377 }
378 m_freem(mrep);
379nfsmout:
380 return (error);
381}
382
383/*
384 * Mount a remote root fs via. nfs. This depends on the info in the
385 * nfs_diskless structure that has been filled in properly by some primary
386 * bootstrap.
387 * It goes something like this:
388 * - do enough of "ifconfig" by calling ifioctl() so that the system
389 * can talk to the server
390 * - If nfs_diskless.mygateway is filled in, use that address as
391 * a default gateway.
392 * - build the rootfs mount point and call mountnfs() to do the rest.
393 *
394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
395 * structure, as well as other global NFS client variables here, as
396 * nfs_mountroot() will be called once in the boot before any other NFS
397 * client activity occurs.
398 */
399int
400nfs_mountroot(struct mount *mp, struct thread *td)
401{
402 struct nfsv3_diskless *nd = &nfsv3_diskless;
403 struct socket *so;
404 struct vnode *vp;
405 struct ifreq ir;
406 int error, i;
407 u_long l;
408 char buf[128];
409 char *cp;
410
411#if defined(BOOTP_NFSROOT) && defined(BOOTP)
412 bootpc_init(); /* use bootp to get nfs_diskless filled in */
413#elif defined(NFS_ROOT)
414 nfs_setup_diskless();
415#endif
416
417 if (nfs_diskless_valid == 0)
418 return (-1);
419 if (nfs_diskless_valid == 1)
420 nfs_convert_diskless();
421
422 /*
423 * XXX splnet, so networks will receive...
424 */
425 splnet();
426
427 /*
428 * Do enough of ifconfig(8) so that the critical net interface can
429 * talk to the server.
430 */
431 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
432 td->td_ucred, td);
433 if (error)
434 panic("nfs_mountroot: socreate(%04x): %d",
435 nd->myif.ifra_addr.sa_family, error);
436
437#if 0 /* XXX Bad idea */
438 /*
439 * We might not have been told the right interface, so we pass
440 * over the first ten interfaces of the same kind, until we get
441 * one of them configured.
442 */
443
444 for (i = strlen(nd->myif.ifra_name) - 1;
445 nd->myif.ifra_name[i] >= '0' &&
446 nd->myif.ifra_name[i] <= '9';
447 nd->myif.ifra_name[i] ++) {
448 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
449 if(!error)
450 break;
451 }
452#endif
453 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
454 if (error)
455 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
456 if ((cp = getenv("boot.netif.mtu")) != NULL) {
457 ir.ifr_mtu = strtol(cp, NULL, 10);
458 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
459 freeenv(cp);
460 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
461 if (error)
462 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
463 }
464 soclose(so);
465
466 /*
467 * If the gateway field is filled in, set it as the default route.
468 * Note that pxeboot will set a default route of 0 if the route
469 * is not set by the DHCP server. Check also for a value of 0
470 * to avoid panicking inappropriately in that situation.
471 */
472 if (nd->mygateway.sin_len != 0 &&
473 nd->mygateway.sin_addr.s_addr != 0) {
474 struct sockaddr_in mask, sin;
475
476 bzero((caddr_t)&mask, sizeof(mask));
477 sin = mask;
478 sin.sin_family = AF_INET;
479 sin.sin_len = sizeof(sin);
480 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
481 (struct sockaddr *)&nd->mygateway,
482 (struct sockaddr *)&mask,
483 RTF_UP | RTF_GATEWAY, NULL);
484 if (error)
485 panic("nfs_mountroot: RTM_ADD: %d", error);
486 }
487
488 /*
489 * Create the rootfs mount point.
490 */
491 nd->root_args.fh = nd->root_fh;
492 nd->root_args.fhsize = nd->root_fhsize;
493 l = ntohl(nd->root_saddr.sin_addr.s_addr);
494 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
495 (l >> 24) & 0xff, (l >> 16) & 0xff,
496 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
497 printf("NFS ROOT: %s\n", buf);
498 if ((error = nfs_mountdiskless(buf,
499 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500 return (error);
501 }
502
503 /*
504 * This is not really an nfs issue, but it is much easier to
505 * set hostname here and then let the "/etc/rc.xxx" files
506 * mount the right /var based upon its preset value.
507 */
508 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
509 hostname[MAXHOSTNAMELEN - 1] = '\0';
510 for (i = 0; i < MAXHOSTNAMELEN; i++)
511 if (hostname[i] == '\0')
512 break;
513 inittodr(ntohl(nd->root_time));
514 return (0);
515}
516
517/*
518 * Internal version of mount system call for diskless setup.
519 */
520static int
521nfs_mountdiskless(char *path,
522 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
523 struct vnode **vpp, struct mount *mp)
524{
525 struct sockaddr *nam;
526 int error;
527
528 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
529 if ((error = mountnfs(args, mp, nam, path, vpp,
530 td->td_ucred)) != 0) {
531 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
532 return (error);
533 }
534 return (0);
535}
536
537static void
538nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
539{
540 int s;
541 int adjsock;
542 int maxio;
543
544 s = splnet();
545
546 /*
547 * Set read-only flag if requested; otherwise, clear it if this is
548 * an update. If this is not an update, then either the read-only
549 * flag is already clear, or this is a root mount and it was set
550 * intentionally at some previous point.
551 */
552 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
553 MNT_ILOCK(mp);
554 mp->mnt_flag |= MNT_RDONLY;
555 MNT_IUNLOCK(mp);
556 } else if (mp->mnt_flag & MNT_UPDATE) {
557 MNT_ILOCK(mp);
558 mp->mnt_flag &= ~MNT_RDONLY;
559 MNT_IUNLOCK(mp);
560 }
561
562 /*
563 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
564 * no sense in that context. Also, set up appropriate retransmit
565 * and soft timeout behavior.
566 */
567 if (argp->sotype == SOCK_STREAM) {
568 nmp->nm_flag &= ~NFSMNT_NOCONN;
569 nmp->nm_flag |= NFSMNT_DUMBTIMR;
570 nmp->nm_timeo = NFS_MAXTIMEO;
571 nmp->nm_retry = NFS_RETRANS_TCP;
572 }
573
574 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
575 if ((argp->flags & NFSMNT_NFSV3) == 0)
576 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
577
578 /* Re-bind if rsrvd port requested and wasn't on one */
579 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
580 && (argp->flags & NFSMNT_RESVPORT);
581 /* Also re-bind if we're switching to/from a connected UDP socket */
582 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
583 (argp->flags & NFSMNT_NOCONN));
584
585 /* Update flags atomically. Don't change the lock bits. */
586 nmp->nm_flag = argp->flags | nmp->nm_flag;
587 splx(s);
588
589 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
590 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
591 if (nmp->nm_timeo < NFS_MINTIMEO)
592 nmp->nm_timeo = NFS_MINTIMEO;
593 else if (nmp->nm_timeo > NFS_MAXTIMEO)
594 nmp->nm_timeo = NFS_MAXTIMEO;
595 }
596
597 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
598 nmp->nm_retry = argp->retrans;
599 if (nmp->nm_retry > NFS_MAXREXMIT)
600 nmp->nm_retry = NFS_MAXREXMIT;
601 }
602
603 if (argp->flags & NFSMNT_NFSV3) {
604 if (argp->sotype == SOCK_DGRAM)
605 maxio = NFS_MAXDGRAMDATA;
606 else
607 maxio = NFS_MAXDATA;
608 } else
609 maxio = NFS_V2MAXDATA;
610
611 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
612 nmp->nm_wsize = argp->wsize;
613 /* Round down to multiple of blocksize */
614 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
615 if (nmp->nm_wsize <= 0)
616 nmp->nm_wsize = NFS_FABLKSIZE;
617 }
618 if (nmp->nm_wsize > maxio)
619 nmp->nm_wsize = maxio;
620 if (nmp->nm_wsize > MAXBSIZE)
621 nmp->nm_wsize = MAXBSIZE;
622
623 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
624 nmp->nm_rsize = argp->rsize;
625 /* Round down to multiple of blocksize */
626 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
627 if (nmp->nm_rsize <= 0)
628 nmp->nm_rsize = NFS_FABLKSIZE;
629 }
630 if (nmp->nm_rsize > maxio)
631 nmp->nm_rsize = maxio;
632 if (nmp->nm_rsize > MAXBSIZE)
633 nmp->nm_rsize = MAXBSIZE;
634
635 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
636 nmp->nm_readdirsize = argp->readdirsize;
637 }
638 if (nmp->nm_readdirsize > maxio)
639 nmp->nm_readdirsize = maxio;
640 if (nmp->nm_readdirsize > nmp->nm_rsize)
641 nmp->nm_readdirsize = nmp->nm_rsize;
642
643 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
644 nmp->nm_acregmin = argp->acregmin;
645 else
646 nmp->nm_acregmin = NFS_MINATTRTIMO;
647 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
648 nmp->nm_acregmax = argp->acregmax;
649 else
650 nmp->nm_acregmax = NFS_MAXATTRTIMO;
651 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
652 nmp->nm_acdirmin = argp->acdirmin;
653 else
654 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
655 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
656 nmp->nm_acdirmax = argp->acdirmax;
657 else
658 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
659 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
660 nmp->nm_acdirmin = nmp->nm_acdirmax;
661 if (nmp->nm_acregmin > nmp->nm_acregmax)
662 nmp->nm_acregmin = nmp->nm_acregmax;
663
664 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
665 if (argp->maxgrouplist <= NFS_MAXGRPS)
666 nmp->nm_numgrps = argp->maxgrouplist;
667 else
668 nmp->nm_numgrps = NFS_MAXGRPS;
669 }
670 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
671 if (argp->readahead <= NFS_MAXRAHEAD)
672 nmp->nm_readahead = argp->readahead;
673 else
674 nmp->nm_readahead = NFS_MAXRAHEAD;
675 }
676 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
677 if (argp->wcommitsize < nmp->nm_wsize)
678 nmp->nm_wcommitsize = nmp->nm_wsize;
679 else
680 nmp->nm_wcommitsize = argp->wcommitsize;
681 }
682 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
683 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
684 nmp->nm_deadthresh = argp->deadthresh;
685 else
686 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
687 }
688
689 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
690 (nmp->nm_soproto != argp->proto));
691 nmp->nm_sotype = argp->sotype;
692 nmp->nm_soproto = argp->proto;
693
694 if (nmp->nm_so && adjsock) {
695 nfs_safedisconnect(nmp);
696 if (nmp->nm_sotype == SOCK_DGRAM)
697 while (nfs_connect(nmp, NULL)) {
698 printf("nfs_args: retrying connect\n");
699 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
700 }
701 }
702}
703
704static const char *nfs_opts[] = { "from", "nfs_args",
705 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
706 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
707 "async", NULL };
708
709/*
710 * VFS Operations.
711 *
712 * mount system call
713 * It seems a bit dumb to copyinstr() the host and path here and then
714 * bcopy() them in mountnfs(), but I wanted to detect errors before
715 * doing the sockargs() call because sockargs() allocates an mbuf and
716 * an error after that means that I have to release the mbuf.
717 */
718/* ARGSUSED */
719static int
720nfs_mount(struct mount *mp, struct thread *td)
721{
722 int error;
723 struct nfs_args args;
724 struct sockaddr *nam;
725 struct vnode *vp;
726 char hst[MNAMELEN];
727 size_t len;
728 u_char nfh[NFSX_V3FHMAX];
729
730 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
731 error = EINVAL;
732 goto out;
733 }
734
735 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
736 error = nfs_mountroot(mp, td);
737 goto out;
738 }
739
740 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
741 if (error)
742 goto out;
743
744 if (args.version != NFS_ARGSVERSION) {
745 error = EPROGMISMATCH;
746 goto out;
747 }
748
749 if (mp->mnt_flag & MNT_UPDATE) {
750 struct nfsmount *nmp = VFSTONFS(mp);
751
752 if (nmp == NULL) {
753 error = EIO;
754 goto out;
755 }
756 /*
757 * When doing an update, we can't change from or to
758 * v3, switch lockd strategies or change cookie translation
759 */
760 args.flags = (args.flags &
761 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
762 (nmp->nm_flag &
763 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
764 nfs_decode_args(mp, nmp, &args);
765 goto out;
766 }
767
768 /*
769 * Make the nfs_ip_paranoia sysctl serve as the default connection
770 * or no-connection mode for those protocols that support
771 * no-connection mode (the flag will be cleared later for protocols
772 * that do not support no-connection mode). This will allow a client
773 * to receive replies from a different IP then the request was
774 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
775 * not 0.
776 */
777 if (nfs_ip_paranoia == 0)
778 args.flags |= NFSMNT_NOCONN;
779 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
780 error = EINVAL;
781 goto out;
782 }
783 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
784 if (error)
785 goto out;
786 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
787 if (error)
788 goto out;
789 bzero(&hst[len], MNAMELEN - len);
790 /* sockargs() call must be after above copyin() calls */
791 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
792 if (error)
793 goto out;
794 args.fh = nfh;
795 error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
796out:
797 if (!error) {
798 MNT_ILOCK(mp);
799 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
800 MNT_IUNLOCK(mp);
801 }
802 return (error);
803}
804
805
806/*
807 * VFS Operations.
808 *
809 * mount system call
810 * It seems a bit dumb to copyinstr() the host and path here and then
811 * bcopy() them in mountnfs(), but I wanted to detect errors before
812 * doing the sockargs() call because sockargs() allocates an mbuf and
813 * an error after that means that I have to release the mbuf.
814 */
815/* ARGSUSED */
816static int
817nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
818{
819 int error;
820 struct nfs_args args;
821
822 error = copyin(data, &args, sizeof (struct nfs_args));
823 if (error)
824 return error;
825
826 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
827
828 error = kernel_mount(ma, flags);
829 return (error);
830}
831
832/*
833 * Common code for mount and mountroot
834 */
835static int
836mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
837 char *hst, struct vnode **vpp, struct ucred *cred)
838{
839 struct nfsmount *nmp;
840 struct nfsnode *np;
841 int error;
842 struct vattr attrs;
843
844 if (mp->mnt_flag & MNT_UPDATE) {
845 nmp = VFSTONFS(mp);
846 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
847 FREE(nam, M_SONAME);
848 return (0);
849 } else {
850 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
851 bzero((caddr_t)nmp, sizeof (struct nfsmount));
852 TAILQ_INIT(&nmp->nm_bufq);
853 mp->mnt_data = nmp;
854 }
855 vfs_getnewfsid(mp);
856 nmp->nm_mountp = mp;
857 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
858
859 /*
860 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
861 * high, depending on whether we end up with negative offsets in
862 * the client or server somewhere. 2GB-1 may be safer.
863 *
864 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
865 * that we can handle until we find out otherwise.
866 * XXX Our "safe" limit on the client is what we can store in our
867 * buffer cache using signed(!) block numbers.
868 */
869 if ((argp->flags & NFSMNT_NFSV3) == 0)
870 nmp->nm_maxfilesize = 0xffffffffLL;
871 else
872 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
873
874 nmp->nm_timeo = NFS_TIMEO;
875 nmp->nm_retry = NFS_RETRANS;
876 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
877 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
878 } else {
879 nmp->nm_wsize = NFS_WSIZE;
880 nmp->nm_rsize = NFS_RSIZE;
881 }
882 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
883 nmp->nm_readdirsize = NFS_READDIRSIZE;
884 nmp->nm_numgrps = NFS_MAXGRPS;
885 nmp->nm_readahead = NFS_DEFRAHEAD;
886 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
887 nmp->nm_tprintf_delay = nfs_tprintf_delay;
888 if (nmp->nm_tprintf_delay < 0)
889 nmp->nm_tprintf_delay = 0;
890 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
891 if (nmp->nm_tprintf_initial_delay < 0)
892 nmp->nm_tprintf_initial_delay = 0;
893 nmp->nm_fhsize = argp->fhsize;
894 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
895 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
896 nmp->nm_nam = nam;
897 /* Set up the sockets and per-host congestion */
898 nmp->nm_sotype = argp->sotype;
899 nmp->nm_soproto = argp->proto;
900 nmp->nm_rpcops = &nfs_rpcops;
901
902 nfs_decode_args(mp, nmp, argp);
903
904 /*
905 * For Connection based sockets (TCP,...) defer the connect until
906 * the first request, in case the server is not responding.
907 */
908 if (nmp->nm_sotype == SOCK_DGRAM &&
909 (error = nfs_connect(nmp, NULL)))
910 goto bad;
911
912 /*
913 * This is silly, but it has to be set so that vinifod() works.
914 * We do not want to do an nfs_statfs() here since we can get
915 * stuck on a dead server and we are holding a lock on the mount
916 * point.
917 */
918 mtx_lock(&nmp->nm_mtx);
919 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
920 mtx_unlock(&nmp->nm_mtx);
921 /*
922 * A reference count is needed on the nfsnode representing the
923 * remote root. If this object is not persistent, then backward
924 * traversals of the mount point (i.e. "..") will not work if
925 * the nfsnode gets flushed out of the cache. Ufs does not have
926 * this problem, because one can identify root inodes by their
927 * number == ROOTINO (2).
928 */
929 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
930 if (error)
931 goto bad;
932 *vpp = NFSTOV(np);
933
934 /*
935 * Get file attributes and transfer parameters for the
936 * mountpoint. This has the side effect of filling in
937 * (*vpp)->v_type with the correct value.
938 */
939 if (argp->flags & NFSMNT_NFSV3)
940 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
941 else
942 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread);
943
944 /*
945 * Lose the lock but keep the ref.
946 */
947 VOP_UNLOCK(*vpp, 0);
948
949 return (0);
950bad:
951 nfs_disconnect(nmp);
952 mtx_destroy(&nmp->nm_mtx);
953 uma_zfree(nfsmount_zone, nmp);
954 FREE(nam, M_SONAME);
955 return (error);
956}
957
958/*
959 * unmount system call
960 */
961static int
962nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
963{
964 struct nfsmount *nmp;
965 int error, flags = 0;
966
967 if (mntflags & MNT_FORCE)
968 flags |= FORCECLOSE;
969 nmp = VFSTONFS(mp);
970 /*
971 * Goes something like this..
972 * - Call vflush() to clear out vnodes for this filesystem
973 * - Close the socket
974 * - Free up the data structures
975 */
976 /* In the forced case, cancel any outstanding requests. */
977 if (flags & FORCECLOSE) {
978 error = nfs_nmcancelreqs(nmp);
979 if (error)
980 goto out;
981 }
982 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
983 error = vflush(mp, 1, flags, td);
984 if (error)
985 goto out;
986
987 /*
988 * We are now committed to the unmount.
989 */
990 nfs_disconnect(nmp);
991 FREE(nmp->nm_nam, M_SONAME);
992
993 mtx_destroy(&nmp->nm_mtx);
994 uma_zfree(nfsmount_zone, nmp);
995out:
996 return (error);
997}
998
999/*
1000 * Return root of a filesystem
1001 */
1002static int
1003nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
1004{
1005 struct vnode *vp;
1006 struct nfsmount *nmp;
1007 struct nfsnode *np;
1008 int error;
1009
1010 nmp = VFSTONFS(mp);
1011 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1012 if (error)
1013 return error;
1014 vp = NFSTOV(np);
1015 /*
1016 * Get transfer parameters and attributes for root vnode once.
1017 */
1018 mtx_lock(&nmp->nm_mtx);
1019 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1020 (nmp->nm_flag & NFSMNT_NFSV3)) {
1021 mtx_unlock(&nmp->nm_mtx);
1022 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1023 } else
1024 mtx_unlock(&nmp->nm_mtx);
1025 if (vp->v_type == VNON)
1026 vp->v_type = VDIR;
1027 vp->v_vflag |= VV_ROOT;
1028 *vpp = vp;
1029 return (0);
1030}
1031
1032/*
1033 * Flush out the buffer cache
1034 */
1035/* ARGSUSED */
1036static int
1037nfs_sync(struct mount *mp, int waitfor, struct thread *td)
1038{
1039 struct vnode *vp, *mvp;
1040 int error, allerror = 0;
1041
1042 /*
1043 * Force stale buffer cache information to be flushed.
1044 */
1045 MNT_ILOCK(mp);
1046loop:
1047 MNT_VNODE_FOREACH(vp, mp, mvp) {
1048 VI_LOCK(vp);
1049 MNT_IUNLOCK(mp);
1050 if (VOP_ISLOCKED(vp, curthread) ||
1051 vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1050 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1052 waitfor == MNT_LAZY) {
1053 VI_UNLOCK(vp);
1054 MNT_ILOCK(mp);
1055 continue;
1056 }
1057 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1058 MNT_ILOCK(mp);
1059 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1060 goto loop;
1061 }
1062 error = VOP_FSYNC(vp, waitfor, td);
1063 if (error)
1064 allerror = error;
1065 VOP_UNLOCK(vp, 0);
1066 vrele(vp);
1067
1068 MNT_ILOCK(mp);
1069 }
1070 MNT_IUNLOCK(mp);
1071 return (allerror);
1072}
1073
1074static int
1075nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1076{
1077 struct nfsmount *nmp = VFSTONFS(mp);
1078 struct vfsquery vq;
1079 int error;
1080
1081 bzero(&vq, sizeof(vq));
1082 switch (op) {
1083#if 0
1084 case VFS_CTL_NOLOCKS:
1085 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1086 if (req->oldptr != NULL) {
1087 error = SYSCTL_OUT(req, &val, sizeof(val));
1088 if (error)
1089 return (error);
1090 }
1091 if (req->newptr != NULL) {
1092 error = SYSCTL_IN(req, &val, sizeof(val));
1093 if (error)
1094 return (error);
1095 if (val)
1096 nmp->nm_flag |= NFSMNT_NOLOCKS;
1097 else
1098 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1099 }
1100 break;
1101#endif
1102 case VFS_CTL_QUERY:
1103 mtx_lock(&nmp->nm_mtx);
1104 if (nmp->nm_state & NFSSTA_TIMEO)
1105 vq.vq_flags |= VQ_NOTRESP;
1106 mtx_unlock(&nmp->nm_mtx);
1107#if 0
1108 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1109 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1110 vq.vq_flags |= VQ_NOTRESPLOCK;
1111#endif
1112 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1113 break;
1114 case VFS_CTL_TIMEO:
1115 if (req->oldptr != NULL) {
1116 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1117 sizeof(nmp->nm_tprintf_initial_delay));
1118 if (error)
1119 return (error);
1120 }
1121 if (req->newptr != NULL) {
1122 error = vfs_suser(mp, req->td);
1123 if (error)
1124 return (error);
1125 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1126 sizeof(nmp->nm_tprintf_initial_delay));
1127 if (error)
1128 return (error);
1129 if (nmp->nm_tprintf_initial_delay < 0)
1130 nmp->nm_tprintf_initial_delay = 0;
1131 }
1132 break;
1133 default:
1134 return (ENOTSUP);
1135 }
1136 return (0);
1137}
1051 waitfor == MNT_LAZY) {
1052 VI_UNLOCK(vp);
1053 MNT_ILOCK(mp);
1054 continue;
1055 }
1056 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1057 MNT_ILOCK(mp);
1058 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1059 goto loop;
1060 }
1061 error = VOP_FSYNC(vp, waitfor, td);
1062 if (error)
1063 allerror = error;
1064 VOP_UNLOCK(vp, 0);
1065 vrele(vp);
1066
1067 MNT_ILOCK(mp);
1068 }
1069 MNT_IUNLOCK(mp);
1070 return (allerror);
1071}
1072
1073static int
1074nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1075{
1076 struct nfsmount *nmp = VFSTONFS(mp);
1077 struct vfsquery vq;
1078 int error;
1079
1080 bzero(&vq, sizeof(vq));
1081 switch (op) {
1082#if 0
1083 case VFS_CTL_NOLOCKS:
1084 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1085 if (req->oldptr != NULL) {
1086 error = SYSCTL_OUT(req, &val, sizeof(val));
1087 if (error)
1088 return (error);
1089 }
1090 if (req->newptr != NULL) {
1091 error = SYSCTL_IN(req, &val, sizeof(val));
1092 if (error)
1093 return (error);
1094 if (val)
1095 nmp->nm_flag |= NFSMNT_NOLOCKS;
1096 else
1097 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1098 }
1099 break;
1100#endif
1101 case VFS_CTL_QUERY:
1102 mtx_lock(&nmp->nm_mtx);
1103 if (nmp->nm_state & NFSSTA_TIMEO)
1104 vq.vq_flags |= VQ_NOTRESP;
1105 mtx_unlock(&nmp->nm_mtx);
1106#if 0
1107 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1108 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1109 vq.vq_flags |= VQ_NOTRESPLOCK;
1110#endif
1111 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1112 break;
1113 case VFS_CTL_TIMEO:
1114 if (req->oldptr != NULL) {
1115 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1116 sizeof(nmp->nm_tprintf_initial_delay));
1117 if (error)
1118 return (error);
1119 }
1120 if (req->newptr != NULL) {
1121 error = vfs_suser(mp, req->td);
1122 if (error)
1123 return (error);
1124 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1125 sizeof(nmp->nm_tprintf_initial_delay));
1126 if (error)
1127 return (error);
1128 if (nmp->nm_tprintf_initial_delay < 0)
1129 nmp->nm_tprintf_initial_delay = 0;
1130 }
1131 break;
1132 default:
1133 return (ENOTSUP);
1134 }
1135 return (0);
1136}