37 38 39#include "opt_bootp.h" 40#include "opt_nfsroot.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/bio.h> 46#include <sys/buf.h> 47#include <sys/clock.h> 48#include <sys/lock.h> 49#include <sys/malloc.h> 50#include <sys/mbuf.h> 51#include <sys/module.h> 52#include <sys/mount.h> 53#include <sys/proc.h> 54#include <sys/socket.h> 55#include <sys/socketvar.h> 56#include <sys/sockio.h> 57#include <sys/sysctl.h> 58#include <sys/vnode.h> 59#include <sys/signalvar.h> 60 61#include <vm/vm.h> 62#include <vm/vm_extern.h> 63#include <vm/uma.h> 64 65#include <net/if.h> 66#include <net/route.h> 67#include <netinet/in.h> 68 69#include <rpc/rpcclnt.h> 70 71#include <nfs/rpcv2.h> 72#include <nfs/nfsproto.h> 73#include <nfsclient/nfs.h> 74#include <nfsclient/nfsnode.h> 75#include <nfsclient/nfsmount.h> 76#include <nfs/xdr_subs.h> 77#include <nfsclient/nfsm_subs.h> 78#include <nfsclient/nfsdiskless.h> 79 80MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header"); 81MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle"); 82MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data"); 83MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables"); 84MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state"); 85 86uma_zone_t nfsmount_zone; 87 88struct nfsstats nfsstats; 89 90SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); 91SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW, 92 &nfsstats, nfsstats, "S,nfsstats"); 93static int nfs_ip_paranoia = 1; 94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, 95 &nfs_ip_paranoia, 0, ""); 96#ifdef NFS_DEBUG 97int nfs_debug; 98SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); 99#endif 100static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; 101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, 102 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); 103/* how long between console messages "nfs server foo not responding" */ 104static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; 105SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, 106 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); 107 108static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, 109 struct nfs_args *argp); 110static int mountnfs(struct nfs_args *, struct mount *, 111 struct sockaddr *, char *, struct vnode **, 112 struct ucred *cred); 113static vfs_mount_t nfs_mount; 114static vfs_cmount_t nfs_cmount; 115static vfs_unmount_t nfs_unmount; 116static vfs_root_t nfs_root; 117static vfs_statfs_t nfs_statfs; 118static vfs_sync_t nfs_sync; 119static vfs_sysctl_t nfs_sysctl; 120 121/* 122 * nfs vfs operations. 123 */ 124static struct vfsops nfs_vfsops = { 125 .vfs_init = nfs_init, 126 .vfs_mount = nfs_mount, 127 .vfs_cmount = nfs_cmount, 128 .vfs_root = nfs_root, 129 .vfs_statfs = nfs_statfs, 130 .vfs_sync = nfs_sync, 131 .vfs_uninit = nfs_uninit, 132 .vfs_unmount = nfs_unmount, 133 .vfs_sysctl = nfs_sysctl, 134}; 135VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); 136 137/* So that loader and kldload(2) can find us, wherever we are.. */ 138MODULE_VERSION(nfs, 1); 139 140static struct nfs_rpcops nfs_rpcops = { 141 nfs_readrpc, 142 nfs_writerpc, 143 nfs_writebp, 144 nfs_readlinkrpc, 145 nfs_invaldir, 146 nfs_commit, 147}; 148 149/* 150 * This structure must be filled in by a primary bootstrap or bootstrap 151 * server for a diskless/dataless machine. It is initialized below just 152 * to ensure that it is allocated to initialized data (.data not .bss). 153 */ 154struct nfs_diskless nfs_diskless = { { { 0 } } }; 155struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; 156int nfs_diskless_valid = 0; 157 158SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, 159 &nfs_diskless_valid, 0, ""); 160 161SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, 162 nfsv3_diskless.root_hostnam, 0, ""); 163 164SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, 165 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, 166 "%Ssockaddr_in", ""); 167 168 169void nfsargs_ntoh(struct nfs_args *); 170static int nfs_mountdiskless(char *, 171 struct sockaddr_in *, struct nfs_args *, 172 struct thread *, struct vnode **, struct mount *); 173static void nfs_convert_diskless(void); 174static void nfs_convert_oargs(struct nfs_args *args, 175 struct onfs_args *oargs); 176 177int 178nfs_iosize(struct nfsmount *nmp) 179{ 180 int iosize; 181 182 /* 183 * Calculate the size used for io buffers. Use the larger 184 * of the two sizes to minimise nfs requests but make sure 185 * that it is at least one VM page to avoid wasting buffer 186 * space. 187 */ 188 iosize = imax(nmp->nm_rsize, nmp->nm_wsize); 189 iosize = imax(iosize, PAGE_SIZE); 190 return (iosize); 191} 192 193static void 194nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) 195{ 196 197 args->version = NFS_ARGSVERSION; 198 args->addr = oargs->addr; 199 args->addrlen = oargs->addrlen; 200 args->sotype = oargs->sotype; 201 args->proto = oargs->proto; 202 args->fh = oargs->fh; 203 args->fhsize = oargs->fhsize; 204 args->flags = oargs->flags; 205 args->wsize = oargs->wsize; 206 args->rsize = oargs->rsize; 207 args->readdirsize = oargs->readdirsize; 208 args->timeo = oargs->timeo; 209 args->retrans = oargs->retrans; 210 args->maxgrouplist = oargs->maxgrouplist; 211 args->readahead = oargs->readahead; 212 args->deadthresh = oargs->deadthresh; 213 args->hostname = oargs->hostname; 214} 215 216static void 217nfs_convert_diskless(void) 218{ 219 220 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, 221 sizeof(struct ifaliasreq)); 222 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, 223 sizeof(struct sockaddr_in)); 224 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); 225 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { 226 nfsv3_diskless.root_fhsize = NFSX_V3FH; 227 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH); 228 } else { 229 nfsv3_diskless.root_fhsize = NFSX_V2FH; 230 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); 231 } 232 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, 233 sizeof(struct sockaddr_in)); 234 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); 235 nfsv3_diskless.root_time = nfs_diskless.root_time; 236 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, 237 MAXHOSTNAMELEN); 238 nfs_diskless_valid = 3; 239} 240 241/* 242 * nfs statfs call 243 */ 244static int 245nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 246{ 247 struct vnode *vp; 248 struct nfs_statfs *sfp; 249 caddr_t bpos, dpos; 250 struct nfsmount *nmp = VFSTONFS(mp); 251 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; 252 struct mbuf *mreq, *mrep, *md, *mb; 253 struct nfsnode *np; 254 u_quad_t tquad; 255 256#ifndef nolint 257 sfp = NULL; 258#endif 259 error = vfs_busy(mp, LK_NOWAIT, NULL, td); 260 if (error) 261 return (error); 262 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); 263 if (error) { 264 vfs_unbusy(mp, td); 265 return (error); 266 } 267 vp = NFSTOV(np); 268 mtx_lock(&nmp->nm_mtx); 269 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 270 mtx_unlock(&nmp->nm_mtx); 271 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td); 272 } else 273 mtx_unlock(&nmp->nm_mtx); 274 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 275 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); 276 mb = mreq; 277 bpos = mtod(mb, caddr_t); 278 nfsm_fhtom(vp, v3); 279 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred); 280 if (v3) 281 nfsm_postop_attr(vp, retattr); 282 if (error) { 283 if (mrep != NULL) 284 m_freem(mrep); 285 goto nfsmout; 286 } 287 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3)); 288 mtx_lock(&nmp->nm_mtx); 289 sbp->f_iosize = nfs_iosize(nmp); 290 mtx_unlock(&nmp->nm_mtx); 291 if (v3) { 292 sbp->f_bsize = NFS_FABLKSIZE; 293 tquad = fxdr_hyper(&sfp->sf_tbytes); 294 sbp->f_blocks = tquad / NFS_FABLKSIZE; 295 tquad = fxdr_hyper(&sfp->sf_fbytes); 296 sbp->f_bfree = tquad / NFS_FABLKSIZE; 297 tquad = fxdr_hyper(&sfp->sf_abytes); 298 sbp->f_bavail = tquad / NFS_FABLKSIZE; 299 sbp->f_files = (fxdr_unsigned(int32_t, 300 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); 301 sbp->f_ffree = (fxdr_unsigned(int32_t, 302 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); 303 } else { 304 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 305 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 306 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 307 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 308 sbp->f_files = 0; 309 sbp->f_ffree = 0; 310 } 311 m_freem(mrep); 312nfsmout: 313 vput(vp); 314 vfs_unbusy(mp, td); 315 return (error); 316} 317 318/* 319 * nfs version 3 fsinfo rpc call 320 */ 321int 322nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, 323 struct thread *td) 324{ 325 struct nfsv3_fsinfo *fsp; 326 u_int32_t pref, max; 327 caddr_t bpos, dpos; 328 int error = 0, retattr; 329 struct mbuf *mreq, *mrep, *md, *mb; 330 u_int64_t maxfsize; 331 332 nfsstats.rpccnt[NFSPROC_FSINFO]++; 333 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); 334 mb = mreq; 335 bpos = mtod(mb, caddr_t); 336 nfsm_fhtom(vp, 1); 337 nfsm_request(vp, NFSPROC_FSINFO, td, cred); 338 nfsm_postop_attr(vp, retattr); 339 if (!error) { 340 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO); 341 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); 342 mtx_lock(&nmp->nm_mtx); 343 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) 344 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & 345 ~(NFS_FABLKSIZE - 1); 346 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); 347 if (max < nmp->nm_wsize && max > 0) { 348 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); 349 if (nmp->nm_wsize == 0) 350 nmp->nm_wsize = max; 351 } 352 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); 353 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) 354 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & 355 ~(NFS_FABLKSIZE - 1); 356 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); 357 if (max < nmp->nm_rsize && max > 0) { 358 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); 359 if (nmp->nm_rsize == 0) 360 nmp->nm_rsize = max; 361 } 362 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref); 363 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) 364 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & 365 ~(NFS_DIRBLKSIZ - 1); 366 if (max < nmp->nm_readdirsize && max > 0) { 367 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); 368 if (nmp->nm_readdirsize == 0) 369 nmp->nm_readdirsize = max; 370 } 371 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); 372 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) 373 nmp->nm_maxfilesize = maxfsize; 374 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp); 375 nmp->nm_state |= NFSSTA_GOTFSINFO; 376 mtx_unlock(&nmp->nm_mtx); 377 } 378 m_freem(mrep); 379nfsmout: 380 return (error); 381} 382 383/* 384 * Mount a remote root fs via. nfs. This depends on the info in the 385 * nfs_diskless structure that has been filled in properly by some primary 386 * bootstrap. 387 * It goes something like this: 388 * - do enough of "ifconfig" by calling ifioctl() so that the system 389 * can talk to the server 390 * - If nfs_diskless.mygateway is filled in, use that address as 391 * a default gateway. 392 * - build the rootfs mount point and call mountnfs() to do the rest. 393 * 394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless 395 * structure, as well as other global NFS client variables here, as 396 * nfs_mountroot() will be called once in the boot before any other NFS 397 * client activity occurs. 398 */ 399int 400nfs_mountroot(struct mount *mp, struct thread *td) 401{ 402 struct nfsv3_diskless *nd = &nfsv3_diskless; 403 struct socket *so; 404 struct vnode *vp; 405 struct ifreq ir; 406 int error, i; 407 u_long l; 408 char buf[128]; 409 char *cp; 410 411#if defined(BOOTP_NFSROOT) && defined(BOOTP) 412 bootpc_init(); /* use bootp to get nfs_diskless filled in */ 413#elif defined(NFS_ROOT) 414 nfs_setup_diskless(); 415#endif 416 417 if (nfs_diskless_valid == 0) 418 return (-1); 419 if (nfs_diskless_valid == 1) 420 nfs_convert_diskless(); 421 422 /* 423 * XXX splnet, so networks will receive... 424 */ 425 splnet(); 426 427 /* 428 * Do enough of ifconfig(8) so that the critical net interface can 429 * talk to the server. 430 */ 431 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, 432 td->td_ucred, td); 433 if (error) 434 panic("nfs_mountroot: socreate(%04x): %d", 435 nd->myif.ifra_addr.sa_family, error); 436 437#if 0 /* XXX Bad idea */ 438 /* 439 * We might not have been told the right interface, so we pass 440 * over the first ten interfaces of the same kind, until we get 441 * one of them configured. 442 */ 443 444 for (i = strlen(nd->myif.ifra_name) - 1; 445 nd->myif.ifra_name[i] >= '0' && 446 nd->myif.ifra_name[i] <= '9'; 447 nd->myif.ifra_name[i] ++) { 448 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 449 if(!error) 450 break; 451 } 452#endif 453 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 454 if (error) 455 panic("nfs_mountroot: SIOCAIFADDR: %d", error); 456 if ((cp = getenv("boot.netif.mtu")) != NULL) { 457 ir.ifr_mtu = strtol(cp, NULL, 10); 458 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); 459 freeenv(cp); 460 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); 461 if (error) 462 printf("nfs_mountroot: SIOCSIFMTU: %d", error); 463 } 464 soclose(so); 465 466 /* 467 * If the gateway field is filled in, set it as the default route. 468 * Note that pxeboot will set a default route of 0 if the route 469 * is not set by the DHCP server. Check also for a value of 0 470 * to avoid panicking inappropriately in that situation. 471 */ 472 if (nd->mygateway.sin_len != 0 && 473 nd->mygateway.sin_addr.s_addr != 0) { 474 struct sockaddr_in mask, sin; 475 476 bzero((caddr_t)&mask, sizeof(mask)); 477 sin = mask; 478 sin.sin_family = AF_INET; 479 sin.sin_len = sizeof(sin); 480 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, 481 (struct sockaddr *)&nd->mygateway, 482 (struct sockaddr *)&mask, 483 RTF_UP | RTF_GATEWAY, NULL); 484 if (error) 485 panic("nfs_mountroot: RTM_ADD: %d", error); 486 } 487 488 /* 489 * Create the rootfs mount point. 490 */ 491 nd->root_args.fh = nd->root_fh; 492 nd->root_args.fhsize = nd->root_fhsize; 493 l = ntohl(nd->root_saddr.sin_addr.s_addr); 494 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 495 (l >> 24) & 0xff, (l >> 16) & 0xff, 496 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); 497 printf("NFS ROOT: %s\n", buf); 498 if ((error = nfs_mountdiskless(buf, 499 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { 500 return (error); 501 } 502 503 /* 504 * This is not really an nfs issue, but it is much easier to 505 * set hostname here and then let the "/etc/rc.xxx" files 506 * mount the right /var based upon its preset value. 507 */ 508 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); 509 hostname[MAXHOSTNAMELEN - 1] = '\0'; 510 for (i = 0; i < MAXHOSTNAMELEN; i++) 511 if (hostname[i] == '\0') 512 break; 513 inittodr(ntohl(nd->root_time)); 514 return (0); 515} 516 517/* 518 * Internal version of mount system call for diskless setup. 519 */ 520static int 521nfs_mountdiskless(char *path, 522 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, 523 struct vnode **vpp, struct mount *mp) 524{ 525 struct sockaddr *nam; 526 int error; 527 528 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); 529 if ((error = mountnfs(args, mp, nam, path, vpp, 530 td->td_ucred)) != 0) { 531 printf("nfs_mountroot: mount %s on /: %d\n", path, error); 532 return (error); 533 } 534 return (0); 535} 536 537static void 538nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp) 539{ 540 int s; 541 int adjsock; 542 int maxio; 543 544 s = splnet(); 545 546 /* 547 * Set read-only flag if requested; otherwise, clear it if this is 548 * an update. If this is not an update, then either the read-only 549 * flag is already clear, or this is a root mount and it was set 550 * intentionally at some previous point. 551 */ 552 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { 553 MNT_ILOCK(mp); 554 mp->mnt_flag |= MNT_RDONLY; 555 MNT_IUNLOCK(mp); 556 } else if (mp->mnt_flag & MNT_UPDATE) { 557 MNT_ILOCK(mp); 558 mp->mnt_flag &= ~MNT_RDONLY; 559 MNT_IUNLOCK(mp); 560 } 561 562 /* 563 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes 564 * no sense in that context. Also, set up appropriate retransmit 565 * and soft timeout behavior. 566 */ 567 if (argp->sotype == SOCK_STREAM) { 568 nmp->nm_flag &= ~NFSMNT_NOCONN; 569 nmp->nm_flag |= NFSMNT_DUMBTIMR; 570 nmp->nm_timeo = NFS_MAXTIMEO; 571 nmp->nm_retry = NFS_RETRANS_TCP; 572 } 573 574 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */ 575 if ((argp->flags & NFSMNT_NFSV3) == 0) 576 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 577 578 /* Re-bind if rsrvd port requested and wasn't on one */ 579 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) 580 && (argp->flags & NFSMNT_RESVPORT); 581 /* Also re-bind if we're switching to/from a connected UDP socket */ 582 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != 583 (argp->flags & NFSMNT_NOCONN)); 584 585 /* Update flags atomically. Don't change the lock bits. */ 586 nmp->nm_flag = argp->flags | nmp->nm_flag; 587 splx(s); 588 589 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 590 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 591 if (nmp->nm_timeo < NFS_MINTIMEO) 592 nmp->nm_timeo = NFS_MINTIMEO; 593 else if (nmp->nm_timeo > NFS_MAXTIMEO) 594 nmp->nm_timeo = NFS_MAXTIMEO; 595 } 596 597 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 598 nmp->nm_retry = argp->retrans; 599 if (nmp->nm_retry > NFS_MAXREXMIT) 600 nmp->nm_retry = NFS_MAXREXMIT; 601 } 602 603 if (argp->flags & NFSMNT_NFSV3) { 604 if (argp->sotype == SOCK_DGRAM) 605 maxio = NFS_MAXDGRAMDATA; 606 else 607 maxio = NFS_MAXDATA; 608 } else 609 maxio = NFS_V2MAXDATA; 610 611 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 612 nmp->nm_wsize = argp->wsize; 613 /* Round down to multiple of blocksize */ 614 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); 615 if (nmp->nm_wsize <= 0) 616 nmp->nm_wsize = NFS_FABLKSIZE; 617 } 618 if (nmp->nm_wsize > maxio) 619 nmp->nm_wsize = maxio; 620 if (nmp->nm_wsize > MAXBSIZE) 621 nmp->nm_wsize = MAXBSIZE; 622 623 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 624 nmp->nm_rsize = argp->rsize; 625 /* Round down to multiple of blocksize */ 626 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); 627 if (nmp->nm_rsize <= 0) 628 nmp->nm_rsize = NFS_FABLKSIZE; 629 } 630 if (nmp->nm_rsize > maxio) 631 nmp->nm_rsize = maxio; 632 if (nmp->nm_rsize > MAXBSIZE) 633 nmp->nm_rsize = MAXBSIZE; 634 635 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { 636 nmp->nm_readdirsize = argp->readdirsize; 637 } 638 if (nmp->nm_readdirsize > maxio) 639 nmp->nm_readdirsize = maxio; 640 if (nmp->nm_readdirsize > nmp->nm_rsize) 641 nmp->nm_readdirsize = nmp->nm_rsize; 642 643 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) 644 nmp->nm_acregmin = argp->acregmin; 645 else 646 nmp->nm_acregmin = NFS_MINATTRTIMO; 647 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) 648 nmp->nm_acregmax = argp->acregmax; 649 else 650 nmp->nm_acregmax = NFS_MAXATTRTIMO; 651 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) 652 nmp->nm_acdirmin = argp->acdirmin; 653 else 654 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; 655 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) 656 nmp->nm_acdirmax = argp->acdirmax; 657 else 658 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; 659 if (nmp->nm_acdirmin > nmp->nm_acdirmax) 660 nmp->nm_acdirmin = nmp->nm_acdirmax; 661 if (nmp->nm_acregmin > nmp->nm_acregmax) 662 nmp->nm_acregmin = nmp->nm_acregmax; 663 664 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) { 665 if (argp->maxgrouplist <= NFS_MAXGRPS) 666 nmp->nm_numgrps = argp->maxgrouplist; 667 else 668 nmp->nm_numgrps = NFS_MAXGRPS; 669 } 670 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { 671 if (argp->readahead <= NFS_MAXRAHEAD) 672 nmp->nm_readahead = argp->readahead; 673 else 674 nmp->nm_readahead = NFS_MAXRAHEAD; 675 } 676 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { 677 if (argp->wcommitsize < nmp->nm_wsize) 678 nmp->nm_wcommitsize = nmp->nm_wsize; 679 else 680 nmp->nm_wcommitsize = argp->wcommitsize; 681 } 682 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) { 683 if (argp->deadthresh <= NFS_MAXDEADTHRESH) 684 nmp->nm_deadthresh = argp->deadthresh; 685 else 686 nmp->nm_deadthresh = NFS_MAXDEADTHRESH; 687 } 688 689 adjsock |= ((nmp->nm_sotype != argp->sotype) || 690 (nmp->nm_soproto != argp->proto)); 691 nmp->nm_sotype = argp->sotype; 692 nmp->nm_soproto = argp->proto; 693 694 if (nmp->nm_so && adjsock) { 695 nfs_safedisconnect(nmp); 696 if (nmp->nm_sotype == SOCK_DGRAM) 697 while (nfs_connect(nmp, NULL)) { 698 printf("nfs_args: retrying connect\n"); 699 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 700 } 701 } 702} 703 704static const char *nfs_opts[] = { "from", "nfs_args", 705 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", 706 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", 707 "async", NULL }; 708 709/* 710 * VFS Operations. 711 * 712 * mount system call 713 * It seems a bit dumb to copyinstr() the host and path here and then 714 * bcopy() them in mountnfs(), but I wanted to detect errors before 715 * doing the sockargs() call because sockargs() allocates an mbuf and 716 * an error after that means that I have to release the mbuf. 717 */ 718/* ARGSUSED */ 719static int 720nfs_mount(struct mount *mp, struct thread *td) 721{ 722 int error; 723 struct nfs_args args; 724 struct sockaddr *nam; 725 struct vnode *vp; 726 char hst[MNAMELEN]; 727 size_t len; 728 u_char nfh[NFSX_V3FHMAX]; 729 730 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { 731 error = EINVAL; 732 goto out; 733 } 734 735 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { 736 error = nfs_mountroot(mp, td); 737 goto out; 738 } 739 740 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args); 741 if (error) 742 goto out; 743 744 if (args.version != NFS_ARGSVERSION) { 745 error = EPROGMISMATCH; 746 goto out; 747 } 748 749 if (mp->mnt_flag & MNT_UPDATE) { 750 struct nfsmount *nmp = VFSTONFS(mp); 751 752 if (nmp == NULL) { 753 error = EIO; 754 goto out; 755 } 756 /* 757 * When doing an update, we can't change from or to 758 * v3, switch lockd strategies or change cookie translation 759 */ 760 args.flags = (args.flags & 761 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | 762 (nmp->nm_flag & 763 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); 764 nfs_decode_args(mp, nmp, &args); 765 goto out; 766 } 767 768 /* 769 * Make the nfs_ip_paranoia sysctl serve as the default connection 770 * or no-connection mode for those protocols that support 771 * no-connection mode (the flag will be cleared later for protocols 772 * that do not support no-connection mode). This will allow a client 773 * to receive replies from a different IP then the request was 774 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), 775 * not 0. 776 */ 777 if (nfs_ip_paranoia == 0) 778 args.flags |= NFSMNT_NOCONN; 779 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { 780 error = EINVAL; 781 goto out; 782 } 783 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); 784 if (error) 785 goto out; 786 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); 787 if (error) 788 goto out; 789 bzero(&hst[len], MNAMELEN - len); 790 /* sockargs() call must be after above copyin() calls */ 791 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); 792 if (error) 793 goto out; 794 args.fh = nfh; 795 error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred); 796out: 797 if (!error) { 798 MNT_ILOCK(mp); 799 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED); 800 MNT_IUNLOCK(mp); 801 } 802 return (error); 803} 804 805 806/* 807 * VFS Operations. 808 * 809 * mount system call 810 * It seems a bit dumb to copyinstr() the host and path here and then 811 * bcopy() them in mountnfs(), but I wanted to detect errors before 812 * doing the sockargs() call because sockargs() allocates an mbuf and 813 * an error after that means that I have to release the mbuf. 814 */ 815/* ARGSUSED */ 816static int 817nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td) 818{ 819 int error; 820 struct nfs_args args; 821 822 error = copyin(data, &args, sizeof (struct nfs_args)); 823 if (error) 824 return error; 825 826 ma = mount_arg(ma, "nfs_args", &args, sizeof args); 827 828 error = kernel_mount(ma, flags); 829 return (error); 830} 831 832/* 833 * Common code for mount and mountroot 834 */ 835static int 836mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, 837 char *hst, struct vnode **vpp, struct ucred *cred) 838{ 839 struct nfsmount *nmp; 840 struct nfsnode *np; 841 int error; 842 struct vattr attrs; 843 844 if (mp->mnt_flag & MNT_UPDATE) { 845 nmp = VFSTONFS(mp); 846 printf("%s: MNT_UPDATE is no longer handled here\n", __func__); 847 FREE(nam, M_SONAME); 848 return (0); 849 } else { 850 nmp = uma_zalloc(nfsmount_zone, M_WAITOK); 851 bzero((caddr_t)nmp, sizeof (struct nfsmount)); 852 TAILQ_INIT(&nmp->nm_bufq); 853 mp->mnt_data = nmp; 854 } 855 vfs_getnewfsid(mp); 856 nmp->nm_mountp = mp; 857 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF); 858 859 /* 860 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too 861 * high, depending on whether we end up with negative offsets in 862 * the client or server somewhere. 2GB-1 may be safer. 863 * 864 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum 865 * that we can handle until we find out otherwise. 866 * XXX Our "safe" limit on the client is what we can store in our 867 * buffer cache using signed(!) block numbers. 868 */ 869 if ((argp->flags & NFSMNT_NFSV3) == 0) 870 nmp->nm_maxfilesize = 0xffffffffLL; 871 else 872 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1; 873 874 nmp->nm_timeo = NFS_TIMEO; 875 nmp->nm_retry = NFS_RETRANS; 876 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) { 877 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA; 878 } else { 879 nmp->nm_wsize = NFS_WSIZE; 880 nmp->nm_rsize = NFS_RSIZE; 881 } 882 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); 883 nmp->nm_readdirsize = NFS_READDIRSIZE; 884 nmp->nm_numgrps = NFS_MAXGRPS; 885 nmp->nm_readahead = NFS_DEFRAHEAD; 886 nmp->nm_deadthresh = NFS_MAXDEADTHRESH; 887 nmp->nm_tprintf_delay = nfs_tprintf_delay; 888 if (nmp->nm_tprintf_delay < 0) 889 nmp->nm_tprintf_delay = 0; 890 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; 891 if (nmp->nm_tprintf_initial_delay < 0) 892 nmp->nm_tprintf_initial_delay = 0; 893 nmp->nm_fhsize = argp->fhsize; 894 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); 895 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 896 nmp->nm_nam = nam; 897 /* Set up the sockets and per-host congestion */ 898 nmp->nm_sotype = argp->sotype; 899 nmp->nm_soproto = argp->proto; 900 nmp->nm_rpcops = &nfs_rpcops; 901 902 nfs_decode_args(mp, nmp, argp); 903 904 /* 905 * For Connection based sockets (TCP,...) defer the connect until 906 * the first request, in case the server is not responding. 907 */ 908 if (nmp->nm_sotype == SOCK_DGRAM && 909 (error = nfs_connect(nmp, NULL))) 910 goto bad; 911 912 /* 913 * This is silly, but it has to be set so that vinifod() works. 914 * We do not want to do an nfs_statfs() here since we can get 915 * stuck on a dead server and we are holding a lock on the mount 916 * point. 917 */ 918 mtx_lock(&nmp->nm_mtx); 919 mp->mnt_stat.f_iosize = nfs_iosize(nmp); 920 mtx_unlock(&nmp->nm_mtx); 921 /* 922 * A reference count is needed on the nfsnode representing the 923 * remote root. If this object is not persistent, then backward 924 * traversals of the mount point (i.e. "..") will not work if 925 * the nfsnode gets flushed out of the cache. Ufs does not have 926 * this problem, because one can identify root inodes by their 927 * number == ROOTINO (2). 928 */ 929 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); 930 if (error) 931 goto bad; 932 *vpp = NFSTOV(np); 933 934 /* 935 * Get file attributes and transfer parameters for the 936 * mountpoint. This has the side effect of filling in 937 * (*vpp)->v_type with the correct value. 938 */ 939 if (argp->flags & NFSMNT_NFSV3) 940 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread); 941 else 942 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread); 943 944 /* 945 * Lose the lock but keep the ref. 946 */ 947 VOP_UNLOCK(*vpp, 0); 948 949 return (0); 950bad: 951 nfs_disconnect(nmp); 952 mtx_destroy(&nmp->nm_mtx); 953 uma_zfree(nfsmount_zone, nmp); 954 FREE(nam, M_SONAME); 955 return (error); 956} 957 958/* 959 * unmount system call 960 */ 961static int 962nfs_unmount(struct mount *mp, int mntflags, struct thread *td) 963{ 964 struct nfsmount *nmp; 965 int error, flags = 0; 966 967 if (mntflags & MNT_FORCE) 968 flags |= FORCECLOSE; 969 nmp = VFSTONFS(mp); 970 /* 971 * Goes something like this.. 972 * - Call vflush() to clear out vnodes for this filesystem 973 * - Close the socket 974 * - Free up the data structures 975 */ 976 /* In the forced case, cancel any outstanding requests. */ 977 if (flags & FORCECLOSE) { 978 error = nfs_nmcancelreqs(nmp); 979 if (error) 980 goto out; 981 } 982 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ 983 error = vflush(mp, 1, flags, td); 984 if (error) 985 goto out; 986 987 /* 988 * We are now committed to the unmount. 989 */ 990 nfs_disconnect(nmp); 991 FREE(nmp->nm_nam, M_SONAME); 992 993 mtx_destroy(&nmp->nm_mtx); 994 uma_zfree(nfsmount_zone, nmp); 995out: 996 return (error); 997} 998 999/* 1000 * Return root of a filesystem 1001 */ 1002static int 1003nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) 1004{ 1005 struct vnode *vp; 1006 struct nfsmount *nmp; 1007 struct nfsnode *np; 1008 int error; 1009 1010 nmp = VFSTONFS(mp); 1011 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags); 1012 if (error) 1013 return error; 1014 vp = NFSTOV(np); 1015 /* 1016 * Get transfer parameters and attributes for root vnode once. 1017 */ 1018 mtx_lock(&nmp->nm_mtx); 1019 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 && 1020 (nmp->nm_flag & NFSMNT_NFSV3)) { 1021 mtx_unlock(&nmp->nm_mtx); 1022 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread); 1023 } else 1024 mtx_unlock(&nmp->nm_mtx); 1025 if (vp->v_type == VNON) 1026 vp->v_type = VDIR; 1027 vp->v_vflag |= VV_ROOT; 1028 *vpp = vp; 1029 return (0); 1030} 1031 1032/* 1033 * Flush out the buffer cache 1034 */ 1035/* ARGSUSED */ 1036static int 1037nfs_sync(struct mount *mp, int waitfor, struct thread *td) 1038{ 1039 struct vnode *vp, *mvp; 1040 int error, allerror = 0; 1041 1042 /* 1043 * Force stale buffer cache information to be flushed. 1044 */ 1045 MNT_ILOCK(mp); 1046loop: 1047 MNT_VNODE_FOREACH(vp, mp, mvp) { 1048 VI_LOCK(vp); 1049 MNT_IUNLOCK(mp);
| 37 38 39#include "opt_bootp.h" 40#include "opt_nfsroot.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/bio.h> 46#include <sys/buf.h> 47#include <sys/clock.h> 48#include <sys/lock.h> 49#include <sys/malloc.h> 50#include <sys/mbuf.h> 51#include <sys/module.h> 52#include <sys/mount.h> 53#include <sys/proc.h> 54#include <sys/socket.h> 55#include <sys/socketvar.h> 56#include <sys/sockio.h> 57#include <sys/sysctl.h> 58#include <sys/vnode.h> 59#include <sys/signalvar.h> 60 61#include <vm/vm.h> 62#include <vm/vm_extern.h> 63#include <vm/uma.h> 64 65#include <net/if.h> 66#include <net/route.h> 67#include <netinet/in.h> 68 69#include <rpc/rpcclnt.h> 70 71#include <nfs/rpcv2.h> 72#include <nfs/nfsproto.h> 73#include <nfsclient/nfs.h> 74#include <nfsclient/nfsnode.h> 75#include <nfsclient/nfsmount.h> 76#include <nfs/xdr_subs.h> 77#include <nfsclient/nfsm_subs.h> 78#include <nfsclient/nfsdiskless.h> 79 80MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header"); 81MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle"); 82MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data"); 83MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables"); 84MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state"); 85 86uma_zone_t nfsmount_zone; 87 88struct nfsstats nfsstats; 89 90SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); 91SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW, 92 &nfsstats, nfsstats, "S,nfsstats"); 93static int nfs_ip_paranoia = 1; 94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, 95 &nfs_ip_paranoia, 0, ""); 96#ifdef NFS_DEBUG 97int nfs_debug; 98SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); 99#endif 100static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; 101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, 102 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); 103/* how long between console messages "nfs server foo not responding" */ 104static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; 105SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, 106 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); 107 108static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, 109 struct nfs_args *argp); 110static int mountnfs(struct nfs_args *, struct mount *, 111 struct sockaddr *, char *, struct vnode **, 112 struct ucred *cred); 113static vfs_mount_t nfs_mount; 114static vfs_cmount_t nfs_cmount; 115static vfs_unmount_t nfs_unmount; 116static vfs_root_t nfs_root; 117static vfs_statfs_t nfs_statfs; 118static vfs_sync_t nfs_sync; 119static vfs_sysctl_t nfs_sysctl; 120 121/* 122 * nfs vfs operations. 123 */ 124static struct vfsops nfs_vfsops = { 125 .vfs_init = nfs_init, 126 .vfs_mount = nfs_mount, 127 .vfs_cmount = nfs_cmount, 128 .vfs_root = nfs_root, 129 .vfs_statfs = nfs_statfs, 130 .vfs_sync = nfs_sync, 131 .vfs_uninit = nfs_uninit, 132 .vfs_unmount = nfs_unmount, 133 .vfs_sysctl = nfs_sysctl, 134}; 135VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); 136 137/* So that loader and kldload(2) can find us, wherever we are.. */ 138MODULE_VERSION(nfs, 1); 139 140static struct nfs_rpcops nfs_rpcops = { 141 nfs_readrpc, 142 nfs_writerpc, 143 nfs_writebp, 144 nfs_readlinkrpc, 145 nfs_invaldir, 146 nfs_commit, 147}; 148 149/* 150 * This structure must be filled in by a primary bootstrap or bootstrap 151 * server for a diskless/dataless machine. It is initialized below just 152 * to ensure that it is allocated to initialized data (.data not .bss). 153 */ 154struct nfs_diskless nfs_diskless = { { { 0 } } }; 155struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; 156int nfs_diskless_valid = 0; 157 158SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, 159 &nfs_diskless_valid, 0, ""); 160 161SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, 162 nfsv3_diskless.root_hostnam, 0, ""); 163 164SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, 165 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, 166 "%Ssockaddr_in", ""); 167 168 169void nfsargs_ntoh(struct nfs_args *); 170static int nfs_mountdiskless(char *, 171 struct sockaddr_in *, struct nfs_args *, 172 struct thread *, struct vnode **, struct mount *); 173static void nfs_convert_diskless(void); 174static void nfs_convert_oargs(struct nfs_args *args, 175 struct onfs_args *oargs); 176 177int 178nfs_iosize(struct nfsmount *nmp) 179{ 180 int iosize; 181 182 /* 183 * Calculate the size used for io buffers. Use the larger 184 * of the two sizes to minimise nfs requests but make sure 185 * that it is at least one VM page to avoid wasting buffer 186 * space. 187 */ 188 iosize = imax(nmp->nm_rsize, nmp->nm_wsize); 189 iosize = imax(iosize, PAGE_SIZE); 190 return (iosize); 191} 192 193static void 194nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) 195{ 196 197 args->version = NFS_ARGSVERSION; 198 args->addr = oargs->addr; 199 args->addrlen = oargs->addrlen; 200 args->sotype = oargs->sotype; 201 args->proto = oargs->proto; 202 args->fh = oargs->fh; 203 args->fhsize = oargs->fhsize; 204 args->flags = oargs->flags; 205 args->wsize = oargs->wsize; 206 args->rsize = oargs->rsize; 207 args->readdirsize = oargs->readdirsize; 208 args->timeo = oargs->timeo; 209 args->retrans = oargs->retrans; 210 args->maxgrouplist = oargs->maxgrouplist; 211 args->readahead = oargs->readahead; 212 args->deadthresh = oargs->deadthresh; 213 args->hostname = oargs->hostname; 214} 215 216static void 217nfs_convert_diskless(void) 218{ 219 220 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, 221 sizeof(struct ifaliasreq)); 222 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, 223 sizeof(struct sockaddr_in)); 224 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); 225 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { 226 nfsv3_diskless.root_fhsize = NFSX_V3FH; 227 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH); 228 } else { 229 nfsv3_diskless.root_fhsize = NFSX_V2FH; 230 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); 231 } 232 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, 233 sizeof(struct sockaddr_in)); 234 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); 235 nfsv3_diskless.root_time = nfs_diskless.root_time; 236 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, 237 MAXHOSTNAMELEN); 238 nfs_diskless_valid = 3; 239} 240 241/* 242 * nfs statfs call 243 */ 244static int 245nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 246{ 247 struct vnode *vp; 248 struct nfs_statfs *sfp; 249 caddr_t bpos, dpos; 250 struct nfsmount *nmp = VFSTONFS(mp); 251 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; 252 struct mbuf *mreq, *mrep, *md, *mb; 253 struct nfsnode *np; 254 u_quad_t tquad; 255 256#ifndef nolint 257 sfp = NULL; 258#endif 259 error = vfs_busy(mp, LK_NOWAIT, NULL, td); 260 if (error) 261 return (error); 262 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); 263 if (error) { 264 vfs_unbusy(mp, td); 265 return (error); 266 } 267 vp = NFSTOV(np); 268 mtx_lock(&nmp->nm_mtx); 269 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 270 mtx_unlock(&nmp->nm_mtx); 271 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td); 272 } else 273 mtx_unlock(&nmp->nm_mtx); 274 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 275 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); 276 mb = mreq; 277 bpos = mtod(mb, caddr_t); 278 nfsm_fhtom(vp, v3); 279 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred); 280 if (v3) 281 nfsm_postop_attr(vp, retattr); 282 if (error) { 283 if (mrep != NULL) 284 m_freem(mrep); 285 goto nfsmout; 286 } 287 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3)); 288 mtx_lock(&nmp->nm_mtx); 289 sbp->f_iosize = nfs_iosize(nmp); 290 mtx_unlock(&nmp->nm_mtx); 291 if (v3) { 292 sbp->f_bsize = NFS_FABLKSIZE; 293 tquad = fxdr_hyper(&sfp->sf_tbytes); 294 sbp->f_blocks = tquad / NFS_FABLKSIZE; 295 tquad = fxdr_hyper(&sfp->sf_fbytes); 296 sbp->f_bfree = tquad / NFS_FABLKSIZE; 297 tquad = fxdr_hyper(&sfp->sf_abytes); 298 sbp->f_bavail = tquad / NFS_FABLKSIZE; 299 sbp->f_files = (fxdr_unsigned(int32_t, 300 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); 301 sbp->f_ffree = (fxdr_unsigned(int32_t, 302 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); 303 } else { 304 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 305 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 306 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 307 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 308 sbp->f_files = 0; 309 sbp->f_ffree = 0; 310 } 311 m_freem(mrep); 312nfsmout: 313 vput(vp); 314 vfs_unbusy(mp, td); 315 return (error); 316} 317 318/* 319 * nfs version 3 fsinfo rpc call 320 */ 321int 322nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, 323 struct thread *td) 324{ 325 struct nfsv3_fsinfo *fsp; 326 u_int32_t pref, max; 327 caddr_t bpos, dpos; 328 int error = 0, retattr; 329 struct mbuf *mreq, *mrep, *md, *mb; 330 u_int64_t maxfsize; 331 332 nfsstats.rpccnt[NFSPROC_FSINFO]++; 333 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); 334 mb = mreq; 335 bpos = mtod(mb, caddr_t); 336 nfsm_fhtom(vp, 1); 337 nfsm_request(vp, NFSPROC_FSINFO, td, cred); 338 nfsm_postop_attr(vp, retattr); 339 if (!error) { 340 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO); 341 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); 342 mtx_lock(&nmp->nm_mtx); 343 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) 344 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & 345 ~(NFS_FABLKSIZE - 1); 346 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); 347 if (max < nmp->nm_wsize && max > 0) { 348 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); 349 if (nmp->nm_wsize == 0) 350 nmp->nm_wsize = max; 351 } 352 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); 353 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) 354 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & 355 ~(NFS_FABLKSIZE - 1); 356 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); 357 if (max < nmp->nm_rsize && max > 0) { 358 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); 359 if (nmp->nm_rsize == 0) 360 nmp->nm_rsize = max; 361 } 362 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref); 363 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) 364 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & 365 ~(NFS_DIRBLKSIZ - 1); 366 if (max < nmp->nm_readdirsize && max > 0) { 367 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); 368 if (nmp->nm_readdirsize == 0) 369 nmp->nm_readdirsize = max; 370 } 371 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); 372 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) 373 nmp->nm_maxfilesize = maxfsize; 374 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp); 375 nmp->nm_state |= NFSSTA_GOTFSINFO; 376 mtx_unlock(&nmp->nm_mtx); 377 } 378 m_freem(mrep); 379nfsmout: 380 return (error); 381} 382 383/* 384 * Mount a remote root fs via. nfs. This depends on the info in the 385 * nfs_diskless structure that has been filled in properly by some primary 386 * bootstrap. 387 * It goes something like this: 388 * - do enough of "ifconfig" by calling ifioctl() so that the system 389 * can talk to the server 390 * - If nfs_diskless.mygateway is filled in, use that address as 391 * a default gateway. 392 * - build the rootfs mount point and call mountnfs() to do the rest. 393 * 394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless 395 * structure, as well as other global NFS client variables here, as 396 * nfs_mountroot() will be called once in the boot before any other NFS 397 * client activity occurs. 398 */ 399int 400nfs_mountroot(struct mount *mp, struct thread *td) 401{ 402 struct nfsv3_diskless *nd = &nfsv3_diskless; 403 struct socket *so; 404 struct vnode *vp; 405 struct ifreq ir; 406 int error, i; 407 u_long l; 408 char buf[128]; 409 char *cp; 410 411#if defined(BOOTP_NFSROOT) && defined(BOOTP) 412 bootpc_init(); /* use bootp to get nfs_diskless filled in */ 413#elif defined(NFS_ROOT) 414 nfs_setup_diskless(); 415#endif 416 417 if (nfs_diskless_valid == 0) 418 return (-1); 419 if (nfs_diskless_valid == 1) 420 nfs_convert_diskless(); 421 422 /* 423 * XXX splnet, so networks will receive... 424 */ 425 splnet(); 426 427 /* 428 * Do enough of ifconfig(8) so that the critical net interface can 429 * talk to the server. 430 */ 431 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, 432 td->td_ucred, td); 433 if (error) 434 panic("nfs_mountroot: socreate(%04x): %d", 435 nd->myif.ifra_addr.sa_family, error); 436 437#if 0 /* XXX Bad idea */ 438 /* 439 * We might not have been told the right interface, so we pass 440 * over the first ten interfaces of the same kind, until we get 441 * one of them configured. 442 */ 443 444 for (i = strlen(nd->myif.ifra_name) - 1; 445 nd->myif.ifra_name[i] >= '0' && 446 nd->myif.ifra_name[i] <= '9'; 447 nd->myif.ifra_name[i] ++) { 448 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 449 if(!error) 450 break; 451 } 452#endif 453 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 454 if (error) 455 panic("nfs_mountroot: SIOCAIFADDR: %d", error); 456 if ((cp = getenv("boot.netif.mtu")) != NULL) { 457 ir.ifr_mtu = strtol(cp, NULL, 10); 458 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); 459 freeenv(cp); 460 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); 461 if (error) 462 printf("nfs_mountroot: SIOCSIFMTU: %d", error); 463 } 464 soclose(so); 465 466 /* 467 * If the gateway field is filled in, set it as the default route. 468 * Note that pxeboot will set a default route of 0 if the route 469 * is not set by the DHCP server. Check also for a value of 0 470 * to avoid panicking inappropriately in that situation. 471 */ 472 if (nd->mygateway.sin_len != 0 && 473 nd->mygateway.sin_addr.s_addr != 0) { 474 struct sockaddr_in mask, sin; 475 476 bzero((caddr_t)&mask, sizeof(mask)); 477 sin = mask; 478 sin.sin_family = AF_INET; 479 sin.sin_len = sizeof(sin); 480 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, 481 (struct sockaddr *)&nd->mygateway, 482 (struct sockaddr *)&mask, 483 RTF_UP | RTF_GATEWAY, NULL); 484 if (error) 485 panic("nfs_mountroot: RTM_ADD: %d", error); 486 } 487 488 /* 489 * Create the rootfs mount point. 490 */ 491 nd->root_args.fh = nd->root_fh; 492 nd->root_args.fhsize = nd->root_fhsize; 493 l = ntohl(nd->root_saddr.sin_addr.s_addr); 494 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 495 (l >> 24) & 0xff, (l >> 16) & 0xff, 496 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); 497 printf("NFS ROOT: %s\n", buf); 498 if ((error = nfs_mountdiskless(buf, 499 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { 500 return (error); 501 } 502 503 /* 504 * This is not really an nfs issue, but it is much easier to 505 * set hostname here and then let the "/etc/rc.xxx" files 506 * mount the right /var based upon its preset value. 507 */ 508 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); 509 hostname[MAXHOSTNAMELEN - 1] = '\0'; 510 for (i = 0; i < MAXHOSTNAMELEN; i++) 511 if (hostname[i] == '\0') 512 break; 513 inittodr(ntohl(nd->root_time)); 514 return (0); 515} 516 517/* 518 * Internal version of mount system call for diskless setup. 519 */ 520static int 521nfs_mountdiskless(char *path, 522 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, 523 struct vnode **vpp, struct mount *mp) 524{ 525 struct sockaddr *nam; 526 int error; 527 528 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); 529 if ((error = mountnfs(args, mp, nam, path, vpp, 530 td->td_ucred)) != 0) { 531 printf("nfs_mountroot: mount %s on /: %d\n", path, error); 532 return (error); 533 } 534 return (0); 535} 536 537static void 538nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp) 539{ 540 int s; 541 int adjsock; 542 int maxio; 543 544 s = splnet(); 545 546 /* 547 * Set read-only flag if requested; otherwise, clear it if this is 548 * an update. If this is not an update, then either the read-only 549 * flag is already clear, or this is a root mount and it was set 550 * intentionally at some previous point. 551 */ 552 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { 553 MNT_ILOCK(mp); 554 mp->mnt_flag |= MNT_RDONLY; 555 MNT_IUNLOCK(mp); 556 } else if (mp->mnt_flag & MNT_UPDATE) { 557 MNT_ILOCK(mp); 558 mp->mnt_flag &= ~MNT_RDONLY; 559 MNT_IUNLOCK(mp); 560 } 561 562 /* 563 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes 564 * no sense in that context. Also, set up appropriate retransmit 565 * and soft timeout behavior. 566 */ 567 if (argp->sotype == SOCK_STREAM) { 568 nmp->nm_flag &= ~NFSMNT_NOCONN; 569 nmp->nm_flag |= NFSMNT_DUMBTIMR; 570 nmp->nm_timeo = NFS_MAXTIMEO; 571 nmp->nm_retry = NFS_RETRANS_TCP; 572 } 573 574 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */ 575 if ((argp->flags & NFSMNT_NFSV3) == 0) 576 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 577 578 /* Re-bind if rsrvd port requested and wasn't on one */ 579 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) 580 && (argp->flags & NFSMNT_RESVPORT); 581 /* Also re-bind if we're switching to/from a connected UDP socket */ 582 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != 583 (argp->flags & NFSMNT_NOCONN)); 584 585 /* Update flags atomically. Don't change the lock bits. */ 586 nmp->nm_flag = argp->flags | nmp->nm_flag; 587 splx(s); 588 589 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 590 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 591 if (nmp->nm_timeo < NFS_MINTIMEO) 592 nmp->nm_timeo = NFS_MINTIMEO; 593 else if (nmp->nm_timeo > NFS_MAXTIMEO) 594 nmp->nm_timeo = NFS_MAXTIMEO; 595 } 596 597 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 598 nmp->nm_retry = argp->retrans; 599 if (nmp->nm_retry > NFS_MAXREXMIT) 600 nmp->nm_retry = NFS_MAXREXMIT; 601 } 602 603 if (argp->flags & NFSMNT_NFSV3) { 604 if (argp->sotype == SOCK_DGRAM) 605 maxio = NFS_MAXDGRAMDATA; 606 else 607 maxio = NFS_MAXDATA; 608 } else 609 maxio = NFS_V2MAXDATA; 610 611 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 612 nmp->nm_wsize = argp->wsize; 613 /* Round down to multiple of blocksize */ 614 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); 615 if (nmp->nm_wsize <= 0) 616 nmp->nm_wsize = NFS_FABLKSIZE; 617 } 618 if (nmp->nm_wsize > maxio) 619 nmp->nm_wsize = maxio; 620 if (nmp->nm_wsize > MAXBSIZE) 621 nmp->nm_wsize = MAXBSIZE; 622 623 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 624 nmp->nm_rsize = argp->rsize; 625 /* Round down to multiple of blocksize */ 626 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); 627 if (nmp->nm_rsize <= 0) 628 nmp->nm_rsize = NFS_FABLKSIZE; 629 } 630 if (nmp->nm_rsize > maxio) 631 nmp->nm_rsize = maxio; 632 if (nmp->nm_rsize > MAXBSIZE) 633 nmp->nm_rsize = MAXBSIZE; 634 635 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { 636 nmp->nm_readdirsize = argp->readdirsize; 637 } 638 if (nmp->nm_readdirsize > maxio) 639 nmp->nm_readdirsize = maxio; 640 if (nmp->nm_readdirsize > nmp->nm_rsize) 641 nmp->nm_readdirsize = nmp->nm_rsize; 642 643 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) 644 nmp->nm_acregmin = argp->acregmin; 645 else 646 nmp->nm_acregmin = NFS_MINATTRTIMO; 647 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) 648 nmp->nm_acregmax = argp->acregmax; 649 else 650 nmp->nm_acregmax = NFS_MAXATTRTIMO; 651 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) 652 nmp->nm_acdirmin = argp->acdirmin; 653 else 654 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; 655 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) 656 nmp->nm_acdirmax = argp->acdirmax; 657 else 658 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; 659 if (nmp->nm_acdirmin > nmp->nm_acdirmax) 660 nmp->nm_acdirmin = nmp->nm_acdirmax; 661 if (nmp->nm_acregmin > nmp->nm_acregmax) 662 nmp->nm_acregmin = nmp->nm_acregmax; 663 664 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) { 665 if (argp->maxgrouplist <= NFS_MAXGRPS) 666 nmp->nm_numgrps = argp->maxgrouplist; 667 else 668 nmp->nm_numgrps = NFS_MAXGRPS; 669 } 670 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { 671 if (argp->readahead <= NFS_MAXRAHEAD) 672 nmp->nm_readahead = argp->readahead; 673 else 674 nmp->nm_readahead = NFS_MAXRAHEAD; 675 } 676 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { 677 if (argp->wcommitsize < nmp->nm_wsize) 678 nmp->nm_wcommitsize = nmp->nm_wsize; 679 else 680 nmp->nm_wcommitsize = argp->wcommitsize; 681 } 682 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) { 683 if (argp->deadthresh <= NFS_MAXDEADTHRESH) 684 nmp->nm_deadthresh = argp->deadthresh; 685 else 686 nmp->nm_deadthresh = NFS_MAXDEADTHRESH; 687 } 688 689 adjsock |= ((nmp->nm_sotype != argp->sotype) || 690 (nmp->nm_soproto != argp->proto)); 691 nmp->nm_sotype = argp->sotype; 692 nmp->nm_soproto = argp->proto; 693 694 if (nmp->nm_so && adjsock) { 695 nfs_safedisconnect(nmp); 696 if (nmp->nm_sotype == SOCK_DGRAM) 697 while (nfs_connect(nmp, NULL)) { 698 printf("nfs_args: retrying connect\n"); 699 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 700 } 701 } 702} 703 704static const char *nfs_opts[] = { "from", "nfs_args", 705 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", 706 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", 707 "async", NULL }; 708 709/* 710 * VFS Operations. 711 * 712 * mount system call 713 * It seems a bit dumb to copyinstr() the host and path here and then 714 * bcopy() them in mountnfs(), but I wanted to detect errors before 715 * doing the sockargs() call because sockargs() allocates an mbuf and 716 * an error after that means that I have to release the mbuf. 717 */ 718/* ARGSUSED */ 719static int 720nfs_mount(struct mount *mp, struct thread *td) 721{ 722 int error; 723 struct nfs_args args; 724 struct sockaddr *nam; 725 struct vnode *vp; 726 char hst[MNAMELEN]; 727 size_t len; 728 u_char nfh[NFSX_V3FHMAX]; 729 730 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { 731 error = EINVAL; 732 goto out; 733 } 734 735 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { 736 error = nfs_mountroot(mp, td); 737 goto out; 738 } 739 740 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args); 741 if (error) 742 goto out; 743 744 if (args.version != NFS_ARGSVERSION) { 745 error = EPROGMISMATCH; 746 goto out; 747 } 748 749 if (mp->mnt_flag & MNT_UPDATE) { 750 struct nfsmount *nmp = VFSTONFS(mp); 751 752 if (nmp == NULL) { 753 error = EIO; 754 goto out; 755 } 756 /* 757 * When doing an update, we can't change from or to 758 * v3, switch lockd strategies or change cookie translation 759 */ 760 args.flags = (args.flags & 761 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | 762 (nmp->nm_flag & 763 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); 764 nfs_decode_args(mp, nmp, &args); 765 goto out; 766 } 767 768 /* 769 * Make the nfs_ip_paranoia sysctl serve as the default connection 770 * or no-connection mode for those protocols that support 771 * no-connection mode (the flag will be cleared later for protocols 772 * that do not support no-connection mode). This will allow a client 773 * to receive replies from a different IP then the request was 774 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), 775 * not 0. 776 */ 777 if (nfs_ip_paranoia == 0) 778 args.flags |= NFSMNT_NOCONN; 779 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { 780 error = EINVAL; 781 goto out; 782 } 783 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); 784 if (error) 785 goto out; 786 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); 787 if (error) 788 goto out; 789 bzero(&hst[len], MNAMELEN - len); 790 /* sockargs() call must be after above copyin() calls */ 791 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); 792 if (error) 793 goto out; 794 args.fh = nfh; 795 error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred); 796out: 797 if (!error) { 798 MNT_ILOCK(mp); 799 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED); 800 MNT_IUNLOCK(mp); 801 } 802 return (error); 803} 804 805 806/* 807 * VFS Operations. 808 * 809 * mount system call 810 * It seems a bit dumb to copyinstr() the host and path here and then 811 * bcopy() them in mountnfs(), but I wanted to detect errors before 812 * doing the sockargs() call because sockargs() allocates an mbuf and 813 * an error after that means that I have to release the mbuf. 814 */ 815/* ARGSUSED */ 816static int 817nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td) 818{ 819 int error; 820 struct nfs_args args; 821 822 error = copyin(data, &args, sizeof (struct nfs_args)); 823 if (error) 824 return error; 825 826 ma = mount_arg(ma, "nfs_args", &args, sizeof args); 827 828 error = kernel_mount(ma, flags); 829 return (error); 830} 831 832/* 833 * Common code for mount and mountroot 834 */ 835static int 836mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, 837 char *hst, struct vnode **vpp, struct ucred *cred) 838{ 839 struct nfsmount *nmp; 840 struct nfsnode *np; 841 int error; 842 struct vattr attrs; 843 844 if (mp->mnt_flag & MNT_UPDATE) { 845 nmp = VFSTONFS(mp); 846 printf("%s: MNT_UPDATE is no longer handled here\n", __func__); 847 FREE(nam, M_SONAME); 848 return (0); 849 } else { 850 nmp = uma_zalloc(nfsmount_zone, M_WAITOK); 851 bzero((caddr_t)nmp, sizeof (struct nfsmount)); 852 TAILQ_INIT(&nmp->nm_bufq); 853 mp->mnt_data = nmp; 854 } 855 vfs_getnewfsid(mp); 856 nmp->nm_mountp = mp; 857 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF); 858 859 /* 860 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too 861 * high, depending on whether we end up with negative offsets in 862 * the client or server somewhere. 2GB-1 may be safer. 863 * 864 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum 865 * that we can handle until we find out otherwise. 866 * XXX Our "safe" limit on the client is what we can store in our 867 * buffer cache using signed(!) block numbers. 868 */ 869 if ((argp->flags & NFSMNT_NFSV3) == 0) 870 nmp->nm_maxfilesize = 0xffffffffLL; 871 else 872 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1; 873 874 nmp->nm_timeo = NFS_TIMEO; 875 nmp->nm_retry = NFS_RETRANS; 876 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) { 877 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA; 878 } else { 879 nmp->nm_wsize = NFS_WSIZE; 880 nmp->nm_rsize = NFS_RSIZE; 881 } 882 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); 883 nmp->nm_readdirsize = NFS_READDIRSIZE; 884 nmp->nm_numgrps = NFS_MAXGRPS; 885 nmp->nm_readahead = NFS_DEFRAHEAD; 886 nmp->nm_deadthresh = NFS_MAXDEADTHRESH; 887 nmp->nm_tprintf_delay = nfs_tprintf_delay; 888 if (nmp->nm_tprintf_delay < 0) 889 nmp->nm_tprintf_delay = 0; 890 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; 891 if (nmp->nm_tprintf_initial_delay < 0) 892 nmp->nm_tprintf_initial_delay = 0; 893 nmp->nm_fhsize = argp->fhsize; 894 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); 895 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 896 nmp->nm_nam = nam; 897 /* Set up the sockets and per-host congestion */ 898 nmp->nm_sotype = argp->sotype; 899 nmp->nm_soproto = argp->proto; 900 nmp->nm_rpcops = &nfs_rpcops; 901 902 nfs_decode_args(mp, nmp, argp); 903 904 /* 905 * For Connection based sockets (TCP,...) defer the connect until 906 * the first request, in case the server is not responding. 907 */ 908 if (nmp->nm_sotype == SOCK_DGRAM && 909 (error = nfs_connect(nmp, NULL))) 910 goto bad; 911 912 /* 913 * This is silly, but it has to be set so that vinifod() works. 914 * We do not want to do an nfs_statfs() here since we can get 915 * stuck on a dead server and we are holding a lock on the mount 916 * point. 917 */ 918 mtx_lock(&nmp->nm_mtx); 919 mp->mnt_stat.f_iosize = nfs_iosize(nmp); 920 mtx_unlock(&nmp->nm_mtx); 921 /* 922 * A reference count is needed on the nfsnode representing the 923 * remote root. If this object is not persistent, then backward 924 * traversals of the mount point (i.e. "..") will not work if 925 * the nfsnode gets flushed out of the cache. Ufs does not have 926 * this problem, because one can identify root inodes by their 927 * number == ROOTINO (2). 928 */ 929 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); 930 if (error) 931 goto bad; 932 *vpp = NFSTOV(np); 933 934 /* 935 * Get file attributes and transfer parameters for the 936 * mountpoint. This has the side effect of filling in 937 * (*vpp)->v_type with the correct value. 938 */ 939 if (argp->flags & NFSMNT_NFSV3) 940 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread); 941 else 942 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread); 943 944 /* 945 * Lose the lock but keep the ref. 946 */ 947 VOP_UNLOCK(*vpp, 0); 948 949 return (0); 950bad: 951 nfs_disconnect(nmp); 952 mtx_destroy(&nmp->nm_mtx); 953 uma_zfree(nfsmount_zone, nmp); 954 FREE(nam, M_SONAME); 955 return (error); 956} 957 958/* 959 * unmount system call 960 */ 961static int 962nfs_unmount(struct mount *mp, int mntflags, struct thread *td) 963{ 964 struct nfsmount *nmp; 965 int error, flags = 0; 966 967 if (mntflags & MNT_FORCE) 968 flags |= FORCECLOSE; 969 nmp = VFSTONFS(mp); 970 /* 971 * Goes something like this.. 972 * - Call vflush() to clear out vnodes for this filesystem 973 * - Close the socket 974 * - Free up the data structures 975 */ 976 /* In the forced case, cancel any outstanding requests. */ 977 if (flags & FORCECLOSE) { 978 error = nfs_nmcancelreqs(nmp); 979 if (error) 980 goto out; 981 } 982 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ 983 error = vflush(mp, 1, flags, td); 984 if (error) 985 goto out; 986 987 /* 988 * We are now committed to the unmount. 989 */ 990 nfs_disconnect(nmp); 991 FREE(nmp->nm_nam, M_SONAME); 992 993 mtx_destroy(&nmp->nm_mtx); 994 uma_zfree(nfsmount_zone, nmp); 995out: 996 return (error); 997} 998 999/* 1000 * Return root of a filesystem 1001 */ 1002static int 1003nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) 1004{ 1005 struct vnode *vp; 1006 struct nfsmount *nmp; 1007 struct nfsnode *np; 1008 int error; 1009 1010 nmp = VFSTONFS(mp); 1011 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags); 1012 if (error) 1013 return error; 1014 vp = NFSTOV(np); 1015 /* 1016 * Get transfer parameters and attributes for root vnode once. 1017 */ 1018 mtx_lock(&nmp->nm_mtx); 1019 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 && 1020 (nmp->nm_flag & NFSMNT_NFSV3)) { 1021 mtx_unlock(&nmp->nm_mtx); 1022 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread); 1023 } else 1024 mtx_unlock(&nmp->nm_mtx); 1025 if (vp->v_type == VNON) 1026 vp->v_type = VDIR; 1027 vp->v_vflag |= VV_ROOT; 1028 *vpp = vp; 1029 return (0); 1030} 1031 1032/* 1033 * Flush out the buffer cache 1034 */ 1035/* ARGSUSED */ 1036static int 1037nfs_sync(struct mount *mp, int waitfor, struct thread *td) 1038{ 1039 struct vnode *vp, *mvp; 1040 int error, allerror = 0; 1041 1042 /* 1043 * Force stale buffer cache information to be flushed. 1044 */ 1045 MNT_ILOCK(mp); 1046loop: 1047 MNT_VNODE_FOREACH(vp, mp, mvp) { 1048 VI_LOCK(vp); 1049 MNT_IUNLOCK(mp);
|