nfs_krpc.c revision 228757
/*-
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_krpc.c 228757 2011-12-21 02:45:51Z rmacklem $");

/*
 * Socket operations for use by nfs
 */

#include "opt_inet6.h"
#include "opt_kdtrace.h"
#include "opt_kgssapi.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/vnode.h>

#include <rpc/rpc.h>

#include <nfs/nfsproto.h>
#include <nfsclient/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfsclient/nfsm_subs.h>
#include <nfsclient/nfsmount.h>
#include <nfsclient/nfsnode.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * DTrace entry points.  nfs_request() only calls through these when they
 * are non-NULL.
 */
dtrace_nfsclient_nfs23_start_probe_func_t
    dtrace_nfsclient_nfs23_start_probe;

dtrace_nfsclient_nfs23_done_probe_func_t
    dtrace_nfsclient_nfs23_done_probe;

/*
 * Registered probes by RPC type.
 * Indexed by procedure number; nfs_request() skips a probe whose id is 0.
 */
uint32_t	nfsclient_nfs2_start_probes[NFS_NPROCS];
uint32_t	nfsclient_nfs2_done_probes[NFS_NPROCS];

uint32_t	nfsclient_nfs3_start_probes[NFS_NPROCS];
uint32_t	nfsclient_nfs3_done_probes[NFS_NPROCS];
#endif

/* Socket buffer multiplier; nfs_connect() clamps it to the range 2..64. */
static int	nfs_bufpackets = 4;
/*
 * Exported read-only through sysctl below.
 * NOTE(review): nothing in this file appears to update it -- confirm.
 */
static int	nfs_reconnects;
/* Seconds nfs_request() sleeps before retrying after NFSERR_TRYLATER. */
static int	nfs3_jukebox_delay = 10;
/*
 * When non-zero, nfs_request() frees an NFSv3 error reply instead of
 * handing it back to the caller for wcc-data parsing.
 */
static int	nfs_skip_wcc_data_onerr = 1;
/* Sleep channel used only for the jukebox delay in nfs_request(). */
static int	fake_wchan;

SYSCTL_DECL(_vfs_oldnfs);

SYSCTL_INT(_vfs_oldnfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
    "Buffer reservation size 2 < x < 64");
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
    "Number of times the nfs client has had to reconnect");
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW,
    &nfs3_jukebox_delay, 0,
    "Number of seconds to delay a retry after receiving EJUKEBOX");
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW,
    &nfs_skip_wcc_data_onerr, 0,
    "Disable weak cache consistency checking when server returns an error");

static void	nfs_down(struct nfsmount *, struct thread *, const char *,
    int, int);
static void	nfs_up(struct nfsmount *, struct thread *, const char *,
    int, int);
static int	nfs_msg(struct thread *, const char *, const char *, int);

/*
 * Defined elsewhere.  nfs_request() indexes it by procedure number to get
 * the value passed to the RPC layer (and to DTrace) on non-NFSv3 mounts.
 */
extern int nfsv2_procid[];

/*
 * NOTE(review): this structure is not referenced by any code visible in
 * this file -- confirm whether it is still needed.
 */
struct nfs_cached_auth {
	int		ca_refs; /* refcount, including 1 from the cache */
	uid_t		ca_uid; /* uid that corresponds to this auth */
	AUTH		*ca_auth; /* RPC auth handle */
};

/*
 * RTT estimator
 */

/*
 * Timer class for each procedure, indexed by procedure number.  The
 * trailing comment on each entry names the procedure for that slot.
 * nfs_request() passes no per-mount timer to the RPC layer for entries
 * that are NFS_DEFAULT_TIMER.
 */
static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = {
	NFS_DEFAULT_TIMER,	/* NULL */
	NFS_GETATTR_TIMER,	/* GETATTR */
	NFS_DEFAULT_TIMER,	/* SETATTR */
	NFS_LOOKUP_TIMER,	/* LOOKUP */
	NFS_GETATTR_TIMER,	/* ACCESS */
	NFS_READ_TIMER,		/* READLINK */
	NFS_READ_TIMER,		/* READ */
	NFS_WRITE_TIMER,	/* WRITE */
	NFS_DEFAULT_TIMER,	/* CREATE */
	NFS_DEFAULT_TIMER,	/* MKDIR */
	NFS_DEFAULT_TIMER,	/* SYMLINK */
	NFS_DEFAULT_TIMER,	/* MKNOD */
	NFS_DEFAULT_TIMER,	/* REMOVE */
	NFS_DEFAULT_TIMER,	/* RMDIR */
	NFS_DEFAULT_TIMER,	/* RENAME */
	NFS_DEFAULT_TIMER,	/* LINK */
	NFS_READ_TIMER,		/* READDIR */
	NFS_READ_TIMER,		/* READDIRPLUS */
	NFS_DEFAULT_TIMER,	/* FSSTAT */
	NFS_DEFAULT_TIMER,	/* FSINFO */
	NFS_DEFAULT_TIMER,	/* PATHCONF */
	NFS_DEFAULT_TIMER,	/* COMMIT */
	NFS_DEFAULT_TIMER,	/* NOOP */
};

/*
 * Choose the correct RTT timer for this NFS procedure.
155280304Sjkim */ 156280304Sjkimstatic inline enum nfs_rto_timer_t 157280304Sjkimnfs_rto_timer(u_int32_t procnum) 158280304Sjkim{ 159280304Sjkim 160280304Sjkim return (nfs_proct[procnum]); 161280304Sjkim} 162280304Sjkim 163280304Sjkim/* 164280304Sjkim * Initialize the RTT estimator state for a new mount point. 165280304Sjkim */ 166280304Sjkimstatic void 167280304Sjkimnfs_init_rtt(struct nfsmount *nmp) 168280304Sjkim{ 169280304Sjkim int i; 170280304Sjkim 171280304Sjkim for (i = 0; i < NFS_MAX_TIMER; i++) { 172280304Sjkim nmp->nm_timers[i].rt_srtt = hz; 173280304Sjkim nmp->nm_timers[i].rt_deviate = 0; 174280304Sjkim nmp->nm_timers[i].rt_rtxcur = hz; 175280304Sjkim } 176280304Sjkim} 177280304Sjkim 178280304Sjkim/* 179280304Sjkim * Initialize sockets and congestion for a new NFS connection. 180280304Sjkim * We do not free the sockaddr if error. 181280304Sjkim */ 182280304Sjkimint 183280304Sjkimnfs_connect(struct nfsmount *nmp) 184280304Sjkim{ 185280304Sjkim int rcvreserve, sndreserve; 186280304Sjkim int pktscale; 187280304Sjkim struct sockaddr *saddr; 188280304Sjkim struct ucred *origcred; 189280304Sjkim struct thread *td = curthread; 190280304Sjkim CLIENT *client; 191280304Sjkim struct netconfig *nconf; 192280304Sjkim rpcvers_t vers; 193280304Sjkim int one = 1, retries; 194280304Sjkim struct timeval timo; 195280304Sjkim 196280304Sjkim /* 197280304Sjkim * We need to establish the socket using the credentials of 198280304Sjkim * the mountpoint. Some parts of this process (such as 199280304Sjkim * sobind() and soconnect()) will use the curent thread's 200280304Sjkim * credential instead of the socket credential. To work 201280304Sjkim * around this, temporarily change the current thread's 202280304Sjkim * credential to that of the mountpoint. 203280304Sjkim * 204280304Sjkim * XXX: It would be better to explicitly pass the correct 205280304Sjkim * credential to sobind() and soconnect(). 
206280304Sjkim */ 207280304Sjkim origcred = td->td_ucred; 208280304Sjkim td->td_ucred = nmp->nm_mountp->mnt_cred; 209280304Sjkim saddr = nmp->nm_nam; 210280304Sjkim 211193645Ssimon vers = NFS_VER2; 212193645Ssimon if (nmp->nm_flag & NFSMNT_NFSV3) 213193645Ssimon vers = NFS_VER3; 214193645Ssimon else if (nmp->nm_flag & NFSMNT_NFSV4) 215280304Sjkim vers = NFS_VER4; 216193645Ssimon if (saddr->sa_family == AF_INET) 217193645Ssimon if (nmp->nm_sotype == SOCK_DGRAM) 218280304Sjkim nconf = getnetconfigent("udp"); 219280304Sjkim else 220280304Sjkim nconf = getnetconfigent("tcp"); 221280304Sjkim else 222193645Ssimon if (nmp->nm_sotype == SOCK_DGRAM) 223280304Sjkim nconf = getnetconfigent("udp6"); 224 else 225 nconf = getnetconfigent("tcp6"); 226 227 /* 228 * Get buffer reservation size from sysctl, but impose reasonable 229 * limits. 230 */ 231 pktscale = nfs_bufpackets; 232 if (pktscale < 2) 233 pktscale = 2; 234 if (pktscale > 64) 235 pktscale = 64; 236 mtx_lock(&nmp->nm_mtx); 237 if (nmp->nm_sotype == SOCK_DGRAM) { 238 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 239 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 240 NFS_MAXPKTHDR) * pktscale; 241 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 242 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 243 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 244 NFS_MAXPKTHDR) * pktscale; 245 } else { 246 if (nmp->nm_sotype != SOCK_STREAM) 247 panic("nfscon sotype"); 248 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 249 sizeof (u_int32_t)) * pktscale; 250 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 251 sizeof (u_int32_t)) * pktscale; 252 } 253 mtx_unlock(&nmp->nm_mtx); 254 255 client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers, 256 sndreserve, rcvreserve); 257 CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); 258 if (nmp->nm_flag & NFSMNT_INT) 259 CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 260 if (nmp->nm_flag & NFSMNT_RESVPORT) 261 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 262 
if ((nmp->nm_flag & NFSMNT_SOFT) != 0) { 263 if (nmp->nm_sotype == SOCK_DGRAM) 264 /* 265 * For UDP, the large timeout for a reconnect will 266 * be set to "nm_retry * nm_timeo / 2", so we only 267 * want to do 2 reconnect timeout retries. 268 */ 269 retries = 2; 270 else 271 retries = nmp->nm_retry; 272 } else 273 retries = INT_MAX; 274 CLNT_CONTROL(client, CLSET_RETRIES, &retries); 275 276 /* 277 * For UDP, there are 2 timeouts: 278 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer 279 * that does a retransmit of an RPC request using the same socket 280 * and xid. This is what you normally want to do, since NFS 281 * servers depend on "same xid" for their Duplicate Request Cache. 282 * - timeout specified in CLNT_CALL_MBUF(), which specifies when 283 * retransmits on the same socket should fail and a fresh socket 284 * created. Each of these timeouts counts as one CLSET_RETRIES, 285 * as set above. 286 * Set the initial retransmit timeout for UDP. This timeout doesn't 287 * exist for TCP and the following call just fails, which is ok. 288 */ 289 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 290 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 291 CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); 292 293 mtx_lock(&nmp->nm_mtx); 294 if (nmp->nm_client) { 295 /* 296 * Someone else already connected. 297 */ 298 CLNT_RELEASE(client); 299 } else 300 nmp->nm_client = client; 301 302 /* 303 * Protocols that do not require connections may be optionally left 304 * unconnected for servers that reply from a port other than NFS_PORT. 305 */ 306 if (!(nmp->nm_flag & NFSMNT_NOCONN)) { 307 mtx_unlock(&nmp->nm_mtx); 308 CLNT_CONTROL(client, CLSET_CONNECT, &one); 309 } else 310 mtx_unlock(&nmp->nm_mtx); 311 312 /* Restore current thread's credentials. */ 313 td->td_ucred = origcred; 314 315 mtx_lock(&nmp->nm_mtx); 316 /* Initialize other non-zero congestion variables. 
*/ 317 nfs_init_rtt(nmp); 318 mtx_unlock(&nmp->nm_mtx); 319 return (0); 320} 321 322/* 323 * NFS disconnect. Clean up and unlink. 324 */ 325void 326nfs_disconnect(struct nfsmount *nmp) 327{ 328 CLIENT *client; 329 330 mtx_lock(&nmp->nm_mtx); 331 if (nmp->nm_client) { 332 client = nmp->nm_client; 333 nmp->nm_client = NULL; 334 mtx_unlock(&nmp->nm_mtx); 335 rpc_gss_secpurge_call(client); 336 CLNT_CLOSE(client); 337 CLNT_RELEASE(client); 338 } else 339 mtx_unlock(&nmp->nm_mtx); 340} 341 342void 343nfs_safedisconnect(struct nfsmount *nmp) 344{ 345 346 nfs_disconnect(nmp); 347} 348 349static AUTH * 350nfs_getauth(struct nfsmount *nmp, struct ucred *cred) 351{ 352 rpc_gss_service_t svc; 353 AUTH *auth; 354 355 switch (nmp->nm_secflavor) { 356 case RPCSEC_GSS_KRB5: 357 case RPCSEC_GSS_KRB5I: 358 case RPCSEC_GSS_KRB5P: 359 if (!nmp->nm_mech_oid) 360 if (!rpc_gss_mech_to_oid_call("kerberosv5", 361 &nmp->nm_mech_oid)) 362 return (NULL); 363 if (nmp->nm_secflavor == RPCSEC_GSS_KRB5) 364 svc = rpc_gss_svc_none; 365 else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I) 366 svc = rpc_gss_svc_integrity; 367 else 368 svc = rpc_gss_svc_privacy; 369 auth = rpc_gss_secfind_call(nmp->nm_client, cred, 370 nmp->nm_principal, nmp->nm_mech_oid, svc); 371 if (auth) 372 return (auth); 373 /* fallthrough */ 374 case AUTH_SYS: 375 default: 376 return (authunix_create(cred)); 377 378 } 379} 380 381/* 382 * Callback from the RPC code to generate up/down notifications. 
383 */ 384 385struct nfs_feedback_arg { 386 struct nfsmount *nf_mount; 387 int nf_lastmsg; /* last tprintf */ 388 int nf_tprintfmsg; 389 struct thread *nf_td; 390}; 391 392static void 393nfs_feedback(int type, int proc, void *arg) 394{ 395 struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 396 struct nfsmount *nmp = nf->nf_mount; 397 struct timeval now; 398 399 getmicrouptime(&now); 400 401 switch (type) { 402 case FEEDBACK_REXMIT2: 403 case FEEDBACK_RECONNECT: 404 if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 405 nfs_down(nmp, nf->nf_td, 406 "not responding", 0, NFSSTA_TIMEO); 407 nf->nf_tprintfmsg = TRUE; 408 nf->nf_lastmsg = now.tv_sec; 409 } 410 break; 411 412 case FEEDBACK_OK: 413 nfs_up(nf->nf_mount, nf->nf_td, 414 "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 415 break; 416 } 417} 418 419/* 420 * nfs_request - goes something like this 421 * - fill in request struct 422 * - links it into list 423 * - calls nfs_send() for first transmit 424 * - calls nfs_receive() to get reply 425 * - break down rpc header and return with nfs reply pointed to 426 * by mrep or error 427 * nb: always frees up mreq mbuf list 428 */ 429int 430nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum, 431 struct thread *td, struct ucred *cred, struct mbuf **mrp, 432 struct mbuf **mdp, caddr_t *dposp) 433{ 434 struct mbuf *mrep; 435 u_int32_t *tl; 436 struct nfsmount *nmp; 437 struct mbuf *md; 438 time_t waituntil; 439 caddr_t dpos; 440 int error = 0, timeo; 441 struct timeval now; 442 AUTH *auth = NULL; 443 enum nfs_rto_timer_t timer; 444 struct nfs_feedback_arg nf; 445 struct rpc_callextra ext; 446 enum clnt_stat stat; 447 struct timeval timo; 448 449 /* Reject requests while attempting a forced unmount. 
*/ 450 if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { 451 m_freem(mreq); 452 return (ESTALE); 453 } 454 nmp = VFSTONFS(vp->v_mount); 455 bzero(&nf, sizeof(struct nfs_feedback_arg)); 456 nf.nf_mount = nmp; 457 nf.nf_td = td; 458 getmicrouptime(&now); 459 nf.nf_lastmsg = now.tv_sec - 460 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 461 462 /* 463 * XXX if not already connected call nfs_connect now. Longer 464 * term, change nfs_mount to call nfs_connect unconditionally 465 * and let clnt_reconnect_create handle reconnects. 466 */ 467 if (!nmp->nm_client) 468 nfs_connect(nmp); 469 470 auth = nfs_getauth(nmp, cred); 471 if (!auth) { 472 m_freem(mreq); 473 return (EACCES); 474 } 475 bzero(&ext, sizeof(ext)); 476 ext.rc_auth = auth; 477 478 ext.rc_feedback = nfs_feedback; 479 ext.rc_feedback_arg = &nf; 480 481 /* 482 * Use a conservative timeout for RPCs other than getattr, 483 * lookup, read or write. The justification for doing "other" 484 * this way is that these RPCs happen so infrequently that 485 * timer est. would probably be stale. Also, since many of 486 * these RPCs are non-idempotent, a conservative timeout is 487 * desired. 488 */ 489 timer = nfs_rto_timer(procnum); 490 if (timer != NFS_DEFAULT_TIMER) 491 ext.rc_timers = &nmp->nm_timers[timer - 1]; 492 else 493 ext.rc_timers = NULL; 494 495#ifdef KDTRACE_HOOKS 496 if (dtrace_nfsclient_nfs23_start_probe != NULL) { 497 uint32_t probe_id; 498 int probe_procnum; 499 500 if (nmp->nm_flag & NFSMNT_NFSV3) { 501 probe_id = nfsclient_nfs3_start_probes[procnum]; 502 probe_procnum = procnum; 503 } else { 504 probe_id = nfsclient_nfs2_start_probes[procnum]; 505 probe_procnum = nfsv2_procid[procnum]; 506 } 507 if (probe_id != 0) 508 (dtrace_nfsclient_nfs23_start_probe)(probe_id, vp, 509 mreq, cred, probe_procnum); 510 } 511#endif 512 513 nfsstats.rpcrequests++; 514tryagain: 515 /* 516 * This timeout specifies when a new socket should be created, 517 * along with new xid values. 
For UDP, this should be done 518 * infrequently, since retransmits of RPC requests should normally 519 * use the same xid. 520 */ 521 if (nmp->nm_sotype == SOCK_DGRAM) { 522 if ((nmp->nm_flag & NFSMNT_SOFT) != 0) { 523 /* 524 * CLSET_RETRIES is set to 2, so this should be half 525 * of the total timeout required. 526 */ 527 timeo = nmp->nm_retry * nmp->nm_timeo / 2; 528 if (timeo < 1) 529 timeo = 1; 530 timo.tv_sec = timeo / NFS_HZ; 531 timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; 532 } else { 533 /* For UDP hard mounts, use a large value. */ 534 timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; 535 timo.tv_usec = 0; 536 } 537 } else { 538 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 539 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 540 } 541 mrep = NULL; 542 stat = CLNT_CALL_MBUF(nmp->nm_client, &ext, 543 (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum], 544 mreq, &mrep, timo); 545 546 /* 547 * If there was a successful reply and a tprintf msg. 548 * tprintf a response. 549 */ 550 if (stat == RPC_SUCCESS) 551 error = 0; 552 else if (stat == RPC_TIMEDOUT) 553 error = ETIMEDOUT; 554 else if (stat == RPC_VERSMISMATCH) 555 error = EOPNOTSUPP; 556 else if (stat == RPC_PROGVERSMISMATCH) 557 error = EPROTONOSUPPORT; 558 else 559 error = EACCES; 560 if (error) 561 goto nfsmout; 562 563 KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 564 565 /* 566 * Search for any mbufs that are not a multiple of 4 bytes long 567 * or with m_data not longword aligned. 568 * These could cause pointer alignment problems, so copy them to 569 * well aligned mbufs. 
570 */ 571 error = nfs_realign(&mrep, M_DONTWAIT); 572 if (error == ENOMEM) { 573 m_freem(mrep); 574 AUTH_DESTROY(auth); 575 return (error); 576 } 577 578 md = mrep; 579 dpos = mtod(mrep, caddr_t); 580 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 581 if (*tl != 0) { 582 error = fxdr_unsigned(int, *tl); 583 if ((nmp->nm_flag & NFSMNT_NFSV3) && 584 error == NFSERR_TRYLATER) { 585 m_freem(mrep); 586 error = 0; 587 waituntil = time_second + nfs3_jukebox_delay; 588 while (time_second < waituntil) 589 (void)tsleep(&fake_wchan, PSOCK, "nqnfstry", 590 hz); 591 goto tryagain; 592 } 593 /* 594 * Make sure NFSERR_RETERR isn't bogusly set by a server 595 * such as amd. (No actual NFS error has bit 31 set.) 596 */ 597 error &= ~NFSERR_RETERR; 598 599 /* 600 * If the File Handle was stale, invalidate the lookup 601 * cache, just in case. 602 */ 603 if (error == ESTALE) 604 nfs_purgecache(vp); 605 /* 606 * Skip wcc data on NFS errors for now. NetApp filers 607 * return corrupt postop attrs in the wcc data for NFS 608 * err EROFS. Not sure if they could return corrupt 609 * postop attrs for others errors. 610 */ 611 if ((nmp->nm_flag & NFSMNT_NFSV3) && 612 !nfs_skip_wcc_data_onerr) { 613 *mrp = mrep; 614 *mdp = md; 615 *dposp = dpos; 616 error |= NFSERR_RETERR; 617 } else 618 m_freem(mrep); 619 goto nfsmout; 620 } 621 622#ifdef KDTRACE_HOOKS 623 if (dtrace_nfsclient_nfs23_done_probe != NULL) { 624 uint32_t probe_id; 625 int probe_procnum; 626 627 if (nmp->nm_flag & NFSMNT_NFSV3) { 628 probe_id = nfsclient_nfs3_done_probes[procnum]; 629 probe_procnum = procnum; 630 } else { 631 probe_id = nfsclient_nfs2_done_probes[procnum]; 632 probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 
633 procnum : nfsv2_procid[procnum]; 634 } 635 if (probe_id != 0) 636 (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 637 mreq, cred, probe_procnum, 0); 638 } 639#endif 640 m_freem(mreq); 641 *mrp = mrep; 642 *mdp = md; 643 *dposp = dpos; 644 AUTH_DESTROY(auth); 645 return (0); 646 647nfsmout: 648#ifdef KDTRACE_HOOKS 649 if (dtrace_nfsclient_nfs23_done_probe != NULL) { 650 uint32_t probe_id; 651 int probe_procnum; 652 653 if (nmp->nm_flag & NFSMNT_NFSV3) { 654 probe_id = nfsclient_nfs3_done_probes[procnum]; 655 probe_procnum = procnum; 656 } else { 657 probe_id = nfsclient_nfs2_done_probes[procnum]; 658 probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 659 procnum : nfsv2_procid[procnum]; 660 } 661 if (probe_id != 0) 662 (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 663 mreq, cred, probe_procnum, error); 664 } 665#endif 666 m_freem(mreq); 667 if (auth) 668 AUTH_DESTROY(auth); 669 return (error); 670} 671 672/* 673 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 674 * wait for all requests to complete. This is used by forced unmounts 675 * to terminate any outstanding RPCs. 676 */ 677int 678nfs_nmcancelreqs(struct nfsmount *nmp) 679{ 680 681 if (nmp->nm_client) 682 CLNT_CLOSE(nmp->nm_client); 683 return (0); 684} 685 686/* 687 * Any signal that can interrupt an NFS operation in an intr mount 688 * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 689 */ 690int nfs_sig_set[] = { 691 SIGINT, 692 SIGTERM, 693 SIGHUP, 694 SIGKILL, 695 SIGSTOP, 696 SIGQUIT 697}; 698 699/* 700 * Check to see if one of the signals in our subset is pending on 701 * the process (in an intr mount). 
702 */ 703static int 704nfs_sig_pending(sigset_t set) 705{ 706 int i; 707 708 for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) 709 if (SIGISMEMBER(set, nfs_sig_set[i])) 710 return (1); 711 return (0); 712} 713 714/* 715 * The set/restore sigmask functions are used to (temporarily) overwrite 716 * the process p_sigmask during an RPC call (for example). These are also 717 * used in other places in the NFS client that might tsleep(). 718 */ 719void 720nfs_set_sigmask(struct thread *td, sigset_t *oldset) 721{ 722 sigset_t newset; 723 int i; 724 struct proc *p; 725 726 SIGFILLSET(newset); 727 if (td == NULL) 728 td = curthread; /* XXX */ 729 p = td->td_proc; 730 /* Remove the NFS set of signals from newset. */ 731 PROC_LOCK(p); 732 mtx_lock(&p->p_sigacts->ps_mtx); 733 for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) { 734 /* 735 * But make sure we leave the ones already masked 736 * by the process, i.e. remove the signal from the 737 * temporary signalmask only if it wasn't already 738 * in p_sigmask. 739 */ 740 if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) && 741 !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i])) 742 SIGDELSET(newset, nfs_sig_set[i]); 743 } 744 mtx_unlock(&p->p_sigacts->ps_mtx); 745 PROC_UNLOCK(p); 746 kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 747} 748 749void 750nfs_restore_sigmask(struct thread *td, sigset_t *set) 751{ 752 if (td == NULL) 753 td = curthread; /* XXX */ 754 kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 755} 756 757/* 758 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 759 * old one after msleep() returns. 
760 */ 761int 762nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, 763 char *wmesg, int timo) 764{ 765 sigset_t oldset; 766 int error; 767 struct proc *p; 768 769 if ((priority & PCATCH) == 0) 770 return msleep(ident, mtx, priority, wmesg, timo); 771 if (td == NULL) 772 td = curthread; /* XXX */ 773 nfs_set_sigmask(td, &oldset); 774 error = msleep(ident, mtx, priority, wmesg, timo); 775 nfs_restore_sigmask(td, &oldset); 776 p = td->td_proc; 777 return (error); 778} 779 780/* 781 * Test for a termination condition pending on the process. 782 * This is used for NFSMNT_INT mounts. 783 */ 784int 785nfs_sigintr(struct nfsmount *nmp, struct thread *td) 786{ 787 struct proc *p; 788 sigset_t tmpset; 789 790 /* Terminate all requests while attempting a forced unmount. */ 791 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 792 return (EIO); 793 if (!(nmp->nm_flag & NFSMNT_INT)) 794 return (0); 795 if (td == NULL) 796 return (0); 797 p = td->td_proc; 798 PROC_LOCK(p); 799 tmpset = p->p_siglist; 800 SIGSETOR(tmpset, td->td_siglist); 801 SIGSETNAND(tmpset, td->td_sigmask); 802 mtx_lock(&p->p_sigacts->ps_mtx); 803 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 804 mtx_unlock(&p->p_sigacts->ps_mtx); 805 if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 806 && nfs_sig_pending(tmpset)) { 807 PROC_UNLOCK(p); 808 return (EINTR); 809 } 810 PROC_UNLOCK(p); 811 return (0); 812} 813 814static int 815nfs_msg(struct thread *td, const char *server, const char *msg, int error) 816{ 817 struct proc *p; 818 819 p = td ? 
td->td_proc : NULL; 820 if (error) 821 tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 822 msg, error); 823 else 824 tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 825 return (0); 826} 827 828static void 829nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 830 int error, int flags) 831{ 832 if (nmp == NULL) 833 return; 834 mtx_lock(&nmp->nm_mtx); 835 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 836 nmp->nm_state |= NFSSTA_TIMEO; 837 mtx_unlock(&nmp->nm_mtx); 838 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 839 VQ_NOTRESP, 0); 840 } else 841 mtx_unlock(&nmp->nm_mtx); 842 mtx_lock(&nmp->nm_mtx); 843 if ((flags & NFSSTA_LOCKTIMEO) && 844 !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 845 nmp->nm_state |= NFSSTA_LOCKTIMEO; 846 mtx_unlock(&nmp->nm_mtx); 847 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 848 VQ_NOTRESPLOCK, 0); 849 } else 850 mtx_unlock(&nmp->nm_mtx); 851 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 852} 853 854static void 855nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 856 int flags, int tprintfmsg) 857{ 858 if (nmp == NULL) 859 return; 860 if (tprintfmsg) 861 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 862 863 mtx_lock(&nmp->nm_mtx); 864 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 865 nmp->nm_state &= ~NFSSTA_TIMEO; 866 mtx_unlock(&nmp->nm_mtx); 867 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 868 VQ_NOTRESP, 1); 869 } else 870 mtx_unlock(&nmp->nm_mtx); 871 872 mtx_lock(&nmp->nm_mtx); 873 if ((flags & NFSSTA_LOCKTIMEO) && 874 (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 875 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 876 mtx_unlock(&nmp->nm_mtx); 877 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 878 VQ_NOTRESPLOCK, 1); 879 } else 880 mtx_unlock(&nmp->nm_mtx); 881} 882