nfs_krpc.c revision 232116
1/*- 2 * Copyright (c) 1989, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_krpc.c 232116 2012-02-24 17:26:06Z jhb $"); 37 38/* 39 * Socket operations for use by nfs 40 */ 41 42#include "opt_inet6.h" 43#include "opt_kdtrace.h" 44#include "opt_kgssapi.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/limits.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/mount.h> 54#include <sys/mutex.h> 55#include <sys/proc.h> 56#include <sys/signalvar.h> 57#include <sys/syscallsubr.h> 58#include <sys/sysctl.h> 59#include <sys/syslog.h> 60#include <sys/vnode.h> 61 62#include <rpc/rpc.h> 63 64#include <nfs/nfsproto.h> 65#include <nfsclient/nfs.h> 66#include <nfs/xdr_subs.h> 67#include <nfsclient/nfsm_subs.h> 68#include <nfsclient/nfsmount.h> 69#include <nfsclient/nfsnode.h> 70 71#ifdef KDTRACE_HOOKS 72#include <sys/dtrace_bsd.h> 73 74dtrace_nfsclient_nfs23_start_probe_func_t 75 dtrace_nfsclient_nfs23_start_probe; 76 77dtrace_nfsclient_nfs23_done_probe_func_t 78 dtrace_nfsclient_nfs23_done_probe; 79 80/* 81 * Registered probes by RPC type. 82 */ 83uint32_t nfsclient_nfs2_start_probes[NFS_NPROCS]; 84uint32_t nfsclient_nfs2_done_probes[NFS_NPROCS]; 85 86uint32_t nfsclient_nfs3_start_probes[NFS_NPROCS]; 87uint32_t nfsclient_nfs3_done_probes[NFS_NPROCS]; 88#endif 89 90static int nfs_bufpackets = 4; 91static int nfs_reconnects; 92static int nfs3_jukebox_delay = 10; 93static int nfs_skip_wcc_data_onerr = 1; 94static int fake_wchan; 95 96SYSCTL_DECL(_vfs_oldnfs); 97 98SYSCTL_INT(_vfs_oldnfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 99 "Buffer reservation size 2 < x < 64"); 100SYSCTL_INT(_vfs_oldnfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 101 "Number of times the nfs client has had to reconnect"); 102SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, 103 &nfs3_jukebox_delay, 0, 104 "Number of seconds to delay a retry after receiving EJUKEBOX"); 105SYSCTL_INT(_vfs_oldnfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, 106 &nfs_skip_wcc_data_onerr, 0, 107 "Disable weak cache consistency checking when server returns an error"); 108 109static void nfs_down(struct nfsmount *, struct thread *, const char *, 110 int, int); 111static void nfs_up(struct nfsmount *, struct thread *, const char *, 112 int, int); 113static int nfs_msg(struct thread *, const char *, const char *, int); 114 115extern int nfsv2_procid[]; 116 117struct nfs_cached_auth { 118 int ca_refs; /* refcount, including 1 from the cache */ 119 uid_t ca_uid; /* uid that corresponds to this auth */ 120 AUTH *ca_auth; /* RPC auth handle */ 121}; 122 123/* 124 * RTT estimator 125 */ 126 127static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = { 128 NFS_DEFAULT_TIMER, /* NULL */ 129 NFS_GETATTR_TIMER, /* GETATTR */ 130 NFS_DEFAULT_TIMER, /* SETATTR */ 131 NFS_LOOKUP_TIMER, /* LOOKUP */ 132 NFS_GETATTR_TIMER, /* ACCESS */ 133 NFS_READ_TIMER, /* READLINK */ 134 NFS_READ_TIMER, /* READ */ 135 NFS_WRITE_TIMER, /* WRITE */ 136 NFS_DEFAULT_TIMER, /* CREATE */ 137 NFS_DEFAULT_TIMER, /* MKDIR */ 138 NFS_DEFAULT_TIMER, /* SYMLINK */ 139 NFS_DEFAULT_TIMER, /* MKNOD */ 140 NFS_DEFAULT_TIMER, /* REMOVE */ 141 NFS_DEFAULT_TIMER, /* RMDIR */ 142 NFS_DEFAULT_TIMER, /* RENAME */ 143 NFS_DEFAULT_TIMER, /* LINK */ 144 NFS_READ_TIMER, /* READDIR */ 145 NFS_READ_TIMER, /* READDIRPLUS */ 146 NFS_DEFAULT_TIMER, /* FSSTAT */ 147 NFS_DEFAULT_TIMER, /* FSINFO */ 148 NFS_DEFAULT_TIMER, /* PATHCONF */ 149 NFS_DEFAULT_TIMER, /* COMMIT */ 150 NFS_DEFAULT_TIMER, /* NOOP */ 151}; 152 153/* 154 * Choose the correct RTT timer for this NFS procedure. 155 */ 156static inline enum nfs_rto_timer_t 157nfs_rto_timer(u_int32_t procnum) 158{ 159 160 return (nfs_proct[procnum]); 161} 162 163/* 164 * Initialize the RTT estimator state for a new mount point. 165 */ 166static void 167nfs_init_rtt(struct nfsmount *nmp) 168{ 169 int i; 170 171 for (i = 0; i < NFS_MAX_TIMER; i++) { 172 nmp->nm_timers[i].rt_srtt = hz; 173 nmp->nm_timers[i].rt_deviate = 0; 174 nmp->nm_timers[i].rt_rtxcur = hz; 175 } 176} 177 178/* 179 * Initialize sockets and congestion for a new NFS connection. 180 * We do not free the sockaddr if error. 181 */ 182int 183nfs_connect(struct nfsmount *nmp) 184{ 185 int rcvreserve, sndreserve; 186 int pktscale; 187 struct sockaddr *saddr; 188 struct ucred *origcred; 189 struct thread *td = curthread; 190 CLIENT *client; 191 struct netconfig *nconf; 192 rpcvers_t vers; 193 int one = 1, retries; 194 struct timeval timo; 195 196 /* 197 * We need to establish the socket using the credentials of 198 * the mountpoint. Some parts of this process (such as 199 * sobind() and soconnect()) will use the curent thread's 200 * credential instead of the socket credential. To work 201 * around this, temporarily change the current thread's 202 * credential to that of the mountpoint. 203 * 204 * XXX: It would be better to explicitly pass the correct 205 * credential to sobind() and soconnect(). 206 */ 207 origcred = td->td_ucred; 208 td->td_ucred = nmp->nm_mountp->mnt_cred; 209 saddr = nmp->nm_nam; 210 211 vers = NFS_VER2; 212 if (nmp->nm_flag & NFSMNT_NFSV3) 213 vers = NFS_VER3; 214 else if (nmp->nm_flag & NFSMNT_NFSV4) 215 vers = NFS_VER4; 216 if (saddr->sa_family == AF_INET) 217 if (nmp->nm_sotype == SOCK_DGRAM) 218 nconf = getnetconfigent("udp"); 219 else 220 nconf = getnetconfigent("tcp"); 221 else 222 if (nmp->nm_sotype == SOCK_DGRAM) 223 nconf = getnetconfigent("udp6"); 224 else 225 nconf = getnetconfigent("tcp6"); 226 227 /* 228 * Get buffer reservation size from sysctl, but impose reasonable 229 * limits. 230 */ 231 pktscale = nfs_bufpackets; 232 if (pktscale < 2) 233 pktscale = 2; 234 if (pktscale > 64) 235 pktscale = 64; 236 mtx_lock(&nmp->nm_mtx); 237 if (nmp->nm_sotype == SOCK_DGRAM) { 238 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 239 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 240 NFS_MAXPKTHDR) * pktscale; 241 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 242 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 243 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 244 NFS_MAXPKTHDR) * pktscale; 245 } else { 246 if (nmp->nm_sotype != SOCK_STREAM) 247 panic("nfscon sotype"); 248 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 249 sizeof (u_int32_t)) * pktscale; 250 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 251 sizeof (u_int32_t)) * pktscale; 252 } 253 mtx_unlock(&nmp->nm_mtx); 254 255 client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers, 256 sndreserve, rcvreserve); 257 CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); 258 if (nmp->nm_flag & NFSMNT_INT) 259 CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 260 if (nmp->nm_flag & NFSMNT_RESVPORT) 261 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 262 if ((nmp->nm_flag & NFSMNT_SOFT) != 0) { 263 if (nmp->nm_sotype == SOCK_DGRAM) 264 /* 265 * For UDP, the large timeout for a reconnect will 266 * be set to "nm_retry * nm_timeo / 2", so we only 267 * want to do 2 reconnect timeout retries. 268 */ 269 retries = 2; 270 else 271 retries = nmp->nm_retry; 272 } else 273 retries = INT_MAX; 274 CLNT_CONTROL(client, CLSET_RETRIES, &retries); 275 276 /* 277 * For UDP, there are 2 timeouts: 278 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer 279 * that does a retransmit of an RPC request using the same socket 280 * and xid. This is what you normally want to do, since NFS 281 * servers depend on "same xid" for their Duplicate Request Cache. 282 * - timeout specified in CLNT_CALL_MBUF(), which specifies when 283 * retransmits on the same socket should fail and a fresh socket 284 * created. Each of these timeouts counts as one CLSET_RETRIES, 285 * as set above. 286 * Set the initial retransmit timeout for UDP. This timeout doesn't 287 * exist for TCP and the following call just fails, which is ok. 288 */ 289 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 290 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 291 CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); 292 293 mtx_lock(&nmp->nm_mtx); 294 if (nmp->nm_client) { 295 /* 296 * Someone else already connected. 297 */ 298 CLNT_RELEASE(client); 299 } else 300 nmp->nm_client = client; 301 302 /* 303 * Protocols that do not require connections may be optionally left 304 * unconnected for servers that reply from a port other than NFS_PORT. 305 */ 306 if (!(nmp->nm_flag & NFSMNT_NOCONN)) { 307 mtx_unlock(&nmp->nm_mtx); 308 CLNT_CONTROL(client, CLSET_CONNECT, &one); 309 } else 310 mtx_unlock(&nmp->nm_mtx); 311 312 /* Restore current thread's credentials. */ 313 td->td_ucred = origcred; 314 315 mtx_lock(&nmp->nm_mtx); 316 /* Initialize other non-zero congestion variables. */ 317 nfs_init_rtt(nmp); 318 mtx_unlock(&nmp->nm_mtx); 319 return (0); 320} 321 322/* 323 * NFS disconnect. Clean up and unlink. 324 */ 325void 326nfs_disconnect(struct nfsmount *nmp) 327{ 328 CLIENT *client; 329 330 mtx_lock(&nmp->nm_mtx); 331 if (nmp->nm_client) { 332 client = nmp->nm_client; 333 nmp->nm_client = NULL; 334 mtx_unlock(&nmp->nm_mtx); 335 rpc_gss_secpurge_call(client); 336 CLNT_CLOSE(client); 337 CLNT_RELEASE(client); 338 } else 339 mtx_unlock(&nmp->nm_mtx); 340} 341 342void 343nfs_safedisconnect(struct nfsmount *nmp) 344{ 345 346 nfs_disconnect(nmp); 347} 348 349static AUTH * 350nfs_getauth(struct nfsmount *nmp, struct ucred *cred) 351{ 352 rpc_gss_service_t svc; 353 AUTH *auth; 354 355 switch (nmp->nm_secflavor) { 356 case RPCSEC_GSS_KRB5: 357 case RPCSEC_GSS_KRB5I: 358 case RPCSEC_GSS_KRB5P: 359 if (!nmp->nm_mech_oid) 360 if (!rpc_gss_mech_to_oid_call("kerberosv5", 361 &nmp->nm_mech_oid)) 362 return (NULL); 363 if (nmp->nm_secflavor == RPCSEC_GSS_KRB5) 364 svc = rpc_gss_svc_none; 365 else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I) 366 svc = rpc_gss_svc_integrity; 367 else 368 svc = rpc_gss_svc_privacy; 369 auth = rpc_gss_secfind_call(nmp->nm_client, cred, 370 nmp->nm_principal, nmp->nm_mech_oid, svc); 371 if (auth) 372 return (auth); 373 /* fallthrough */ 374 case AUTH_SYS: 375 default: 376 return (authunix_create(cred)); 377 378 } 379} 380 381/* 382 * Callback from the RPC code to generate up/down notifications. 383 */ 384 385struct nfs_feedback_arg { 386 struct nfsmount *nf_mount; 387 int nf_lastmsg; /* last tprintf */ 388 int nf_tprintfmsg; 389 struct thread *nf_td; 390}; 391 392static void 393nfs_feedback(int type, int proc, void *arg) 394{ 395 struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 396 struct nfsmount *nmp = nf->nf_mount; 397 struct timeval now; 398 399 getmicrouptime(&now); 400 401 switch (type) { 402 case FEEDBACK_REXMIT2: 403 case FEEDBACK_RECONNECT: 404 if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 405 nfs_down(nmp, nf->nf_td, 406 "not responding", 0, NFSSTA_TIMEO); 407 nf->nf_tprintfmsg = TRUE; 408 nf->nf_lastmsg = now.tv_sec; 409 } 410 break; 411 412 case FEEDBACK_OK: 413 nfs_up(nf->nf_mount, nf->nf_td, 414 "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 415 break; 416 } 417} 418 419/* 420 * nfs_request - goes something like this 421 * - fill in request struct 422 * - links it into list 423 * - calls nfs_send() for first transmit 424 * - calls nfs_receive() to get reply 425 * - break down rpc header and return with nfs reply pointed to 426 * by mrep or error 427 * nb: always frees up mreq mbuf list 428 */ 429int 430nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum, 431 struct thread *td, struct ucred *cred, struct mbuf **mrp, 432 struct mbuf **mdp, caddr_t *dposp) 433{ 434 struct mbuf *mrep; 435 u_int32_t *tl; 436 struct nfsmount *nmp; 437 struct mbuf *md; 438 time_t waituntil; 439 caddr_t dpos; 440 int error = 0, timeo; 441 struct timeval now; 442 AUTH *auth = NULL; 443 enum nfs_rto_timer_t timer; 444 struct nfs_feedback_arg nf; 445 struct rpc_callextra ext; 446 enum clnt_stat stat; 447 struct timeval timo; 448 449 /* Reject requests while attempting a forced unmount. */ 450 if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { 451 m_freem(mreq); 452 return (ESTALE); 453 } 454 nmp = VFSTONFS(vp->v_mount); 455 bzero(&nf, sizeof(struct nfs_feedback_arg)); 456 nf.nf_mount = nmp; 457 nf.nf_td = td; 458 getmicrouptime(&now); 459 nf.nf_lastmsg = now.tv_sec - 460 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 461 462 /* 463 * XXX if not already connected call nfs_connect now. Longer 464 * term, change nfs_mount to call nfs_connect unconditionally 465 * and let clnt_reconnect_create handle reconnects. 466 */ 467 if (!nmp->nm_client) 468 nfs_connect(nmp); 469 470 auth = nfs_getauth(nmp, cred); 471 if (!auth) { 472 m_freem(mreq); 473 return (EACCES); 474 } 475 bzero(&ext, sizeof(ext)); 476 ext.rc_auth = auth; 477 478 ext.rc_feedback = nfs_feedback; 479 ext.rc_feedback_arg = &nf; 480 481 /* 482 * Use a conservative timeout for RPCs other than getattr, 483 * lookup, read or write. The justification for doing "other" 484 * this way is that these RPCs happen so infrequently that 485 * timer est. would probably be stale. Also, since many of 486 * these RPCs are non-idempotent, a conservative timeout is 487 * desired. 488 */ 489 timer = nfs_rto_timer(procnum); 490 if (timer != NFS_DEFAULT_TIMER) 491 ext.rc_timers = &nmp->nm_timers[timer - 1]; 492 else 493 ext.rc_timers = NULL; 494 495#ifdef KDTRACE_HOOKS 496 if (dtrace_nfsclient_nfs23_start_probe != NULL) { 497 uint32_t probe_id; 498 int probe_procnum; 499 500 if (nmp->nm_flag & NFSMNT_NFSV3) { 501 probe_id = nfsclient_nfs3_start_probes[procnum]; 502 probe_procnum = procnum; 503 } else { 504 probe_id = nfsclient_nfs2_start_probes[procnum]; 505 probe_procnum = nfsv2_procid[procnum]; 506 } 507 if (probe_id != 0) 508 (dtrace_nfsclient_nfs23_start_probe)(probe_id, vp, 509 mreq, cred, probe_procnum); 510 } 511#endif 512 513 nfsstats.rpcrequests++; 514tryagain: 515 /* 516 * This timeout specifies when a new socket should be created, 517 * along with new xid values. For UDP, this should be done 518 * infrequently, since retransmits of RPC requests should normally 519 * use the same xid. 520 */ 521 if (nmp->nm_sotype == SOCK_DGRAM) { 522 if ((nmp->nm_flag & NFSMNT_SOFT) != 0) { 523 /* 524 * CLSET_RETRIES is set to 2, so this should be half 525 * of the total timeout required. 526 */ 527 timeo = nmp->nm_retry * nmp->nm_timeo / 2; 528 if (timeo < 1) 529 timeo = 1; 530 timo.tv_sec = timeo / NFS_HZ; 531 timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; 532 } else { 533 /* For UDP hard mounts, use a large value. */ 534 timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; 535 timo.tv_usec = 0; 536 } 537 } else { 538 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 539 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 540 } 541 mrep = NULL; 542 stat = CLNT_CALL_MBUF(nmp->nm_client, &ext, 543 (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum], 544 mreq, &mrep, timo); 545 546 /* 547 * If there was a successful reply and a tprintf msg. 548 * tprintf a response. 549 */ 550 if (stat == RPC_SUCCESS) 551 error = 0; 552 else if (stat == RPC_TIMEDOUT) 553 error = ETIMEDOUT; 554 else if (stat == RPC_VERSMISMATCH) 555 error = EOPNOTSUPP; 556 else if (stat == RPC_PROGVERSMISMATCH) 557 error = EPROTONOSUPPORT; 558 else 559 error = EACCES; 560 if (error) 561 goto nfsmout; 562 563 KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 564 565 /* 566 * Search for any mbufs that are not a multiple of 4 bytes long 567 * or with m_data not longword aligned. 568 * These could cause pointer alignment problems, so copy them to 569 * well aligned mbufs. 570 */ 571 error = nfs_realign(&mrep, M_DONTWAIT); 572 if (error == ENOMEM) { 573 m_freem(mrep); 574 AUTH_DESTROY(auth); 575 return (error); 576 } 577 578 md = mrep; 579 dpos = mtod(mrep, caddr_t); 580 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 581 if (*tl != 0) { 582 error = fxdr_unsigned(int, *tl); 583 if ((nmp->nm_flag & NFSMNT_NFSV3) && 584 error == NFSERR_TRYLATER) { 585 m_freem(mrep); 586 error = 0; 587 waituntil = time_second + nfs3_jukebox_delay; 588 while (time_second < waituntil) 589 (void)tsleep(&fake_wchan, PSOCK, "nqnfstry", 590 hz); 591 goto tryagain; 592 } 593 /* 594 * Make sure NFSERR_RETERR isn't bogusly set by a server 595 * such as amd. (No actual NFS error has bit 31 set.) 596 */ 597 error &= ~NFSERR_RETERR; 598 599 /* 600 * If the File Handle was stale, invalidate the lookup 601 * cache, just in case. 602 */ 603 if (error == ESTALE) 604 nfs_purgecache(vp); 605 /* 606 * Skip wcc data on non-ENOENT NFS errors for now. 607 * NetApp filers return corrupt postop attrs in the 608 * wcc data for NFS err EROFS. Not sure if they could 609 * return corrupt postop attrs for others errors. 610 * Blocking ENOENT post-op attributes breaks negative 611 * name caching, so always allow it through. 612 */ 613 if ((nmp->nm_flag & NFSMNT_NFSV3) && 614 (!nfs_skip_wcc_data_onerr || error == ENOENT)) { 615 *mrp = mrep; 616 *mdp = md; 617 *dposp = dpos; 618 error |= NFSERR_RETERR; 619 } else 620 m_freem(mrep); 621 goto nfsmout; 622 } 623 624#ifdef KDTRACE_HOOKS 625 if (dtrace_nfsclient_nfs23_done_probe != NULL) { 626 uint32_t probe_id; 627 int probe_procnum; 628 629 if (nmp->nm_flag & NFSMNT_NFSV3) { 630 probe_id = nfsclient_nfs3_done_probes[procnum]; 631 probe_procnum = procnum; 632 } else { 633 probe_id = nfsclient_nfs2_done_probes[procnum]; 634 probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 635 procnum : nfsv2_procid[procnum]; 636 } 637 if (probe_id != 0) 638 (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 639 mreq, cred, probe_procnum, 0); 640 } 641#endif 642 m_freem(mreq); 643 *mrp = mrep; 644 *mdp = md; 645 *dposp = dpos; 646 AUTH_DESTROY(auth); 647 return (0); 648 649nfsmout: 650#ifdef KDTRACE_HOOKS 651 if (dtrace_nfsclient_nfs23_done_probe != NULL) { 652 uint32_t probe_id; 653 int probe_procnum; 654 655 if (nmp->nm_flag & NFSMNT_NFSV3) { 656 probe_id = nfsclient_nfs3_done_probes[procnum]; 657 probe_procnum = procnum; 658 } else { 659 probe_id = nfsclient_nfs2_done_probes[procnum]; 660 probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 661 procnum : nfsv2_procid[procnum]; 662 } 663 if (probe_id != 0) 664 (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 665 mreq, cred, probe_procnum, error); 666 } 667#endif 668 m_freem(mreq); 669 if (auth) 670 AUTH_DESTROY(auth); 671 return (error); 672} 673 674/* 675 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 676 * wait for all requests to complete. This is used by forced unmounts 677 * to terminate any outstanding RPCs. 678 */ 679int 680nfs_nmcancelreqs(struct nfsmount *nmp) 681{ 682 683 if (nmp->nm_client) 684 CLNT_CLOSE(nmp->nm_client); 685 return (0); 686} 687 688/* 689 * Any signal that can interrupt an NFS operation in an intr mount 690 * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 691 */ 692int nfs_sig_set[] = { 693 SIGINT, 694 SIGTERM, 695 SIGHUP, 696 SIGKILL, 697 SIGSTOP, 698 SIGQUIT 699}; 700 701/* 702 * Check to see if one of the signals in our subset is pending on 703 * the process (in an intr mount). 704 */ 705static int 706nfs_sig_pending(sigset_t set) 707{ 708 int i; 709 710 for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) 711 if (SIGISMEMBER(set, nfs_sig_set[i])) 712 return (1); 713 return (0); 714} 715 716/* 717 * The set/restore sigmask functions are used to (temporarily) overwrite 718 * the process p_sigmask during an RPC call (for example). These are also 719 * used in other places in the NFS client that might tsleep(). 720 */ 721void 722nfs_set_sigmask(struct thread *td, sigset_t *oldset) 723{ 724 sigset_t newset; 725 int i; 726 struct proc *p; 727 728 SIGFILLSET(newset); 729 if (td == NULL) 730 td = curthread; /* XXX */ 731 p = td->td_proc; 732 /* Remove the NFS set of signals from newset. */ 733 PROC_LOCK(p); 734 mtx_lock(&p->p_sigacts->ps_mtx); 735 for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) { 736 /* 737 * But make sure we leave the ones already masked 738 * by the process, i.e. remove the signal from the 739 * temporary signalmask only if it wasn't already 740 * in p_sigmask. 741 */ 742 if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) && 743 !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i])) 744 SIGDELSET(newset, nfs_sig_set[i]); 745 } 746 mtx_unlock(&p->p_sigacts->ps_mtx); 747 PROC_UNLOCK(p); 748 kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 749} 750 751void 752nfs_restore_sigmask(struct thread *td, sigset_t *set) 753{ 754 if (td == NULL) 755 td = curthread; /* XXX */ 756 kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 757} 758 759/* 760 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 761 * old one after msleep() returns. 762 */ 763int 764nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, 765 char *wmesg, int timo) 766{ 767 sigset_t oldset; 768 int error; 769 struct proc *p; 770 771 if ((priority & PCATCH) == 0) 772 return msleep(ident, mtx, priority, wmesg, timo); 773 if (td == NULL) 774 td = curthread; /* XXX */ 775 nfs_set_sigmask(td, &oldset); 776 error = msleep(ident, mtx, priority, wmesg, timo); 777 nfs_restore_sigmask(td, &oldset); 778 p = td->td_proc; 779 return (error); 780} 781 782/* 783 * Test for a termination condition pending on the process. 784 * This is used for NFSMNT_INT mounts. 785 */ 786int 787nfs_sigintr(struct nfsmount *nmp, struct thread *td) 788{ 789 struct proc *p; 790 sigset_t tmpset; 791 792 /* Terminate all requests while attempting a forced unmount. */ 793 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 794 return (EIO); 795 if (!(nmp->nm_flag & NFSMNT_INT)) 796 return (0); 797 if (td == NULL) 798 return (0); 799 p = td->td_proc; 800 PROC_LOCK(p); 801 tmpset = p->p_siglist; 802 SIGSETOR(tmpset, td->td_siglist); 803 SIGSETNAND(tmpset, td->td_sigmask); 804 mtx_lock(&p->p_sigacts->ps_mtx); 805 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 806 mtx_unlock(&p->p_sigacts->ps_mtx); 807 if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 808 && nfs_sig_pending(tmpset)) { 809 PROC_UNLOCK(p); 810 return (EINTR); 811 } 812 PROC_UNLOCK(p); 813 return (0); 814} 815 816static int 817nfs_msg(struct thread *td, const char *server, const char *msg, int error) 818{ 819 struct proc *p; 820 821 p = td ? td->td_proc : NULL; 822 if (error) 823 tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 824 msg, error); 825 else 826 tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 827 return (0); 828} 829 830static void 831nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 832 int error, int flags) 833{ 834 if (nmp == NULL) 835 return; 836 mtx_lock(&nmp->nm_mtx); 837 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 838 nmp->nm_state |= NFSSTA_TIMEO; 839 mtx_unlock(&nmp->nm_mtx); 840 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 841 VQ_NOTRESP, 0); 842 } else 843 mtx_unlock(&nmp->nm_mtx); 844 mtx_lock(&nmp->nm_mtx); 845 if ((flags & NFSSTA_LOCKTIMEO) && 846 !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 847 nmp->nm_state |= NFSSTA_LOCKTIMEO; 848 mtx_unlock(&nmp->nm_mtx); 849 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 850 VQ_NOTRESPLOCK, 0); 851 } else 852 mtx_unlock(&nmp->nm_mtx); 853 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 854} 855 856static void 857nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 858 int flags, int tprintfmsg) 859{ 860 if (nmp == NULL) 861 return; 862 if (tprintfmsg) 863 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 864 865 mtx_lock(&nmp->nm_mtx); 866 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 867 nmp->nm_state &= ~NFSSTA_TIMEO; 868 mtx_unlock(&nmp->nm_mtx); 869 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 870 VQ_NOTRESP, 1); 871 } else 872 mtx_unlock(&nmp->nm_mtx); 873 874 mtx_lock(&nmp->nm_mtx); 875 if ((flags & NFSSTA_LOCKTIMEO) && 876 (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 877 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 878 mtx_unlock(&nmp->nm_mtx); 879 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 880 VQ_NOTRESPLOCK, 1); 881 } else 882 mtx_unlock(&nmp->nm_mtx); 883} 884