/* nfs_krpc.c revision 192686 */
1/*- 2 * Copyright (c) 1989, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_krpc.c 192686 2009-05-24 13:22:00Z dfr $"); 37 38/* 39 * Socket operations for use by nfs 40 */ 41 42#include "opt_inet6.h" 43#include "opt_kdtrace.h" 44#include "opt_kgssapi.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/limits.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/mount.h> 54#include <sys/mutex.h> 55#include <sys/proc.h> 56#include <sys/signalvar.h> 57#include <sys/syscallsubr.h> 58#include <sys/sysctl.h> 59#include <sys/syslog.h> 60#include <sys/vnode.h> 61 62#include <rpc/rpc.h> 63 64#include <nfs/rpcv2.h> 65#include <nfs/nfsproto.h> 66#include <nfsclient/nfs.h> 67#include <nfs/xdr_subs.h> 68#include <nfsclient/nfsm_subs.h> 69#include <nfsclient/nfsmount.h> 70#include <nfsclient/nfsnode.h> 71 72#ifndef NFS_LEGACYRPC 73 74#ifdef KDTRACE_HOOKS 75#include <sys/dtrace_bsd.h> 76 77dtrace_nfsclient_nfs23_start_probe_func_t 78 dtrace_nfsclient_nfs23_start_probe; 79 80dtrace_nfsclient_nfs23_done_probe_func_t 81 dtrace_nfsclient_nfs23_done_probe; 82 83/* 84 * Registered probes by RPC type. 
85 */ 86uint32_t nfsclient_nfs2_start_probes[NFS_NPROCS]; 87uint32_t nfsclient_nfs2_done_probes[NFS_NPROCS]; 88 89uint32_t nfsclient_nfs3_start_probes[NFS_NPROCS]; 90uint32_t nfsclient_nfs3_done_probes[NFS_NPROCS]; 91#endif 92 93static int nfs_realign_test; 94static int nfs_realign_count; 95static int nfs_bufpackets = 4; 96static int nfs_reconnects; 97static int nfs3_jukebox_delay = 10; 98static int nfs_skip_wcc_data_onerr = 1; 99static int fake_wchan; 100 101SYSCTL_DECL(_vfs_nfs); 102 103SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, 104 "Number of realign tests done"); 105SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, 106 "Number of mbuf realignments done"); 107SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 108 "Buffer reservation size 2 < x < 64"); 109SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 110 "Number of times the nfs client has had to reconnect"); 111SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, 112 "Number of seconds to delay a retry after receiving EJUKEBOX"); 113SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, 114 "Disable weak cache consistency checking when server returns an error"); 115 116static void nfs_down(struct nfsmount *, struct thread *, const char *, 117 int, int); 118static void nfs_up(struct nfsmount *, struct thread *, const char *, 119 int, int); 120static int nfs_msg(struct thread *, const char *, const char *, int); 121 122extern int nfsv2_procid[]; 123 124struct nfs_cached_auth { 125 int ca_refs; /* refcount, including 1 from the cache */ 126 uid_t ca_uid; /* uid that corresponds to this auth */ 127 AUTH *ca_auth; /* RPC auth handle */ 128}; 129 130/* 131 * RTT estimator 132 */ 133 134static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = { 135 NFS_DEFAULT_TIMER, /* NULL */ 136 NFS_GETATTR_TIMER, /* GETATTR */ 137 
NFS_DEFAULT_TIMER, /* SETATTR */ 138 NFS_LOOKUP_TIMER, /* LOOKUP */ 139 NFS_GETATTR_TIMER, /* ACCESS */ 140 NFS_READ_TIMER, /* READLINK */ 141 NFS_READ_TIMER, /* READ */ 142 NFS_WRITE_TIMER, /* WRITE */ 143 NFS_DEFAULT_TIMER, /* CREATE */ 144 NFS_DEFAULT_TIMER, /* MKDIR */ 145 NFS_DEFAULT_TIMER, /* SYMLINK */ 146 NFS_DEFAULT_TIMER, /* MKNOD */ 147 NFS_DEFAULT_TIMER, /* REMOVE */ 148 NFS_DEFAULT_TIMER, /* RMDIR */ 149 NFS_DEFAULT_TIMER, /* RENAME */ 150 NFS_DEFAULT_TIMER, /* LINK */ 151 NFS_READ_TIMER, /* READDIR */ 152 NFS_READ_TIMER, /* READDIRPLUS */ 153 NFS_DEFAULT_TIMER, /* FSSTAT */ 154 NFS_DEFAULT_TIMER, /* FSINFO */ 155 NFS_DEFAULT_TIMER, /* PATHCONF */ 156 NFS_DEFAULT_TIMER, /* COMMIT */ 157 NFS_DEFAULT_TIMER, /* NOOP */ 158}; 159 160/* 161 * Choose the correct RTT timer for this NFS procedure. 162 */ 163static inline enum nfs_rto_timer_t 164nfs_rto_timer(u_int32_t procnum) 165{ 166 return nfs_proct[procnum]; 167} 168 169/* 170 * Initialize the RTT estimator state for a new mount point. 171 */ 172static void 173nfs_init_rtt(struct nfsmount *nmp) 174{ 175 int i; 176 177 for (i = 0; i < NFS_MAX_TIMER; i++) { 178 nmp->nm_timers[i].rt_srtt = hz; 179 nmp->nm_timers[i].rt_deviate = 0; 180 nmp->nm_timers[i].rt_rtxcur = hz; 181 } 182} 183 184/* 185 * Initialize sockets and congestion for a new NFS connection. 186 * We do not free the sockaddr if error. 187 */ 188int 189nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) 190{ 191 int rcvreserve, sndreserve; 192 int pktscale; 193 struct sockaddr *saddr; 194 struct ucred *origcred; 195 struct thread *td = curthread; 196 CLIENT *client; 197 struct netconfig *nconf; 198 rpcvers_t vers; 199 int one = 1, retries; 200 201 /* 202 * We need to establish the socket using the credentials of 203 * the mountpoint. Some parts of this process (such as 204 * sobind() and soconnect()) will use the curent thread's 205 * credential instead of the socket credential. 
To work 206 * around this, temporarily change the current thread's 207 * credential to that of the mountpoint. 208 * 209 * XXX: It would be better to explicitly pass the correct 210 * credential to sobind() and soconnect(). 211 */ 212 origcred = td->td_ucred; 213 td->td_ucred = nmp->nm_mountp->mnt_cred; 214 saddr = nmp->nm_nam; 215 216 vers = NFS_VER2; 217 if (nmp->nm_flag & NFSMNT_NFSV3) 218 vers = NFS_VER3; 219 else if (nmp->nm_flag & NFSMNT_NFSV4) 220 vers = NFS_VER4; 221 if (saddr->sa_family == AF_INET) 222 if (nmp->nm_sotype == SOCK_DGRAM) 223 nconf = getnetconfigent("udp"); 224 else 225 nconf = getnetconfigent("tcp"); 226 else 227 if (nmp->nm_sotype == SOCK_DGRAM) 228 nconf = getnetconfigent("udp6"); 229 else 230 nconf = getnetconfigent("tcp6"); 231 232 /* 233 * Get buffer reservation size from sysctl, but impose reasonable 234 * limits. 235 */ 236 pktscale = nfs_bufpackets; 237 if (pktscale < 2) 238 pktscale = 2; 239 if (pktscale > 64) 240 pktscale = 64; 241 mtx_lock(&nmp->nm_mtx); 242 if (nmp->nm_sotype == SOCK_DGRAM) { 243 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 244 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 245 NFS_MAXPKTHDR) * pktscale; 246 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 247 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 248 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 249 NFS_MAXPKTHDR) * pktscale; 250 } else { 251 if (nmp->nm_sotype != SOCK_STREAM) 252 panic("nfscon sotype"); 253 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 254 sizeof (u_int32_t)) * pktscale; 255 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 256 sizeof (u_int32_t)) * pktscale; 257 } 258 mtx_unlock(&nmp->nm_mtx); 259 260 client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers, 261 sndreserve, rcvreserve); 262 CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); 263 if (nmp->nm_flag & NFSMNT_INT) 264 CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 265 if (nmp->nm_flag & NFSMNT_RESVPORT) 266 CLNT_CONTROL(client, 
CLSET_PRIVPORT, &one); 267 if (nmp->nm_flag & NFSMNT_SOFT) 268 retries = nmp->nm_retry; 269 else 270 retries = INT_MAX; 271 CLNT_CONTROL(client, CLSET_RETRIES, &retries); 272 273 mtx_lock(&nmp->nm_mtx); 274 if (nmp->nm_client) { 275 /* 276 * Someone else already connected. 277 */ 278 CLNT_RELEASE(client); 279 } else { 280 nmp->nm_client = client; 281 } 282 283 /* 284 * Protocols that do not require connections may be optionally left 285 * unconnected for servers that reply from a port other than NFS_PORT. 286 */ 287 if (!(nmp->nm_flag & NFSMNT_NOCONN)) { 288 mtx_unlock(&nmp->nm_mtx); 289 CLNT_CONTROL(client, CLSET_CONNECT, &one); 290 } else { 291 mtx_unlock(&nmp->nm_mtx); 292 } 293 294 /* Restore current thread's credentials. */ 295 td->td_ucred = origcred; 296 297 mtx_lock(&nmp->nm_mtx); 298 /* Initialize other non-zero congestion variables */ 299 nfs_init_rtt(nmp); 300 mtx_unlock(&nmp->nm_mtx); 301 return (0); 302} 303 304/* 305 * NFS disconnect. Clean up and unlink. 306 */ 307void 308nfs_disconnect(struct nfsmount *nmp) 309{ 310 CLIENT *client; 311 312 mtx_lock(&nmp->nm_mtx); 313 if (nmp->nm_client) { 314 client = nmp->nm_client; 315 nmp->nm_client = NULL; 316 mtx_unlock(&nmp->nm_mtx); 317#ifdef KGSSAPI 318 rpc_gss_secpurge(client); 319#endif 320 CLNT_CLOSE(client); 321 CLNT_RELEASE(client); 322 } else { 323 mtx_unlock(&nmp->nm_mtx); 324 } 325} 326 327void 328nfs_safedisconnect(struct nfsmount *nmp) 329{ 330 331 nfs_disconnect(nmp); 332} 333 334static AUTH * 335nfs_getauth(struct nfsmount *nmp, struct ucred *cred) 336{ 337#ifdef KGSSAPI 338 rpc_gss_service_t svc; 339 AUTH *auth; 340#endif 341 342 switch (nmp->nm_secflavor) { 343#ifdef KGSSAPI 344 case RPCSEC_GSS_KRB5: 345 case RPCSEC_GSS_KRB5I: 346 case RPCSEC_GSS_KRB5P: 347 if (!nmp->nm_mech_oid) { 348 if (!rpc_gss_mech_to_oid("kerberosv5", 349 &nmp->nm_mech_oid)) 350 return (NULL); 351 } 352 if (nmp->nm_secflavor == RPCSEC_GSS_KRB5) 353 svc = rpc_gss_svc_none; 354 else if (nmp->nm_secflavor == 
RPCSEC_GSS_KRB5I) 355 svc = rpc_gss_svc_integrity; 356 else 357 svc = rpc_gss_svc_privacy; 358 auth = rpc_gss_secfind(nmp->nm_client, cred, 359 nmp->nm_principal, nmp->nm_mech_oid, svc); 360 if (auth) 361 return (auth); 362 /* fallthrough */ 363#endif 364 case AUTH_SYS: 365 default: 366 return (authunix_create(cred)); 367 368 } 369} 370 371/* 372 * Callback from the RPC code to generate up/down notifications. 373 */ 374 375struct nfs_feedback_arg { 376 struct nfsmount *nf_mount; 377 int nf_lastmsg; /* last tprintf */ 378 int nf_tprintfmsg; 379 struct thread *nf_td; 380}; 381 382static void 383nfs_feedback(int type, int proc, void *arg) 384{ 385 struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 386 struct nfsmount *nmp = nf->nf_mount; 387 struct timeval now; 388 389 getmicrouptime(&now); 390 391 switch (type) { 392 case FEEDBACK_REXMIT2: 393 case FEEDBACK_RECONNECT: 394 if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 395 nfs_down(nmp, nf->nf_td, 396 "not responding", 0, NFSSTA_TIMEO); 397 nf->nf_tprintfmsg = TRUE; 398 nf->nf_lastmsg = now.tv_sec; 399 } 400 break; 401 402 case FEEDBACK_OK: 403 nfs_up(nf->nf_mount, nf->nf_td, 404 "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 405 break; 406 } 407} 408 409/* 410 * nfs_realign: 411 * 412 * Check for badly aligned mbuf data and realign by copying the unaligned 413 * portion of the data into a new mbuf chain and freeing the portions 414 * of the old chain that were replaced. 415 * 416 * We cannot simply realign the data within the existing mbuf chain 417 * because the underlying buffers may contain other rpc commands and 418 * we cannot afford to overwrite them. 419 * 420 * We would prefer to avoid this situation entirely. The situation does 421 * not occur with NFS/UDP and is supposed to only occassionally occur 422 * with TCP. Use vfs.nfs.realign_count and realign_test to check this. 
423 * 424 */ 425static int 426nfs_realign(struct mbuf **pm, int hsiz) 427{ 428 struct mbuf *m, *n; 429 int off, space; 430 431 ++nfs_realign_test; 432 while ((m = *pm) != NULL) { 433 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { 434 /* 435 * NB: we can't depend on m_pkthdr.len to help us 436 * decide what to do here. May not be worth doing 437 * the m_length calculation as m_copyback will 438 * expand the mbuf chain below as needed. 439 */ 440 space = m_length(m, NULL); 441 if (space >= MINCLSIZE) { 442 /* NB: m_copyback handles space > MCLBYTES */ 443 n = m_getcl(M_DONTWAIT, MT_DATA, 0); 444 } else 445 n = m_get(M_DONTWAIT, MT_DATA); 446 if (n == NULL) 447 return (ENOMEM); 448 /* 449 * Align the remainder of the mbuf chain. 450 */ 451 n->m_len = 0; 452 off = 0; 453 while (m != NULL) { 454 m_copyback(n, off, m->m_len, mtod(m, caddr_t)); 455 off += m->m_len; 456 m = m->m_next; 457 } 458 m_freem(*pm); 459 *pm = n; 460 ++nfs_realign_count; 461 break; 462 } 463 pm = &m->m_next; 464 } 465 return (0); 466} 467 468/* 469 * nfs_request - goes something like this 470 * - fill in request struct 471 * - links it into list 472 * - calls nfs_send() for first transmit 473 * - calls nfs_receive() to get reply 474 * - break down rpc header and return with nfs reply pointed to 475 * by mrep or error 476 * nb: always frees up mreq mbuf list 477 */ 478int 479nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum, 480 struct thread *td, struct ucred *cred, struct mbuf **mrp, 481 struct mbuf **mdp, caddr_t *dposp) 482{ 483 struct mbuf *mrep; 484 u_int32_t *tl; 485 struct nfsmount *nmp; 486 struct mbuf *md; 487 time_t waituntil; 488 caddr_t dpos; 489 int error = 0; 490 struct timeval now; 491 AUTH *auth = NULL; 492 enum nfs_rto_timer_t timer; 493 struct nfs_feedback_arg nf; 494 struct rpc_callextra ext; 495 enum clnt_stat stat; 496 struct timeval timo; 497 498 /* Reject requests while attempting a forced unmount. 
*/ 499 if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { 500 m_freem(mreq); 501 return (ESTALE); 502 } 503 nmp = VFSTONFS(vp->v_mount); 504 bzero(&nf, sizeof(struct nfs_feedback_arg)); 505 nf.nf_mount = nmp; 506 nf.nf_td = td; 507 getmicrouptime(&now); 508 nf.nf_lastmsg = now.tv_sec - 509 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 510 511 /* 512 * XXX if not already connected call nfs_connect now. Longer 513 * term, change nfs_mount to call nfs_connect unconditionally 514 * and let clnt_reconnect_create handle reconnects. 515 */ 516 if (!nmp->nm_client) 517 nfs_connect(nmp, NULL); 518 519 auth = nfs_getauth(nmp, cred); 520 if (!auth) { 521 m_freem(mreq); 522 return (EACCES); 523 } 524 bzero(&ext, sizeof(ext)); 525 ext.rc_auth = auth; 526 527 ext.rc_feedback = nfs_feedback; 528 ext.rc_feedback_arg = &nf; 529 530 /* 531 * Use a conservative timeout for RPCs other than getattr, 532 * lookup, read or write. The justification for doing "other" 533 * this way is that these RPCs happen so infrequently that 534 * timer est. would probably be stale. Also, since many of 535 * these RPCs are non-idempotent, a conservative timeout is 536 * desired. 
537 */ 538 timer = nfs_rto_timer(procnum); 539 if (timer != NFS_DEFAULT_TIMER) { 540 ext.rc_timers = &nmp->nm_timers[timer - 1]; 541 } else { 542 ext.rc_timers = NULL; 543 } 544 545#ifdef KDTRACE_HOOKS 546 if (dtrace_nfsclient_nfs23_start_probe != NULL) { 547 uint32_t probe_id; 548 int probe_procnum; 549 550 if (nmp->nm_flag & NFSMNT_NFSV3) { 551 probe_id = nfsclient_nfs3_start_probes[procnum]; 552 probe_procnum = procnum; 553 } else { 554 probe_id = nfsclient_nfs2_start_probes[procnum]; 555 probe_procnum = nfsv2_procid[procnum]; 556 } 557 if (probe_id != 0) 558 (dtrace_nfsclient_nfs23_start_probe)(probe_id, vp, 559 mreq, cred, probe_procnum); 560 } 561#endif 562 563 nfsstats.rpcrequests++; 564tryagain: 565 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 566 timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ; 567 mrep = NULL; 568 stat = CLNT_CALL_MBUF(nmp->nm_client, &ext, 569 (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum], 570 mreq, &mrep, timo); 571 572 /* 573 * If there was a successful reply and a tprintf msg. 574 * tprintf a response. 575 */ 576 if (stat == RPC_SUCCESS) { 577 error = 0; 578 } else if (stat == RPC_TIMEDOUT) { 579 error = ETIMEDOUT; 580 } else if (stat == RPC_VERSMISMATCH) { 581 error = EOPNOTSUPP; 582 } else if (stat == RPC_PROGVERSMISMATCH) { 583 error = EPROTONOSUPPORT; 584 } else { 585 error = EACCES; 586 } 587 if (error) 588 goto nfsmout; 589 590 KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 591 592 /* 593 * Search for any mbufs that are not a multiple of 4 bytes long 594 * or with m_data not longword aligned. 595 * These could cause pointer alignment problems, so copy them to 596 * well aligned mbufs. 
597 */ 598 error = nfs_realign(&mrep, 2 * NFSX_UNSIGNED); 599 if (error == ENOMEM) { 600 m_freem(mrep); 601 AUTH_DESTROY(auth); 602 return (error); 603 } 604 605 md = mrep; 606 dpos = mtod(mrep, caddr_t); 607 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 608 if (*tl != 0) { 609 error = fxdr_unsigned(int, *tl); 610 if ((nmp->nm_flag & NFSMNT_NFSV3) && 611 error == NFSERR_TRYLATER) { 612 m_freem(mrep); 613 error = 0; 614 waituntil = time_second + nfs3_jukebox_delay; 615 while (time_second < waituntil) { 616 (void) tsleep(&fake_wchan, PSOCK, "nqnfstry", hz); 617 } 618 goto tryagain; 619 } 620 621 /* 622 * If the File Handle was stale, invalidate the lookup 623 * cache, just in case. 624 */ 625 if (error == ESTALE) 626 nfs_purgecache(vp); 627 /* 628 * Skip wcc data on NFS errors for now. NetApp filers 629 * return corrupt postop attrs in the wcc data for NFS 630 * err EROFS. Not sure if they could return corrupt 631 * postop attrs for others errors. 632 */ 633 if ((nmp->nm_flag & NFSMNT_NFSV3) && !nfs_skip_wcc_data_onerr) { 634 *mrp = mrep; 635 *mdp = md; 636 *dposp = dpos; 637 error |= NFSERR_RETERR; 638 } else 639 m_freem(mrep); 640 goto nfsmout; 641 } 642 643#ifdef KDTRACE_HOOKS 644 if (dtrace_nfsclient_nfs23_done_probe != NULL) { 645 uint32_t probe_id; 646 int probe_procnum; 647 648 if (nmp->nm_flag & NFSMNT_NFSV3) { 649 probe_id = nfsclient_nfs3_done_probes[procnum]; 650 probe_procnum = procnum; 651 } else { 652 probe_id = nfsclient_nfs2_done_probes[procnum]; 653 probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 
654 procnum : nfsv2_procid[procnum]; 655 } 656 if (probe_id != 0) 657 (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 658 mreq, cred, probe_procnum, 0); 659 } 660#endif 661 m_freem(mreq); 662 *mrp = mrep; 663 *mdp = md; 664 *dposp = dpos; 665 AUTH_DESTROY(auth); 666 return (0); 667 668nfsmout: 669#ifdef KDTRACE_HOOKS 670 if (dtrace_nfsclient_nfs23_done_probe != NULL) { 671 uint32_t probe_id; 672 int probe_procnum; 673 674 if (nmp->nm_flag & NFSMNT_NFSV3) { 675 probe_id = nfsclient_nfs3_done_probes[procnum]; 676 probe_procnum = procnum; 677 } else { 678 probe_id = nfsclient_nfs2_done_probes[procnum]; 679 probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 680 procnum : nfsv2_procid[procnum]; 681 } 682 if (probe_id != 0) 683 (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 684 mreq, cred, probe_procnum, error); 685 } 686#endif 687 m_freem(mreq); 688 if (auth) 689 AUTH_DESTROY(auth); 690 return (error); 691} 692 693/* 694 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 695 * wait for all requests to complete. This is used by forced unmounts 696 * to terminate any outstanding RPCs. 697 */ 698int 699nfs_nmcancelreqs(struct nfsmount *nmp) 700{ 701 702 if (nmp->nm_client) 703 CLNT_CLOSE(nmp->nm_client); 704 return (0); 705} 706 707/* 708 * Any signal that can interrupt an NFS operation in an intr mount 709 * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 710 */ 711int nfs_sig_set[] = { 712 SIGINT, 713 SIGTERM, 714 SIGHUP, 715 SIGKILL, 716 SIGSTOP, 717 SIGQUIT 718}; 719 720/* 721 * Check to see if one of the signals in our subset is pending on 722 * the process (in an intr mount). 
723 */ 724static int 725nfs_sig_pending(sigset_t set) 726{ 727 int i; 728 729 for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) 730 if (SIGISMEMBER(set, nfs_sig_set[i])) 731 return (1); 732 return (0); 733} 734 735/* 736 * The set/restore sigmask functions are used to (temporarily) overwrite 737 * the process p_sigmask during an RPC call (for example). These are also 738 * used in other places in the NFS client that might tsleep(). 739 */ 740void 741nfs_set_sigmask(struct thread *td, sigset_t *oldset) 742{ 743 sigset_t newset; 744 int i; 745 struct proc *p; 746 747 SIGFILLSET(newset); 748 if (td == NULL) 749 td = curthread; /* XXX */ 750 p = td->td_proc; 751 /* Remove the NFS set of signals from newset */ 752 PROC_LOCK(p); 753 mtx_lock(&p->p_sigacts->ps_mtx); 754 for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) { 755 /* 756 * But make sure we leave the ones already masked 757 * by the process, ie. remove the signal from the 758 * temporary signalmask only if it wasn't already 759 * in p_sigmask. 760 */ 761 if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) && 762 !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i])) 763 SIGDELSET(newset, nfs_sig_set[i]); 764 } 765 mtx_unlock(&p->p_sigacts->ps_mtx); 766 PROC_UNLOCK(p); 767 kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 768} 769 770void 771nfs_restore_sigmask(struct thread *td, sigset_t *set) 772{ 773 if (td == NULL) 774 td = curthread; /* XXX */ 775 kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 776} 777 778/* 779 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 780 * old one after msleep() returns. 
781 */ 782int 783nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) 784{ 785 sigset_t oldset; 786 int error; 787 struct proc *p; 788 789 if ((priority & PCATCH) == 0) 790 return msleep(ident, mtx, priority, wmesg, timo); 791 if (td == NULL) 792 td = curthread; /* XXX */ 793 nfs_set_sigmask(td, &oldset); 794 error = msleep(ident, mtx, priority, wmesg, timo); 795 nfs_restore_sigmask(td, &oldset); 796 p = td->td_proc; 797 return (error); 798} 799 800/* 801 * Test for a termination condition pending on the process. 802 * This is used for NFSMNT_INT mounts. 803 */ 804int 805nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td) 806{ 807 struct proc *p; 808 sigset_t tmpset; 809 810 /* Terminate all requests while attempting a forced unmount. */ 811 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 812 return (EIO); 813 if (!(nmp->nm_flag & NFSMNT_INT)) 814 return (0); 815 if (td == NULL) 816 return (0); 817 p = td->td_proc; 818 PROC_LOCK(p); 819 tmpset = p->p_siglist; 820 SIGSETOR(tmpset, td->td_siglist); 821 SIGSETNAND(tmpset, td->td_sigmask); 822 mtx_lock(&p->p_sigacts->ps_mtx); 823 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 824 mtx_unlock(&p->p_sigacts->ps_mtx); 825 if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 826 && nfs_sig_pending(tmpset)) { 827 PROC_UNLOCK(p); 828 return (EINTR); 829 } 830 PROC_UNLOCK(p); 831 return (0); 832} 833 834static int 835nfs_msg(struct thread *td, const char *server, const char *msg, int error) 836{ 837 struct proc *p; 838 839 p = td ? 
td->td_proc : NULL; 840 if (error) { 841 tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 842 msg, error); 843 } else { 844 tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 845 } 846 return (0); 847} 848 849static void 850nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 851 int error, int flags) 852{ 853 if (nmp == NULL) 854 return; 855 mtx_lock(&nmp->nm_mtx); 856 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 857 nmp->nm_state |= NFSSTA_TIMEO; 858 mtx_unlock(&nmp->nm_mtx); 859 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 860 VQ_NOTRESP, 0); 861 } else 862 mtx_unlock(&nmp->nm_mtx); 863 mtx_lock(&nmp->nm_mtx); 864 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 865 nmp->nm_state |= NFSSTA_LOCKTIMEO; 866 mtx_unlock(&nmp->nm_mtx); 867 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 868 VQ_NOTRESPLOCK, 0); 869 } else 870 mtx_unlock(&nmp->nm_mtx); 871 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 872} 873 874static void 875nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 876 int flags, int tprintfmsg) 877{ 878 if (nmp == NULL) 879 return; 880 if (tprintfmsg) { 881 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 882 } 883 884 mtx_lock(&nmp->nm_mtx); 885 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 886 nmp->nm_state &= ~NFSSTA_TIMEO; 887 mtx_unlock(&nmp->nm_mtx); 888 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 889 VQ_NOTRESP, 1); 890 } else 891 mtx_unlock(&nmp->nm_mtx); 892 893 mtx_lock(&nmp->nm_mtx); 894 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 895 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 896 mtx_unlock(&nmp->nm_mtx); 897 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 898 VQ_NOTRESPLOCK, 1); 899 } else 900 mtx_unlock(&nmp->nm_mtx); 901} 902 903#endif /* !NFS_LEGACYRPC */ 904