1184588Sdfr/*- 2184588Sdfr * Copyright (c) 1989, 1991, 1993, 1995 3184588Sdfr * The Regents of the University of California. All rights reserved. 4184588Sdfr * 5184588Sdfr * This code is derived from software contributed to Berkeley by 6184588Sdfr * Rick Macklem at The University of Guelph. 7184588Sdfr * 8184588Sdfr * Redistribution and use in source and binary forms, with or without 9184588Sdfr * modification, are permitted provided that the following conditions 10184588Sdfr * are met: 11184588Sdfr * 1. Redistributions of source code must retain the above copyright 12184588Sdfr * notice, this list of conditions and the following disclaimer. 13184588Sdfr * 2. Redistributions in binary form must reproduce the above copyright 14184588Sdfr * notice, this list of conditions and the following disclaimer in the 15184588Sdfr * documentation and/or other materials provided with the distribution. 16184588Sdfr * 4. Neither the name of the University nor the names of its contributors 17184588Sdfr * may be used to endorse or promote products derived from this software 18184588Sdfr * without specific prior written permission. 19184588Sdfr * 20184588Sdfr * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21184588Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22184588Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23184588Sdfr * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24184588Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25184588Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26184588Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27184588Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28184588Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29184588Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30184588Sdfr * SUCH DAMAGE. 31184588Sdfr * 32184588Sdfr * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 33184588Sdfr */ 34184588Sdfr 35184588Sdfr#include <sys/cdefs.h> 36184588Sdfr__FBSDID("$FreeBSD$"); 37184588Sdfr 38184588Sdfr/* 39184588Sdfr * Socket operations for use by nfs 40184588Sdfr */ 41184588Sdfr 42184588Sdfr#include "opt_inet6.h" 43190293Srwatson#include "opt_kdtrace.h" 44184588Sdfr#include "opt_kgssapi.h" 45184588Sdfr 46184588Sdfr#include <sys/param.h> 47184588Sdfr#include <sys/systm.h> 48184588Sdfr#include <sys/kernel.h> 49184588Sdfr#include <sys/limits.h> 50184588Sdfr#include <sys/lock.h> 51184588Sdfr#include <sys/malloc.h> 52184588Sdfr#include <sys/mbuf.h> 53184588Sdfr#include <sys/mount.h> 54184588Sdfr#include <sys/mutex.h> 55184588Sdfr#include <sys/proc.h> 56184588Sdfr#include <sys/signalvar.h> 57184588Sdfr#include <sys/syscallsubr.h> 58184588Sdfr#include <sys/sysctl.h> 59184588Sdfr#include <sys/syslog.h> 60184588Sdfr#include <sys/vnode.h> 61184588Sdfr 62184588Sdfr#include <rpc/rpc.h> 63184588Sdfr 64184588Sdfr#include <nfs/nfsproto.h> 65184588Sdfr#include <nfsclient/nfs.h> 66184588Sdfr#include <nfs/xdr_subs.h> 67184588Sdfr#include <nfsclient/nfsm_subs.h> 68184588Sdfr#include <nfsclient/nfsmount.h> 69184588Sdfr#include <nfsclient/nfsnode.h> 70184588Sdfr 71190293Srwatson#ifdef KDTRACE_HOOKS 72190293Srwatson#include <sys/dtrace_bsd.h> 73190293Srwatson 74190293Srwatsondtrace_nfsclient_nfs23_start_probe_func_t 75190293Srwatson dtrace_nfsclient_nfs23_start_probe; 76190293Srwatson 77190293Srwatsondtrace_nfsclient_nfs23_done_probe_func_t 78190293Srwatson dtrace_nfsclient_nfs23_done_probe; 79190293Srwatson 80190293Srwatson/* 81190293Srwatson * Registered probes by RPC type. 82190293Srwatson */ 83190293Srwatsonuint32_t nfsclient_nfs2_start_probes[NFS_NPROCS]; 84190293Srwatsonuint32_t nfsclient_nfs2_done_probes[NFS_NPROCS]; 85190293Srwatson 86190293Srwatsonuint32_t nfsclient_nfs3_start_probes[NFS_NPROCS]; 87190293Srwatsonuint32_t nfsclient_nfs3_done_probes[NFS_NPROCS]; 88190293Srwatson#endif 89190293Srwatson 90184588Sdfrstatic int nfs_bufpackets = 4; 91184588Sdfrstatic int nfs_reconnects; 92184588Sdfrstatic int nfs3_jukebox_delay = 10; 93184588Sdfrstatic int nfs_skip_wcc_data_onerr = 1; 94184588Sdfrstatic int fake_wchan; 95184588Sdfr 96221973SrmacklemSYSCTL_DECL(_vfs_oldnfs); 97184588Sdfr 98221973SrmacklemSYSCTL_INT(_vfs_oldnfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 99184588Sdfr "Buffer reservation size 2 < x < 64"); 100221973SrmacklemSYSCTL_INT(_vfs_oldnfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 101184588Sdfr "Number of times the nfs client has had to reconnect"); 102221973SrmacklemSYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, 103203731Smarius &nfs3_jukebox_delay, 0, 104184588Sdfr "Number of seconds to delay a retry after receiving EJUKEBOX"); 105221973SrmacklemSYSCTL_INT(_vfs_oldnfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, 106203731Smarius &nfs_skip_wcc_data_onerr, 0, 107184588Sdfr "Disable weak cache consistency checking when server returns an error"); 108184588Sdfr 109184588Sdfrstatic void nfs_down(struct nfsmount *, struct thread *, const char *, 110184588Sdfr int, int); 111184588Sdfrstatic void nfs_up(struct nfsmount *, struct thread *, const char *, 112184588Sdfr int, int); 113184588Sdfrstatic int nfs_msg(struct thread *, const char *, const char *, int); 114184588Sdfr 115184588Sdfrextern int nfsv2_procid[]; 116184588Sdfr 117184588Sdfrstruct nfs_cached_auth { 118184588Sdfr int ca_refs; /* refcount, including 1 from the cache */ 119184588Sdfr uid_t ca_uid; /* uid that corresponds to this auth */ 120184588Sdfr AUTH *ca_auth; /* RPC auth handle */ 121184588Sdfr}; 122184588Sdfr 123184588Sdfr/* 124184588Sdfr * RTT estimator 125184588Sdfr */ 126184588Sdfr 127184588Sdfrstatic enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = { 128184588Sdfr NFS_DEFAULT_TIMER, /* NULL */ 129184588Sdfr NFS_GETATTR_TIMER, /* GETATTR */ 130184588Sdfr NFS_DEFAULT_TIMER, /* SETATTR */ 131184588Sdfr NFS_LOOKUP_TIMER, /* LOOKUP */ 132184588Sdfr NFS_GETATTR_TIMER, /* ACCESS */ 133184588Sdfr NFS_READ_TIMER, /* READLINK */ 134184588Sdfr NFS_READ_TIMER, /* READ */ 135184588Sdfr NFS_WRITE_TIMER, /* WRITE */ 136184588Sdfr NFS_DEFAULT_TIMER, /* CREATE */ 137184588Sdfr NFS_DEFAULT_TIMER, /* MKDIR */ 138184588Sdfr NFS_DEFAULT_TIMER, /* SYMLINK */ 139184588Sdfr NFS_DEFAULT_TIMER, /* MKNOD */ 140184588Sdfr NFS_DEFAULT_TIMER, /* REMOVE */ 141184588Sdfr NFS_DEFAULT_TIMER, /* RMDIR */ 142184588Sdfr NFS_DEFAULT_TIMER, /* RENAME */ 143184588Sdfr NFS_DEFAULT_TIMER, /* LINK */ 144184588Sdfr NFS_READ_TIMER, /* READDIR */ 145184588Sdfr NFS_READ_TIMER, /* READDIRPLUS */ 146184588Sdfr NFS_DEFAULT_TIMER, /* FSSTAT */ 147184588Sdfr NFS_DEFAULT_TIMER, /* FSINFO */ 148184588Sdfr NFS_DEFAULT_TIMER, /* PATHCONF */ 149184588Sdfr NFS_DEFAULT_TIMER, /* COMMIT */ 150184588Sdfr NFS_DEFAULT_TIMER, /* NOOP */ 151184588Sdfr}; 152184588Sdfr 153184588Sdfr/* 154184588Sdfr * Choose the correct RTT timer for this NFS procedure. 155184588Sdfr */ 156184588Sdfrstatic inline enum nfs_rto_timer_t 157184588Sdfrnfs_rto_timer(u_int32_t procnum) 158184588Sdfr{ 159203731Smarius 160203731Smarius return (nfs_proct[procnum]); 161184588Sdfr} 162184588Sdfr 163184588Sdfr/* 164184588Sdfr * Initialize the RTT estimator state for a new mount point. 165184588Sdfr */ 166184588Sdfrstatic void 167184588Sdfrnfs_init_rtt(struct nfsmount *nmp) 168184588Sdfr{ 169184588Sdfr int i; 170184588Sdfr 171184588Sdfr for (i = 0; i < NFS_MAX_TIMER; i++) { 172184588Sdfr nmp->nm_timers[i].rt_srtt = hz; 173184588Sdfr nmp->nm_timers[i].rt_deviate = 0; 174184588Sdfr nmp->nm_timers[i].rt_rtxcur = hz; 175184588Sdfr } 176184588Sdfr} 177184588Sdfr 178184588Sdfr/* 179184588Sdfr * Initialize sockets and congestion for a new NFS connection. 180184588Sdfr * We do not free the sockaddr if error. 181184588Sdfr */ 182184588Sdfrint 183195203Sdfrnfs_connect(struct nfsmount *nmp) 184184588Sdfr{ 185184588Sdfr int rcvreserve, sndreserve; 186184588Sdfr int pktscale; 187184588Sdfr struct sockaddr *saddr; 188184588Sdfr struct ucred *origcred; 189184588Sdfr struct thread *td = curthread; 190184588Sdfr CLIENT *client; 191184588Sdfr struct netconfig *nconf; 192184588Sdfr rpcvers_t vers; 193184588Sdfr int one = 1, retries; 194228757Srmacklem struct timeval timo; 195184588Sdfr 196184588Sdfr /* 197184588Sdfr * We need to establish the socket using the credentials of 198184588Sdfr * the mountpoint. Some parts of this process (such as 199184588Sdfr * sobind() and soconnect()) will use the curent thread's 200184588Sdfr * credential instead of the socket credential. To work 201184588Sdfr * around this, temporarily change the current thread's 202184588Sdfr * credential to that of the mountpoint. 203184588Sdfr * 204184588Sdfr * XXX: It would be better to explicitly pass the correct 205184588Sdfr * credential to sobind() and soconnect(). 206184588Sdfr */ 207184588Sdfr origcred = td->td_ucred; 208184588Sdfr td->td_ucred = nmp->nm_mountp->mnt_cred; 209184588Sdfr saddr = nmp->nm_nam; 210184588Sdfr 211184588Sdfr vers = NFS_VER2; 212184588Sdfr if (nmp->nm_flag & NFSMNT_NFSV3) 213184588Sdfr vers = NFS_VER3; 214184588Sdfr else if (nmp->nm_flag & NFSMNT_NFSV4) 215184588Sdfr vers = NFS_VER4; 216184588Sdfr if (saddr->sa_family == AF_INET) 217184588Sdfr if (nmp->nm_sotype == SOCK_DGRAM) 218184588Sdfr nconf = getnetconfigent("udp"); 219184588Sdfr else 220184588Sdfr nconf = getnetconfigent("tcp"); 221184588Sdfr else 222184588Sdfr if (nmp->nm_sotype == SOCK_DGRAM) 223184588Sdfr nconf = getnetconfigent("udp6"); 224184588Sdfr else 225184588Sdfr nconf = getnetconfigent("tcp6"); 226203731Smarius 227184588Sdfr /* 228184588Sdfr * Get buffer reservation size from sysctl, but impose reasonable 229184588Sdfr * limits. 230184588Sdfr */ 231184588Sdfr pktscale = nfs_bufpackets; 232184588Sdfr if (pktscale < 2) 233184588Sdfr pktscale = 2; 234184588Sdfr if (pktscale > 64) 235184588Sdfr pktscale = 64; 236184588Sdfr mtx_lock(&nmp->nm_mtx); 237184588Sdfr if (nmp->nm_sotype == SOCK_DGRAM) { 238184588Sdfr sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 239184588Sdfr rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 240184588Sdfr NFS_MAXPKTHDR) * pktscale; 241184588Sdfr } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 242184588Sdfr sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 243184588Sdfr rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 244184588Sdfr NFS_MAXPKTHDR) * pktscale; 245184588Sdfr } else { 246184588Sdfr if (nmp->nm_sotype != SOCK_STREAM) 247184588Sdfr panic("nfscon sotype"); 248184588Sdfr sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 249184588Sdfr sizeof (u_int32_t)) * pktscale; 250184588Sdfr rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 251184588Sdfr sizeof (u_int32_t)) * pktscale; 252184588Sdfr } 253184588Sdfr mtx_unlock(&nmp->nm_mtx); 254184588Sdfr 255184588Sdfr client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers, 256184588Sdfr sndreserve, rcvreserve); 257184588Sdfr CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); 258184588Sdfr if (nmp->nm_flag & NFSMNT_INT) 259184588Sdfr CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 260184588Sdfr if (nmp->nm_flag & NFSMNT_RESVPORT) 261184588Sdfr CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 262228757Srmacklem if ((nmp->nm_flag & NFSMNT_SOFT) != 0) { 263228757Srmacklem if (nmp->nm_sotype == SOCK_DGRAM) 264228757Srmacklem /* 265228757Srmacklem * For UDP, the large timeout for a reconnect will 266228757Srmacklem * be set to "nm_retry * nm_timeo / 2", so we only 267228757Srmacklem * want to do 2 reconnect timeout retries. 268228757Srmacklem */ 269228757Srmacklem retries = 2; 270228757Srmacklem else 271228757Srmacklem retries = nmp->nm_retry; 272228757Srmacklem } else 273184588Sdfr retries = INT_MAX; 274184588Sdfr CLNT_CONTROL(client, CLSET_RETRIES, &retries); 275184588Sdfr 276228757Srmacklem /* 277228757Srmacklem * For UDP, there are 2 timeouts: 278228757Srmacklem * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer 279228757Srmacklem * that does a retransmit of an RPC request using the same socket 280228757Srmacklem * and xid. This is what you normally want to do, since NFS 281228757Srmacklem * servers depend on "same xid" for their Duplicate Request Cache. 282228757Srmacklem * - timeout specified in CLNT_CALL_MBUF(), which specifies when 283228757Srmacklem * retransmits on the same socket should fail and a fresh socket 284228757Srmacklem * created. Each of these timeouts counts as one CLSET_RETRIES, 285228757Srmacklem * as set above. 286228757Srmacklem * Set the initial retransmit timeout for UDP. This timeout doesn't 287228757Srmacklem * exist for TCP and the following call just fails, which is ok. 288228757Srmacklem */ 289228757Srmacklem timo.tv_sec = nmp->nm_timeo / NFS_HZ; 290228757Srmacklem timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 291228757Srmacklem CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); 292228757Srmacklem 293184588Sdfr mtx_lock(&nmp->nm_mtx); 294184588Sdfr if (nmp->nm_client) { 295184588Sdfr /* 296184588Sdfr * Someone else already connected. 297184588Sdfr */ 298184588Sdfr CLNT_RELEASE(client); 299203731Smarius } else 300184588Sdfr nmp->nm_client = client; 301184588Sdfr 302184588Sdfr /* 303184588Sdfr * Protocols that do not require connections may be optionally left 304184588Sdfr * unconnected for servers that reply from a port other than NFS_PORT. 305184588Sdfr */ 306184588Sdfr if (!(nmp->nm_flag & NFSMNT_NOCONN)) { 307184588Sdfr mtx_unlock(&nmp->nm_mtx); 308184588Sdfr CLNT_CONTROL(client, CLSET_CONNECT, &one); 309203731Smarius } else 310184588Sdfr mtx_unlock(&nmp->nm_mtx); 311184588Sdfr 312184588Sdfr /* Restore current thread's credentials. */ 313184588Sdfr td->td_ucred = origcred; 314184588Sdfr 315184588Sdfr mtx_lock(&nmp->nm_mtx); 316203731Smarius /* Initialize other non-zero congestion variables. */ 317184588Sdfr nfs_init_rtt(nmp); 318184588Sdfr mtx_unlock(&nmp->nm_mtx); 319184588Sdfr return (0); 320184588Sdfr} 321184588Sdfr 322184588Sdfr/* 323203731Smarius * NFS disconnect. Clean up and unlink. 324184588Sdfr */ 325184588Sdfrvoid 326184588Sdfrnfs_disconnect(struct nfsmount *nmp) 327184588Sdfr{ 328184588Sdfr CLIENT *client; 329184588Sdfr 330184588Sdfr mtx_lock(&nmp->nm_mtx); 331184588Sdfr if (nmp->nm_client) { 332184588Sdfr client = nmp->nm_client; 333184588Sdfr nmp->nm_client = NULL; 334184588Sdfr mtx_unlock(&nmp->nm_mtx); 335223309Srmacklem rpc_gss_secpurge_call(client); 336184588Sdfr CLNT_CLOSE(client); 337184588Sdfr CLNT_RELEASE(client); 338203731Smarius } else 339184588Sdfr mtx_unlock(&nmp->nm_mtx); 340184588Sdfr} 341184588Sdfr 342184588Sdfrvoid 343184588Sdfrnfs_safedisconnect(struct nfsmount *nmp) 344184588Sdfr{ 345184588Sdfr 346184588Sdfr nfs_disconnect(nmp); 347184588Sdfr} 348184588Sdfr 349184588Sdfrstatic AUTH * 350184588Sdfrnfs_getauth(struct nfsmount *nmp, struct ucred *cred) 351184588Sdfr{ 352184588Sdfr rpc_gss_service_t svc; 353184588Sdfr AUTH *auth; 354184588Sdfr 355184588Sdfr switch (nmp->nm_secflavor) { 356184588Sdfr case RPCSEC_GSS_KRB5: 357184588Sdfr case RPCSEC_GSS_KRB5I: 358184588Sdfr case RPCSEC_GSS_KRB5P: 359203731Smarius if (!nmp->nm_mech_oid) 360223309Srmacklem if (!rpc_gss_mech_to_oid_call("kerberosv5", 361203731Smarius &nmp->nm_mech_oid)) 362184588Sdfr return (NULL); 363184588Sdfr if (nmp->nm_secflavor == RPCSEC_GSS_KRB5) 364184588Sdfr svc = rpc_gss_svc_none; 365184588Sdfr else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I) 366184588Sdfr svc = rpc_gss_svc_integrity; 367184588Sdfr else 368184588Sdfr svc = rpc_gss_svc_privacy; 369223309Srmacklem auth = rpc_gss_secfind_call(nmp->nm_client, cred, 370184588Sdfr nmp->nm_principal, nmp->nm_mech_oid, svc); 371184588Sdfr if (auth) 372184588Sdfr return (auth); 373184588Sdfr /* fallthrough */ 374184588Sdfr case AUTH_SYS: 375184588Sdfr default: 376184588Sdfr return (authunix_create(cred)); 377184588Sdfr 378184588Sdfr } 379184588Sdfr} 380184588Sdfr 381184588Sdfr/* 382184588Sdfr * Callback from the RPC code to generate up/down notifications. 383184588Sdfr */ 384184588Sdfr 385184588Sdfrstruct nfs_feedback_arg { 386184588Sdfr struct nfsmount *nf_mount; 387184588Sdfr int nf_lastmsg; /* last tprintf */ 388184588Sdfr int nf_tprintfmsg; 389184588Sdfr struct thread *nf_td; 390184588Sdfr}; 391184588Sdfr 392184588Sdfrstatic void 393184588Sdfrnfs_feedback(int type, int proc, void *arg) 394184588Sdfr{ 395184588Sdfr struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 396184588Sdfr struct nfsmount *nmp = nf->nf_mount; 397245909Sjhb time_t now; 398184588Sdfr 399184588Sdfr switch (type) { 400184588Sdfr case FEEDBACK_REXMIT2: 401184588Sdfr case FEEDBACK_RECONNECT: 402245909Sjhb now = time_uptime; 403245909Sjhb if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) { 404184588Sdfr nfs_down(nmp, nf->nf_td, 405184588Sdfr "not responding", 0, NFSSTA_TIMEO); 406184588Sdfr nf->nf_tprintfmsg = TRUE; 407245909Sjhb nf->nf_lastmsg = now; 408184588Sdfr } 409184588Sdfr break; 410184588Sdfr 411184588Sdfr case FEEDBACK_OK: 412184588Sdfr nfs_up(nf->nf_mount, nf->nf_td, 413184588Sdfr "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 414184588Sdfr break; 415184588Sdfr } 416184588Sdfr} 417184588Sdfr 418184588Sdfr/* 419184588Sdfr * nfs_request - goes something like this 420184588Sdfr * - fill in request struct 421184588Sdfr * - links it into list 422184588Sdfr * - calls nfs_send() for first transmit 423184588Sdfr * - calls nfs_receive() to get reply 424184588Sdfr * - break down rpc header and return with nfs reply pointed to 425184588Sdfr * by mrep or error 426184588Sdfr * nb: always frees up mreq mbuf list 427184588Sdfr */ 428184588Sdfrint 429184588Sdfrnfs_request(struct vnode *vp, struct mbuf *mreq, int procnum, 430184588Sdfr struct thread *td, struct ucred *cred, struct mbuf **mrp, 431184588Sdfr struct mbuf **mdp, caddr_t *dposp) 432184588Sdfr{ 433184588Sdfr struct mbuf *mrep; 434184588Sdfr u_int32_t *tl; 435184588Sdfr struct nfsmount *nmp; 436184588Sdfr struct mbuf *md; 437184588Sdfr time_t waituntil; 438184588Sdfr caddr_t dpos; 439228757Srmacklem int error = 0, timeo; 440184588Sdfr AUTH *auth = NULL; 441184588Sdfr enum nfs_rto_timer_t timer; 442184588Sdfr struct nfs_feedback_arg nf; 443184588Sdfr struct rpc_callextra ext; 444184588Sdfr enum clnt_stat stat; 445184588Sdfr struct timeval timo; 446184588Sdfr 447184588Sdfr /* Reject requests while attempting a forced unmount. */ 448184588Sdfr if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { 449184588Sdfr m_freem(mreq); 450184588Sdfr return (ESTALE); 451184588Sdfr } 452184588Sdfr nmp = VFSTONFS(vp->v_mount); 453184588Sdfr bzero(&nf, sizeof(struct nfs_feedback_arg)); 454184588Sdfr nf.nf_mount = nmp; 455184588Sdfr nf.nf_td = td; 456245909Sjhb nf.nf_lastmsg = time_uptime - 457203731Smarius ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 458184588Sdfr 459184588Sdfr /* 460203731Smarius * XXX if not already connected call nfs_connect now. Longer 461184588Sdfr * term, change nfs_mount to call nfs_connect unconditionally 462184588Sdfr * and let clnt_reconnect_create handle reconnects. 463184588Sdfr */ 464184588Sdfr if (!nmp->nm_client) 465195203Sdfr nfs_connect(nmp); 466184588Sdfr 467184588Sdfr auth = nfs_getauth(nmp, cred); 468184588Sdfr if (!auth) { 469184588Sdfr m_freem(mreq); 470184588Sdfr return (EACCES); 471184588Sdfr } 472184588Sdfr bzero(&ext, sizeof(ext)); 473184588Sdfr ext.rc_auth = auth; 474184588Sdfr 475184588Sdfr ext.rc_feedback = nfs_feedback; 476184588Sdfr ext.rc_feedback_arg = &nf; 477184588Sdfr 478184588Sdfr /* 479184588Sdfr * Use a conservative timeout for RPCs other than getattr, 480203731Smarius * lookup, read or write. The justification for doing "other" 481184588Sdfr * this way is that these RPCs happen so infrequently that 482184588Sdfr * timer est. would probably be stale. Also, since many of 483184588Sdfr * these RPCs are non-idempotent, a conservative timeout is 484184588Sdfr * desired. 485184588Sdfr */ 486184588Sdfr timer = nfs_rto_timer(procnum); 487203731Smarius if (timer != NFS_DEFAULT_TIMER) 488184588Sdfr ext.rc_timers = &nmp->nm_timers[timer - 1]; 489203731Smarius else 490184588Sdfr ext.rc_timers = NULL; 491184588Sdfr 492190293Srwatson#ifdef KDTRACE_HOOKS 493190293Srwatson if (dtrace_nfsclient_nfs23_start_probe != NULL) { 494190293Srwatson uint32_t probe_id; 495190293Srwatson int probe_procnum; 496190293Srwatson 497190293Srwatson if (nmp->nm_flag & NFSMNT_NFSV3) { 498190293Srwatson probe_id = nfsclient_nfs3_start_probes[procnum]; 499190293Srwatson probe_procnum = procnum; 500190293Srwatson } else { 501190293Srwatson probe_id = nfsclient_nfs2_start_probes[procnum]; 502191777Srwatson probe_procnum = nfsv2_procid[procnum]; 503190293Srwatson } 504190293Srwatson if (probe_id != 0) 505190293Srwatson (dtrace_nfsclient_nfs23_start_probe)(probe_id, vp, 506190293Srwatson mreq, cred, probe_procnum); 507190293Srwatson } 508190293Srwatson#endif 509190293Srwatson 510184588Sdfr nfsstats.rpcrequests++; 511184588Sdfrtryagain: 512228757Srmacklem /* 513228757Srmacklem * This timeout specifies when a new socket should be created, 514228757Srmacklem * along with new xid values. For UDP, this should be done 515228757Srmacklem * infrequently, since retransmits of RPC requests should normally 516228757Srmacklem * use the same xid. 517228757Srmacklem */ 518228757Srmacklem if (nmp->nm_sotype == SOCK_DGRAM) { 519228757Srmacklem if ((nmp->nm_flag & NFSMNT_SOFT) != 0) { 520228757Srmacklem /* 521228757Srmacklem * CLSET_RETRIES is set to 2, so this should be half 522228757Srmacklem * of the total timeout required. 523228757Srmacklem */ 524228757Srmacklem timeo = nmp->nm_retry * nmp->nm_timeo / 2; 525228757Srmacklem if (timeo < 1) 526228757Srmacklem timeo = 1; 527228757Srmacklem timo.tv_sec = timeo / NFS_HZ; 528228757Srmacklem timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; 529228757Srmacklem } else { 530228757Srmacklem /* For UDP hard mounts, use a large value. */ 531228757Srmacklem timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; 532228757Srmacklem timo.tv_usec = 0; 533228757Srmacklem } 534228757Srmacklem } else { 535228757Srmacklem timo.tv_sec = nmp->nm_timeo / NFS_HZ; 536228757Srmacklem timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 537228757Srmacklem } 538184588Sdfr mrep = NULL; 539184588Sdfr stat = CLNT_CALL_MBUF(nmp->nm_client, &ext, 540184588Sdfr (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum], 541184588Sdfr mreq, &mrep, timo); 542184588Sdfr 543184588Sdfr /* 544184588Sdfr * If there was a successful reply and a tprintf msg. 545184588Sdfr * tprintf a response. 546184588Sdfr */ 547203731Smarius if (stat == RPC_SUCCESS) 548184588Sdfr error = 0; 549245476Sjhb else if (stat == RPC_TIMEDOUT) { 550245476Sjhb nfsstats.rpctimeouts++; 551184588Sdfr error = ETIMEDOUT; 552245476Sjhb } else if (stat == RPC_VERSMISMATCH) { 553245476Sjhb nfsstats.rpcinvalid++; 554184588Sdfr error = EOPNOTSUPP; 555245476Sjhb } else if (stat == RPC_PROGVERSMISMATCH) { 556245476Sjhb nfsstats.rpcinvalid++; 557184588Sdfr error = EPROTONOSUPPORT; 558245476Sjhb } else if (stat == RPC_INTR) { 559245476Sjhb error = EINTR; 560245476Sjhb } else { 561245476Sjhb nfsstats.rpcinvalid++; 562184588Sdfr error = EACCES; 563245476Sjhb } 564190220Srwatson if (error) 565190220Srwatson goto nfsmout; 566184588Sdfr 567184588Sdfr KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 568184588Sdfr 569192686Sdfr /* 570192686Sdfr * Search for any mbufs that are not a multiple of 4 bytes long 571192686Sdfr * or with m_data not longword aligned. 572192686Sdfr * These could cause pointer alignment problems, so copy them to 573192686Sdfr * well aligned mbufs. 574192686Sdfr */ 575243882Sglebius error = nfs_realign(&mrep, M_NOWAIT); 576192686Sdfr if (error == ENOMEM) { 577192686Sdfr m_freem(mrep); 578192686Sdfr AUTH_DESTROY(auth); 579245476Sjhb nfsstats.rpcinvalid++; 580192686Sdfr return (error); 581192686Sdfr } 582192686Sdfr 583192686Sdfr md = mrep; 584184588Sdfr dpos = mtod(mrep, caddr_t); 585184588Sdfr tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 586184588Sdfr if (*tl != 0) { 587184588Sdfr error = fxdr_unsigned(int, *tl); 588184588Sdfr if ((nmp->nm_flag & NFSMNT_NFSV3) && 589184588Sdfr error == NFSERR_TRYLATER) { 590184588Sdfr m_freem(mrep); 591184588Sdfr error = 0; 592184588Sdfr waituntil = time_second + nfs3_jukebox_delay; 593203731Smarius while (time_second < waituntil) 594203731Smarius (void)tsleep(&fake_wchan, PSOCK, "nqnfstry", 595203731Smarius hz); 596184588Sdfr goto tryagain; 597184588Sdfr } 598227690Srmacklem /* 599227690Srmacklem * Make sure NFSERR_RETERR isn't bogusly set by a server 600227690Srmacklem * such as amd. (No actual NFS error has bit 31 set.) 601227690Srmacklem */ 602227690Srmacklem error &= ~NFSERR_RETERR; 603184588Sdfr 604184588Sdfr /* 605184588Sdfr * If the File Handle was stale, invalidate the lookup 606184588Sdfr * cache, just in case. 607184588Sdfr */ 608184588Sdfr if (error == ESTALE) 609190785Sjhb nfs_purgecache(vp); 610184588Sdfr /* 611232116Sjhb * Skip wcc data on non-ENOENT NFS errors for now. 612232116Sjhb * NetApp filers return corrupt postop attrs in the 613232116Sjhb * wcc data for NFS err EROFS. Not sure if they could 614232116Sjhb * return corrupt postop attrs for others errors. 615232116Sjhb * Blocking ENOENT post-op attributes breaks negative 616232116Sjhb * name caching, so always allow it through. 617184588Sdfr */ 618203731Smarius if ((nmp->nm_flag & NFSMNT_NFSV3) && 619232116Sjhb (!nfs_skip_wcc_data_onerr || error == ENOENT)) { 620184588Sdfr *mrp = mrep; 621184588Sdfr *mdp = md; 622184588Sdfr *dposp = dpos; 623184588Sdfr error |= NFSERR_RETERR; 624184588Sdfr } else 625184588Sdfr m_freem(mrep); 626190220Srwatson goto nfsmout; 627184588Sdfr } 628184588Sdfr 629190293Srwatson#ifdef KDTRACE_HOOKS 630190293Srwatson if (dtrace_nfsclient_nfs23_done_probe != NULL) { 631190293Srwatson uint32_t probe_id; 632190293Srwatson int probe_procnum; 633190293Srwatson 634190293Srwatson if (nmp->nm_flag & NFSMNT_NFSV3) { 635190293Srwatson probe_id = nfsclient_nfs3_done_probes[procnum]; 636190293Srwatson probe_procnum = procnum; 637190293Srwatson } else { 638190293Srwatson probe_id = nfsclient_nfs2_done_probes[procnum]; 639190293Srwatson probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 640190293Srwatson procnum : nfsv2_procid[procnum]; 641190293Srwatson } 642190293Srwatson if (probe_id != 0) 643190293Srwatson (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 644190293Srwatson mreq, cred, probe_procnum, 0); 645190293Srwatson } 646190293Srwatson#endif 647184588Sdfr m_freem(mreq); 648184588Sdfr *mrp = mrep; 649184588Sdfr *mdp = md; 650184588Sdfr *dposp = dpos; 651184588Sdfr AUTH_DESTROY(auth); 652184588Sdfr return (0); 653184588Sdfr 654184588Sdfrnfsmout: 655190293Srwatson#ifdef KDTRACE_HOOKS 656190293Srwatson if (dtrace_nfsclient_nfs23_done_probe != NULL) { 657190293Srwatson uint32_t probe_id; 658190293Srwatson int probe_procnum; 659190293Srwatson 660190293Srwatson if (nmp->nm_flag & NFSMNT_NFSV3) { 661190293Srwatson probe_id = nfsclient_nfs3_done_probes[procnum]; 662190293Srwatson probe_procnum = procnum; 663190293Srwatson } else { 664190293Srwatson probe_id = nfsclient_nfs2_done_probes[procnum]; 665190293Srwatson probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 666190293Srwatson procnum : nfsv2_procid[procnum]; 667190293Srwatson } 668190293Srwatson if (probe_id != 0) 669190293Srwatson (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 670190293Srwatson mreq, cred, probe_procnum, error); 671190293Srwatson } 672190293Srwatson#endif 673184588Sdfr m_freem(mreq); 674184588Sdfr if (auth) 675184588Sdfr AUTH_DESTROY(auth); 676184588Sdfr return (error); 677184588Sdfr} 678184588Sdfr 679184588Sdfr/* 680184588Sdfr * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 681203731Smarius * wait for all requests to complete. This is used by forced unmounts 682184588Sdfr * to terminate any outstanding RPCs. 683184588Sdfr */ 684184588Sdfrint 685184588Sdfrnfs_nmcancelreqs(struct nfsmount *nmp) 686184588Sdfr{ 687184588Sdfr 688184588Sdfr if (nmp->nm_client) 689184588Sdfr CLNT_CLOSE(nmp->nm_client); 690184588Sdfr return (0); 691184588Sdfr} 692184588Sdfr 693184588Sdfr/* 694184588Sdfr * Any signal that can interrupt an NFS operation in an intr mount 695203731Smarius * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 696184588Sdfr */ 697184588Sdfrint nfs_sig_set[] = { 698184588Sdfr SIGINT, 699184588Sdfr SIGTERM, 700184588Sdfr SIGHUP, 701184588Sdfr SIGKILL, 702184588Sdfr SIGQUIT 703184588Sdfr}; 704184588Sdfr 705184588Sdfr/* 706184588Sdfr * Check to see if one of the signals in our subset is pending on 707184588Sdfr * the process (in an intr mount). 708184588Sdfr */ 709184588Sdfrstatic int 710184588Sdfrnfs_sig_pending(sigset_t set) 711184588Sdfr{ 712184588Sdfr int i; 713203731Smarius 714184588Sdfr for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) 715184588Sdfr if (SIGISMEMBER(set, nfs_sig_set[i])) 716184588Sdfr return (1); 717184588Sdfr return (0); 718184588Sdfr} 719203731Smarius 720184588Sdfr/* 721184588Sdfr * The set/restore sigmask functions are used to (temporarily) overwrite 722246417Sjhb * the thread td_sigmask during an RPC call (for example). These are also 723184588Sdfr * used in other places in the NFS client that might tsleep(). 724184588Sdfr */ 725184588Sdfrvoid 726184588Sdfrnfs_set_sigmask(struct thread *td, sigset_t *oldset) 727184588Sdfr{ 728184588Sdfr sigset_t newset; 729184588Sdfr int i; 730184588Sdfr struct proc *p; 731203731Smarius 732184588Sdfr SIGFILLSET(newset); 733184588Sdfr if (td == NULL) 734184588Sdfr td = curthread; /* XXX */ 735184588Sdfr p = td->td_proc; 736203731Smarius /* Remove the NFS set of signals from newset. */ 737184588Sdfr PROC_LOCK(p); 738184588Sdfr mtx_lock(&p->p_sigacts->ps_mtx); 739184588Sdfr for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) { 740184588Sdfr /* 741184588Sdfr * But make sure we leave the ones already masked 742203731Smarius * by the process, i.e. remove the signal from the 743184588Sdfr * temporary signalmask only if it wasn't already 744184588Sdfr * in p_sigmask. 745184588Sdfr */ 746184588Sdfr if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) && 747184588Sdfr !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i])) 748184588Sdfr SIGDELSET(newset, nfs_sig_set[i]); 749184588Sdfr } 750184588Sdfr mtx_unlock(&p->p_sigacts->ps_mtx); 751246417Sjhb kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 752246417Sjhb SIGPROCMASK_PROC_LOCKED); 753184588Sdfr PROC_UNLOCK(p); 754184588Sdfr} 755184588Sdfr 756184588Sdfrvoid 757184588Sdfrnfs_restore_sigmask(struct thread *td, sigset_t *set) 758184588Sdfr{ 759184588Sdfr if (td == NULL) 760184588Sdfr td = curthread; /* XXX */ 761184588Sdfr kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 762184588Sdfr} 763184588Sdfr 764184588Sdfr/* 765184588Sdfr * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 766184588Sdfr * old one after msleep() returns. 767184588Sdfr */ 768184588Sdfrint 769203731Smariusnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, 770203731Smarius char *wmesg, int timo) 771184588Sdfr{ 772184588Sdfr sigset_t oldset; 773184588Sdfr int error; 774184588Sdfr struct proc *p; 775203731Smarius 776184588Sdfr if ((priority & PCATCH) == 0) 777184588Sdfr return msleep(ident, mtx, priority, wmesg, timo); 778184588Sdfr if (td == NULL) 779184588Sdfr td = curthread; /* XXX */ 780184588Sdfr nfs_set_sigmask(td, &oldset); 781184588Sdfr error = msleep(ident, mtx, priority, wmesg, timo); 782184588Sdfr nfs_restore_sigmask(td, &oldset); 783184588Sdfr p = td->td_proc; 784184588Sdfr return (error); 785184588Sdfr} 786184588Sdfr 787184588Sdfr/* 788184588Sdfr * Test for a termination condition pending on the process. 789184588Sdfr * This is used for NFSMNT_INT mounts. 790184588Sdfr */ 791184588Sdfrint 792195203Sdfrnfs_sigintr(struct nfsmount *nmp, struct thread *td) 793184588Sdfr{ 794184588Sdfr struct proc *p; 795184588Sdfr sigset_t tmpset; 796203731Smarius 797184588Sdfr /* Terminate all requests while attempting a forced unmount. */ 798184588Sdfr if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 799184588Sdfr return (EIO); 800184588Sdfr if (!(nmp->nm_flag & NFSMNT_INT)) 801184588Sdfr return (0); 802184588Sdfr if (td == NULL) 803184588Sdfr return (0); 804184588Sdfr p = td->td_proc; 805184588Sdfr PROC_LOCK(p); 806184588Sdfr tmpset = p->p_siglist; 807184588Sdfr SIGSETOR(tmpset, td->td_siglist); 808184588Sdfr SIGSETNAND(tmpset, td->td_sigmask); 809184588Sdfr mtx_lock(&p->p_sigacts->ps_mtx); 810184588Sdfr SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 811184588Sdfr mtx_unlock(&p->p_sigacts->ps_mtx); 812184588Sdfr if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 813184588Sdfr && nfs_sig_pending(tmpset)) { 814184588Sdfr PROC_UNLOCK(p); 815184588Sdfr return (EINTR); 816184588Sdfr } 817184588Sdfr PROC_UNLOCK(p); 818184588Sdfr return (0); 819184588Sdfr} 820184588Sdfr 821184588Sdfrstatic int 822184588Sdfrnfs_msg(struct thread *td, const char *server, const char *msg, int error) 823184588Sdfr{ 824184588Sdfr struct proc *p; 825184588Sdfr 826184588Sdfr p = td ? td->td_proc : NULL; 827203731Smarius if (error) 828184588Sdfr tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 829184588Sdfr msg, error); 830203731Smarius else 831184588Sdfr tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 832184588Sdfr return (0); 833184588Sdfr} 834184588Sdfr 835184588Sdfrstatic void 836184588Sdfrnfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 837184588Sdfr int error, int flags) 838184588Sdfr{ 839184588Sdfr if (nmp == NULL) 840184588Sdfr return; 841184588Sdfr mtx_lock(&nmp->nm_mtx); 842184588Sdfr if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 843184588Sdfr nmp->nm_state |= NFSSTA_TIMEO; 844184588Sdfr mtx_unlock(&nmp->nm_mtx); 845184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 846184588Sdfr VQ_NOTRESP, 0); 847184588Sdfr } else 848184588Sdfr mtx_unlock(&nmp->nm_mtx); 849184588Sdfr mtx_lock(&nmp->nm_mtx); 850203731Smarius if ((flags & NFSSTA_LOCKTIMEO) && 851203731Smarius !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 852184588Sdfr nmp->nm_state |= NFSSTA_LOCKTIMEO; 853184588Sdfr mtx_unlock(&nmp->nm_mtx); 854184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 855184588Sdfr VQ_NOTRESPLOCK, 0); 856184588Sdfr } else 857184588Sdfr mtx_unlock(&nmp->nm_mtx); 858184588Sdfr nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 859184588Sdfr} 860184588Sdfr 861184588Sdfrstatic void 862184588Sdfrnfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 863184588Sdfr int flags, int tprintfmsg) 864184588Sdfr{ 865184588Sdfr if (nmp == NULL) 866184588Sdfr return; 867203731Smarius if (tprintfmsg) 868184588Sdfr nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 869184588Sdfr 870184588Sdfr mtx_lock(&nmp->nm_mtx); 871184588Sdfr if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 872184588Sdfr nmp->nm_state &= ~NFSSTA_TIMEO; 873184588Sdfr mtx_unlock(&nmp->nm_mtx); 874184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 875184588Sdfr VQ_NOTRESP, 1); 876184588Sdfr } else 877184588Sdfr mtx_unlock(&nmp->nm_mtx); 878203731Smarius 879184588Sdfr mtx_lock(&nmp->nm_mtx); 880203731Smarius if ((flags & NFSSTA_LOCKTIMEO) && 881203731Smarius (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 882184588Sdfr nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 883184588Sdfr mtx_unlock(&nmp->nm_mtx); 884184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 885184588Sdfr VQ_NOTRESPLOCK, 1); 886184588Sdfr } else 887184588Sdfr mtx_unlock(&nmp->nm_mtx); 888184588Sdfr} 889