nfs_krpc.c revision 195203
1184588Sdfr/*- 2184588Sdfr * Copyright (c) 1989, 1991, 1993, 1995 3184588Sdfr * The Regents of the University of California. All rights reserved. 4184588Sdfr * 5184588Sdfr * This code is derived from software contributed to Berkeley by 6184588Sdfr * Rick Macklem at The University of Guelph. 7184588Sdfr * 8184588Sdfr * Redistribution and use in source and binary forms, with or without 9184588Sdfr * modification, are permitted provided that the following conditions 10184588Sdfr * are met: 11184588Sdfr * 1. Redistributions of source code must retain the above copyright 12184588Sdfr * notice, this list of conditions and the following disclaimer. 13184588Sdfr * 2. Redistributions in binary form must reproduce the above copyright 14184588Sdfr * notice, this list of conditions and the following disclaimer in the 15184588Sdfr * documentation and/or other materials provided with the distribution. 16184588Sdfr * 4. Neither the name of the University nor the names of its contributors 17184588Sdfr * may be used to endorse or promote products derived from this software 18184588Sdfr * without specific prior written permission. 19184588Sdfr * 20184588Sdfr * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21184588Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22184588Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23184588Sdfr * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24184588Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25184588Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26184588Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27184588Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28184588Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29184588Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30184588Sdfr * SUCH DAMAGE. 31184588Sdfr * 32184588Sdfr * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 33184588Sdfr */ 34184588Sdfr 35184588Sdfr#include <sys/cdefs.h> 36184588Sdfr__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_krpc.c 195203 2009-06-30 19:10:17Z dfr $"); 37184588Sdfr 38184588Sdfr/* 39184588Sdfr * Socket operations for use by nfs 40184588Sdfr */ 41184588Sdfr 42184588Sdfr#include "opt_inet6.h" 43190293Srwatson#include "opt_kdtrace.h" 44184588Sdfr#include "opt_kgssapi.h" 45184588Sdfr 46184588Sdfr#include <sys/param.h> 47184588Sdfr#include <sys/systm.h> 48184588Sdfr#include <sys/kernel.h> 49184588Sdfr#include <sys/limits.h> 50184588Sdfr#include <sys/lock.h> 51184588Sdfr#include <sys/malloc.h> 52184588Sdfr#include <sys/mbuf.h> 53184588Sdfr#include <sys/mount.h> 54184588Sdfr#include <sys/mutex.h> 55184588Sdfr#include <sys/proc.h> 56184588Sdfr#include <sys/signalvar.h> 57184588Sdfr#include <sys/syscallsubr.h> 58184588Sdfr#include <sys/sysctl.h> 59184588Sdfr#include <sys/syslog.h> 60184588Sdfr#include <sys/vnode.h> 61184588Sdfr 62184588Sdfr#include <rpc/rpc.h> 63184588Sdfr 64184588Sdfr#include <nfs/nfsproto.h> 65184588Sdfr#include <nfsclient/nfs.h> 66184588Sdfr#include <nfs/xdr_subs.h> 67184588Sdfr#include <nfsclient/nfsm_subs.h> 68184588Sdfr#include <nfsclient/nfsmount.h> 69184588Sdfr#include <nfsclient/nfsnode.h> 70184588Sdfr 71190293Srwatson#ifdef KDTRACE_HOOKS 72190293Srwatson#include <sys/dtrace_bsd.h> 73190293Srwatson 74190293Srwatsondtrace_nfsclient_nfs23_start_probe_func_t 75190293Srwatson dtrace_nfsclient_nfs23_start_probe; 76190293Srwatson 77190293Srwatsondtrace_nfsclient_nfs23_done_probe_func_t 78190293Srwatson dtrace_nfsclient_nfs23_done_probe; 79190293Srwatson 80190293Srwatson/* 81190293Srwatson * Registered probes by RPC type. 82190293Srwatson */ 83190293Srwatsonuint32_t nfsclient_nfs2_start_probes[NFS_NPROCS]; 84190293Srwatsonuint32_t nfsclient_nfs2_done_probes[NFS_NPROCS]; 85190293Srwatson 86190293Srwatsonuint32_t nfsclient_nfs3_start_probes[NFS_NPROCS]; 87190293Srwatsonuint32_t nfsclient_nfs3_done_probes[NFS_NPROCS]; 88190293Srwatson#endif 89190293Srwatson 90184588Sdfrstatic int nfs_realign_test; 91184588Sdfrstatic int nfs_realign_count; 92184588Sdfrstatic int nfs_bufpackets = 4; 93184588Sdfrstatic int nfs_reconnects; 94184588Sdfrstatic int nfs3_jukebox_delay = 10; 95184588Sdfrstatic int nfs_skip_wcc_data_onerr = 1; 96184588Sdfrstatic int fake_wchan; 97184588Sdfr 98184588SdfrSYSCTL_DECL(_vfs_nfs); 99184588Sdfr 100184588SdfrSYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, 101184588Sdfr "Number of realign tests done"); 102184588SdfrSYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, 103184588Sdfr "Number of mbuf realignments done"); 104184588SdfrSYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 105184588Sdfr "Buffer reservation size 2 < x < 64"); 106184588SdfrSYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 107184588Sdfr "Number of times the nfs client has had to reconnect"); 108184588SdfrSYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, 109184588Sdfr "Number of seconds to delay a retry after receiving EJUKEBOX"); 110184588SdfrSYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, 111184588Sdfr "Disable weak cache consistency checking when server returns an error"); 112184588Sdfr 113184588Sdfrstatic void nfs_down(struct nfsmount *, struct thread *, const char *, 114184588Sdfr int, int); 115184588Sdfrstatic void nfs_up(struct nfsmount *, struct thread *, const char *, 116184588Sdfr int, int); 117184588Sdfrstatic int nfs_msg(struct thread *, const char *, const char *, int); 118184588Sdfr 119184588Sdfrextern int nfsv2_procid[]; 120184588Sdfr 121184588Sdfrstruct nfs_cached_auth { 122184588Sdfr int ca_refs; /* refcount, including 1 from the cache */ 123184588Sdfr uid_t ca_uid; /* uid that corresponds to this auth */ 124184588Sdfr AUTH *ca_auth; /* RPC auth handle */ 125184588Sdfr}; 126184588Sdfr 127184588Sdfr/* 128184588Sdfr * RTT estimator 129184588Sdfr */ 130184588Sdfr 131184588Sdfrstatic enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = { 132184588Sdfr NFS_DEFAULT_TIMER, /* NULL */ 133184588Sdfr NFS_GETATTR_TIMER, /* GETATTR */ 134184588Sdfr NFS_DEFAULT_TIMER, /* SETATTR */ 135184588Sdfr NFS_LOOKUP_TIMER, /* LOOKUP */ 136184588Sdfr NFS_GETATTR_TIMER, /* ACCESS */ 137184588Sdfr NFS_READ_TIMER, /* READLINK */ 138184588Sdfr NFS_READ_TIMER, /* READ */ 139184588Sdfr NFS_WRITE_TIMER, /* WRITE */ 140184588Sdfr NFS_DEFAULT_TIMER, /* CREATE */ 141184588Sdfr NFS_DEFAULT_TIMER, /* MKDIR */ 142184588Sdfr NFS_DEFAULT_TIMER, /* SYMLINK */ 143184588Sdfr NFS_DEFAULT_TIMER, /* MKNOD */ 144184588Sdfr NFS_DEFAULT_TIMER, /* REMOVE */ 145184588Sdfr NFS_DEFAULT_TIMER, /* RMDIR */ 146184588Sdfr NFS_DEFAULT_TIMER, /* RENAME */ 147184588Sdfr NFS_DEFAULT_TIMER, /* LINK */ 148184588Sdfr NFS_READ_TIMER, /* READDIR */ 149184588Sdfr NFS_READ_TIMER, /* READDIRPLUS */ 150184588Sdfr NFS_DEFAULT_TIMER, /* FSSTAT */ 151184588Sdfr NFS_DEFAULT_TIMER, /* FSINFO */ 152184588Sdfr NFS_DEFAULT_TIMER, /* PATHCONF */ 153184588Sdfr NFS_DEFAULT_TIMER, /* COMMIT */ 154184588Sdfr NFS_DEFAULT_TIMER, /* NOOP */ 155184588Sdfr}; 156184588Sdfr 157184588Sdfr/* 158184588Sdfr * Choose the correct RTT timer for this NFS procedure. 159184588Sdfr */ 160184588Sdfrstatic inline enum nfs_rto_timer_t 161184588Sdfrnfs_rto_timer(u_int32_t procnum) 162184588Sdfr{ 163184588Sdfr return nfs_proct[procnum]; 164184588Sdfr} 165184588Sdfr 166184588Sdfr/* 167184588Sdfr * Initialize the RTT estimator state for a new mount point. 168184588Sdfr */ 169184588Sdfrstatic void 170184588Sdfrnfs_init_rtt(struct nfsmount *nmp) 171184588Sdfr{ 172184588Sdfr int i; 173184588Sdfr 174184588Sdfr for (i = 0; i < NFS_MAX_TIMER; i++) { 175184588Sdfr nmp->nm_timers[i].rt_srtt = hz; 176184588Sdfr nmp->nm_timers[i].rt_deviate = 0; 177184588Sdfr nmp->nm_timers[i].rt_rtxcur = hz; 178184588Sdfr } 179184588Sdfr} 180184588Sdfr 181184588Sdfr/* 182184588Sdfr * Initialize sockets and congestion for a new NFS connection. 183184588Sdfr * We do not free the sockaddr if error. 184184588Sdfr */ 185184588Sdfrint 186195203Sdfrnfs_connect(struct nfsmount *nmp) 187184588Sdfr{ 188184588Sdfr int rcvreserve, sndreserve; 189184588Sdfr int pktscale; 190184588Sdfr struct sockaddr *saddr; 191184588Sdfr struct ucred *origcred; 192184588Sdfr struct thread *td = curthread; 193184588Sdfr CLIENT *client; 194184588Sdfr struct netconfig *nconf; 195184588Sdfr rpcvers_t vers; 196184588Sdfr int one = 1, retries; 197184588Sdfr 198184588Sdfr /* 199184588Sdfr * We need to establish the socket using the credentials of 200184588Sdfr * the mountpoint. Some parts of this process (such as 201184588Sdfr * sobind() and soconnect()) will use the curent thread's 202184588Sdfr * credential instead of the socket credential. To work 203184588Sdfr * around this, temporarily change the current thread's 204184588Sdfr * credential to that of the mountpoint. 205184588Sdfr * 206184588Sdfr * XXX: It would be better to explicitly pass the correct 207184588Sdfr * credential to sobind() and soconnect(). 208184588Sdfr */ 209184588Sdfr origcred = td->td_ucred; 210184588Sdfr td->td_ucred = nmp->nm_mountp->mnt_cred; 211184588Sdfr saddr = nmp->nm_nam; 212184588Sdfr 213184588Sdfr vers = NFS_VER2; 214184588Sdfr if (nmp->nm_flag & NFSMNT_NFSV3) 215184588Sdfr vers = NFS_VER3; 216184588Sdfr else if (nmp->nm_flag & NFSMNT_NFSV4) 217184588Sdfr vers = NFS_VER4; 218184588Sdfr if (saddr->sa_family == AF_INET) 219184588Sdfr if (nmp->nm_sotype == SOCK_DGRAM) 220184588Sdfr nconf = getnetconfigent("udp"); 221184588Sdfr else 222184588Sdfr nconf = getnetconfigent("tcp"); 223184588Sdfr else 224184588Sdfr if (nmp->nm_sotype == SOCK_DGRAM) 225184588Sdfr nconf = getnetconfigent("udp6"); 226184588Sdfr else 227184588Sdfr nconf = getnetconfigent("tcp6"); 228184588Sdfr 229184588Sdfr /* 230184588Sdfr * Get buffer reservation size from sysctl, but impose reasonable 231184588Sdfr * limits. 232184588Sdfr */ 233184588Sdfr pktscale = nfs_bufpackets; 234184588Sdfr if (pktscale < 2) 235184588Sdfr pktscale = 2; 236184588Sdfr if (pktscale > 64) 237184588Sdfr pktscale = 64; 238184588Sdfr mtx_lock(&nmp->nm_mtx); 239184588Sdfr if (nmp->nm_sotype == SOCK_DGRAM) { 240184588Sdfr sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 241184588Sdfr rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 242184588Sdfr NFS_MAXPKTHDR) * pktscale; 243184588Sdfr } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 244184588Sdfr sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; 245184588Sdfr rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 246184588Sdfr NFS_MAXPKTHDR) * pktscale; 247184588Sdfr } else { 248184588Sdfr if (nmp->nm_sotype != SOCK_STREAM) 249184588Sdfr panic("nfscon sotype"); 250184588Sdfr sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 251184588Sdfr sizeof (u_int32_t)) * pktscale; 252184588Sdfr rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 253184588Sdfr sizeof (u_int32_t)) * pktscale; 254184588Sdfr } 255184588Sdfr mtx_unlock(&nmp->nm_mtx); 256184588Sdfr 257184588Sdfr client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers, 258184588Sdfr sndreserve, rcvreserve); 259184588Sdfr CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); 260184588Sdfr if (nmp->nm_flag & NFSMNT_INT) 261184588Sdfr CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 262184588Sdfr if (nmp->nm_flag & NFSMNT_RESVPORT) 263184588Sdfr CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 264184588Sdfr if (nmp->nm_flag & NFSMNT_SOFT) 265184588Sdfr retries = nmp->nm_retry; 266184588Sdfr else 267184588Sdfr retries = INT_MAX; 268184588Sdfr CLNT_CONTROL(client, CLSET_RETRIES, &retries); 269184588Sdfr 270184588Sdfr mtx_lock(&nmp->nm_mtx); 271184588Sdfr if (nmp->nm_client) { 272184588Sdfr /* 273184588Sdfr * Someone else already connected. 274184588Sdfr */ 275184588Sdfr CLNT_RELEASE(client); 276184588Sdfr } else { 277184588Sdfr nmp->nm_client = client; 278184588Sdfr } 279184588Sdfr 280184588Sdfr /* 281184588Sdfr * Protocols that do not require connections may be optionally left 282184588Sdfr * unconnected for servers that reply from a port other than NFS_PORT. 283184588Sdfr */ 284184588Sdfr if (!(nmp->nm_flag & NFSMNT_NOCONN)) { 285184588Sdfr mtx_unlock(&nmp->nm_mtx); 286184588Sdfr CLNT_CONTROL(client, CLSET_CONNECT, &one); 287184588Sdfr } else { 288184588Sdfr mtx_unlock(&nmp->nm_mtx); 289184588Sdfr } 290184588Sdfr 291184588Sdfr /* Restore current thread's credentials. */ 292184588Sdfr td->td_ucred = origcred; 293184588Sdfr 294184588Sdfr mtx_lock(&nmp->nm_mtx); 295184588Sdfr /* Initialize other non-zero congestion variables */ 296184588Sdfr nfs_init_rtt(nmp); 297184588Sdfr mtx_unlock(&nmp->nm_mtx); 298184588Sdfr return (0); 299184588Sdfr} 300184588Sdfr 301184588Sdfr/* 302184588Sdfr * NFS disconnect. Clean up and unlink. 303184588Sdfr */ 304184588Sdfrvoid 305184588Sdfrnfs_disconnect(struct nfsmount *nmp) 306184588Sdfr{ 307184588Sdfr CLIENT *client; 308184588Sdfr 309184588Sdfr mtx_lock(&nmp->nm_mtx); 310184588Sdfr if (nmp->nm_client) { 311184588Sdfr client = nmp->nm_client; 312184588Sdfr nmp->nm_client = NULL; 313184588Sdfr mtx_unlock(&nmp->nm_mtx); 314184588Sdfr#ifdef KGSSAPI 315184588Sdfr rpc_gss_secpurge(client); 316184588Sdfr#endif 317184588Sdfr CLNT_CLOSE(client); 318184588Sdfr CLNT_RELEASE(client); 319184588Sdfr } else { 320184588Sdfr mtx_unlock(&nmp->nm_mtx); 321184588Sdfr } 322184588Sdfr} 323184588Sdfr 324184588Sdfrvoid 325184588Sdfrnfs_safedisconnect(struct nfsmount *nmp) 326184588Sdfr{ 327184588Sdfr 328184588Sdfr nfs_disconnect(nmp); 329184588Sdfr} 330184588Sdfr 331184588Sdfrstatic AUTH * 332184588Sdfrnfs_getauth(struct nfsmount *nmp, struct ucred *cred) 333184588Sdfr{ 334184588Sdfr#ifdef KGSSAPI 335184588Sdfr rpc_gss_service_t svc; 336184588Sdfr AUTH *auth; 337184588Sdfr#endif 338184588Sdfr 339184588Sdfr switch (nmp->nm_secflavor) { 340184588Sdfr#ifdef KGSSAPI 341184588Sdfr case RPCSEC_GSS_KRB5: 342184588Sdfr case RPCSEC_GSS_KRB5I: 343184588Sdfr case RPCSEC_GSS_KRB5P: 344184588Sdfr if (!nmp->nm_mech_oid) { 345184588Sdfr if (!rpc_gss_mech_to_oid("kerberosv5", 346184588Sdfr &nmp->nm_mech_oid)) 347184588Sdfr return (NULL); 348184588Sdfr } 349184588Sdfr if (nmp->nm_secflavor == RPCSEC_GSS_KRB5) 350184588Sdfr svc = rpc_gss_svc_none; 351184588Sdfr else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I) 352184588Sdfr svc = rpc_gss_svc_integrity; 353184588Sdfr else 354184588Sdfr svc = rpc_gss_svc_privacy; 355184588Sdfr auth = rpc_gss_secfind(nmp->nm_client, cred, 356184588Sdfr nmp->nm_principal, nmp->nm_mech_oid, svc); 357184588Sdfr if (auth) 358184588Sdfr return (auth); 359184588Sdfr /* fallthrough */ 360184588Sdfr#endif 361184588Sdfr case AUTH_SYS: 362184588Sdfr default: 363184588Sdfr return (authunix_create(cred)); 364184588Sdfr 365184588Sdfr } 366184588Sdfr} 367184588Sdfr 368184588Sdfr/* 369184588Sdfr * Callback from the RPC code to generate up/down notifications. 370184588Sdfr */ 371184588Sdfr 372184588Sdfrstruct nfs_feedback_arg { 373184588Sdfr struct nfsmount *nf_mount; 374184588Sdfr int nf_lastmsg; /* last tprintf */ 375184588Sdfr int nf_tprintfmsg; 376184588Sdfr struct thread *nf_td; 377184588Sdfr}; 378184588Sdfr 379184588Sdfrstatic void 380184588Sdfrnfs_feedback(int type, int proc, void *arg) 381184588Sdfr{ 382184588Sdfr struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 383184588Sdfr struct nfsmount *nmp = nf->nf_mount; 384184588Sdfr struct timeval now; 385184588Sdfr 386184588Sdfr getmicrouptime(&now); 387184588Sdfr 388184588Sdfr switch (type) { 389184588Sdfr case FEEDBACK_REXMIT2: 390184588Sdfr case FEEDBACK_RECONNECT: 391184588Sdfr if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 392184588Sdfr nfs_down(nmp, nf->nf_td, 393184588Sdfr "not responding", 0, NFSSTA_TIMEO); 394184588Sdfr nf->nf_tprintfmsg = TRUE; 395184588Sdfr nf->nf_lastmsg = now.tv_sec; 396184588Sdfr } 397184588Sdfr break; 398184588Sdfr 399184588Sdfr case FEEDBACK_OK: 400184588Sdfr nfs_up(nf->nf_mount, nf->nf_td, 401184588Sdfr "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 402184588Sdfr break; 403184588Sdfr } 404184588Sdfr} 405184588Sdfr 406184588Sdfr/* 407192686Sdfr * nfs_realign: 408192686Sdfr * 409192686Sdfr * Check for badly aligned mbuf data and realign by copying the unaligned 410192686Sdfr * portion of the data into a new mbuf chain and freeing the portions 411192686Sdfr * of the old chain that were replaced. 412192686Sdfr * 413192686Sdfr * We cannot simply realign the data within the existing mbuf chain 414192686Sdfr * because the underlying buffers may contain other rpc commands and 415192686Sdfr * we cannot afford to overwrite them. 416192686Sdfr * 417192686Sdfr * We would prefer to avoid this situation entirely. The situation does 418192686Sdfr * not occur with NFS/UDP and is supposed to only occassionally occur 419192686Sdfr * with TCP. Use vfs.nfs.realign_count and realign_test to check this. 420192686Sdfr * 421192686Sdfr */ 422192686Sdfrstatic int 423192686Sdfrnfs_realign(struct mbuf **pm, int hsiz) 424192686Sdfr{ 425192686Sdfr struct mbuf *m, *n; 426192686Sdfr int off, space; 427192686Sdfr 428192686Sdfr ++nfs_realign_test; 429192686Sdfr while ((m = *pm) != NULL) { 430192686Sdfr if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { 431192686Sdfr /* 432192686Sdfr * NB: we can't depend on m_pkthdr.len to help us 433192686Sdfr * decide what to do here. May not be worth doing 434192686Sdfr * the m_length calculation as m_copyback will 435192686Sdfr * expand the mbuf chain below as needed. 436192686Sdfr */ 437192686Sdfr space = m_length(m, NULL); 438192686Sdfr if (space >= MINCLSIZE) { 439192686Sdfr /* NB: m_copyback handles space > MCLBYTES */ 440192686Sdfr n = m_getcl(M_DONTWAIT, MT_DATA, 0); 441192686Sdfr } else 442192686Sdfr n = m_get(M_DONTWAIT, MT_DATA); 443192686Sdfr if (n == NULL) 444192686Sdfr return (ENOMEM); 445192686Sdfr /* 446192686Sdfr * Align the remainder of the mbuf chain. 447192686Sdfr */ 448192686Sdfr n->m_len = 0; 449192686Sdfr off = 0; 450192686Sdfr while (m != NULL) { 451192686Sdfr m_copyback(n, off, m->m_len, mtod(m, caddr_t)); 452192686Sdfr off += m->m_len; 453192686Sdfr m = m->m_next; 454192686Sdfr } 455192686Sdfr m_freem(*pm); 456192686Sdfr *pm = n; 457192686Sdfr ++nfs_realign_count; 458192686Sdfr break; 459192686Sdfr } 460192686Sdfr pm = &m->m_next; 461192686Sdfr } 462192686Sdfr return (0); 463192686Sdfr} 464192686Sdfr 465192686Sdfr/* 466184588Sdfr * nfs_request - goes something like this 467184588Sdfr * - fill in request struct 468184588Sdfr * - links it into list 469184588Sdfr * - calls nfs_send() for first transmit 470184588Sdfr * - calls nfs_receive() to get reply 471184588Sdfr * - break down rpc header and return with nfs reply pointed to 472184588Sdfr * by mrep or error 473184588Sdfr * nb: always frees up mreq mbuf list 474184588Sdfr */ 475184588Sdfrint 476184588Sdfrnfs_request(struct vnode *vp, struct mbuf *mreq, int procnum, 477184588Sdfr struct thread *td, struct ucred *cred, struct mbuf **mrp, 478184588Sdfr struct mbuf **mdp, caddr_t *dposp) 479184588Sdfr{ 480184588Sdfr struct mbuf *mrep; 481184588Sdfr u_int32_t *tl; 482184588Sdfr struct nfsmount *nmp; 483184588Sdfr struct mbuf *md; 484184588Sdfr time_t waituntil; 485184588Sdfr caddr_t dpos; 486184588Sdfr int error = 0; 487184588Sdfr struct timeval now; 488184588Sdfr AUTH *auth = NULL; 489184588Sdfr enum nfs_rto_timer_t timer; 490184588Sdfr struct nfs_feedback_arg nf; 491184588Sdfr struct rpc_callextra ext; 492184588Sdfr enum clnt_stat stat; 493184588Sdfr struct timeval timo; 494184588Sdfr 495184588Sdfr /* Reject requests while attempting a forced unmount. */ 496184588Sdfr if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { 497184588Sdfr m_freem(mreq); 498184588Sdfr return (ESTALE); 499184588Sdfr } 500184588Sdfr nmp = VFSTONFS(vp->v_mount); 501184588Sdfr bzero(&nf, sizeof(struct nfs_feedback_arg)); 502184588Sdfr nf.nf_mount = nmp; 503184588Sdfr nf.nf_td = td; 504184588Sdfr getmicrouptime(&now); 505184588Sdfr nf.nf_lastmsg = now.tv_sec - 506184588Sdfr ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 507184588Sdfr 508184588Sdfr /* 509184588Sdfr * XXX if not already connected call nfs_connect now. Longer 510184588Sdfr * term, change nfs_mount to call nfs_connect unconditionally 511184588Sdfr * and let clnt_reconnect_create handle reconnects. 512184588Sdfr */ 513184588Sdfr if (!nmp->nm_client) 514195203Sdfr nfs_connect(nmp); 515184588Sdfr 516184588Sdfr auth = nfs_getauth(nmp, cred); 517184588Sdfr if (!auth) { 518184588Sdfr m_freem(mreq); 519184588Sdfr return (EACCES); 520184588Sdfr } 521184588Sdfr bzero(&ext, sizeof(ext)); 522184588Sdfr ext.rc_auth = auth; 523184588Sdfr 524184588Sdfr ext.rc_feedback = nfs_feedback; 525184588Sdfr ext.rc_feedback_arg = &nf; 526184588Sdfr 527184588Sdfr /* 528184588Sdfr * Use a conservative timeout for RPCs other than getattr, 529184588Sdfr * lookup, read or write. The justification for doing "other" 530184588Sdfr * this way is that these RPCs happen so infrequently that 531184588Sdfr * timer est. would probably be stale. Also, since many of 532184588Sdfr * these RPCs are non-idempotent, a conservative timeout is 533184588Sdfr * desired. 534184588Sdfr */ 535184588Sdfr timer = nfs_rto_timer(procnum); 536184588Sdfr if (timer != NFS_DEFAULT_TIMER) { 537184588Sdfr ext.rc_timers = &nmp->nm_timers[timer - 1]; 538184588Sdfr } else { 539184588Sdfr ext.rc_timers = NULL; 540184588Sdfr } 541184588Sdfr 542190293Srwatson#ifdef KDTRACE_HOOKS 543190293Srwatson if (dtrace_nfsclient_nfs23_start_probe != NULL) { 544190293Srwatson uint32_t probe_id; 545190293Srwatson int probe_procnum; 546190293Srwatson 547190293Srwatson if (nmp->nm_flag & NFSMNT_NFSV3) { 548190293Srwatson probe_id = nfsclient_nfs3_start_probes[procnum]; 549190293Srwatson probe_procnum = procnum; 550190293Srwatson } else { 551190293Srwatson probe_id = nfsclient_nfs2_start_probes[procnum]; 552191777Srwatson probe_procnum = nfsv2_procid[procnum]; 553190293Srwatson } 554190293Srwatson if (probe_id != 0) 555190293Srwatson (dtrace_nfsclient_nfs23_start_probe)(probe_id, vp, 556190293Srwatson mreq, cred, probe_procnum); 557190293Srwatson } 558190293Srwatson#endif 559190293Srwatson 560184588Sdfr nfsstats.rpcrequests++; 561184588Sdfrtryagain: 562184588Sdfr timo.tv_sec = nmp->nm_timeo / NFS_HZ; 563184588Sdfr timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ; 564184588Sdfr mrep = NULL; 565184588Sdfr stat = CLNT_CALL_MBUF(nmp->nm_client, &ext, 566184588Sdfr (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum], 567184588Sdfr mreq, &mrep, timo); 568184588Sdfr 569184588Sdfr /* 570184588Sdfr * If there was a successful reply and a tprintf msg. 571184588Sdfr * tprintf a response. 572184588Sdfr */ 573184588Sdfr if (stat == RPC_SUCCESS) { 574184588Sdfr error = 0; 575184588Sdfr } else if (stat == RPC_TIMEDOUT) { 576184588Sdfr error = ETIMEDOUT; 577184588Sdfr } else if (stat == RPC_VERSMISMATCH) { 578184588Sdfr error = EOPNOTSUPP; 579184588Sdfr } else if (stat == RPC_PROGVERSMISMATCH) { 580184588Sdfr error = EPROTONOSUPPORT; 581184588Sdfr } else { 582184588Sdfr error = EACCES; 583184588Sdfr } 584190220Srwatson if (error) 585190220Srwatson goto nfsmout; 586184588Sdfr 587184588Sdfr KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 588184588Sdfr 589192686Sdfr /* 590192686Sdfr * Search for any mbufs that are not a multiple of 4 bytes long 591192686Sdfr * or with m_data not longword aligned. 592192686Sdfr * These could cause pointer alignment problems, so copy them to 593192686Sdfr * well aligned mbufs. 594192686Sdfr */ 595192686Sdfr error = nfs_realign(&mrep, 2 * NFSX_UNSIGNED); 596192686Sdfr if (error == ENOMEM) { 597192686Sdfr m_freem(mrep); 598192686Sdfr AUTH_DESTROY(auth); 599192686Sdfr return (error); 600192686Sdfr } 601192686Sdfr 602192686Sdfr md = mrep; 603184588Sdfr dpos = mtod(mrep, caddr_t); 604184588Sdfr tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 605184588Sdfr if (*tl != 0) { 606184588Sdfr error = fxdr_unsigned(int, *tl); 607184588Sdfr if ((nmp->nm_flag & NFSMNT_NFSV3) && 608184588Sdfr error == NFSERR_TRYLATER) { 609184588Sdfr m_freem(mrep); 610184588Sdfr error = 0; 611184588Sdfr waituntil = time_second + nfs3_jukebox_delay; 612184588Sdfr while (time_second < waituntil) { 613184588Sdfr (void) tsleep(&fake_wchan, PSOCK, "nqnfstry", hz); 614184588Sdfr } 615184588Sdfr goto tryagain; 616184588Sdfr } 617184588Sdfr 618184588Sdfr /* 619184588Sdfr * If the File Handle was stale, invalidate the lookup 620184588Sdfr * cache, just in case. 621184588Sdfr */ 622184588Sdfr if (error == ESTALE) 623190785Sjhb nfs_purgecache(vp); 624184588Sdfr /* 625184588Sdfr * Skip wcc data on NFS errors for now. NetApp filers 626184588Sdfr * return corrupt postop attrs in the wcc data for NFS 627184588Sdfr * err EROFS. Not sure if they could return corrupt 628184588Sdfr * postop attrs for others errors. 629184588Sdfr */ 630184588Sdfr if ((nmp->nm_flag & NFSMNT_NFSV3) && !nfs_skip_wcc_data_onerr) { 631184588Sdfr *mrp = mrep; 632184588Sdfr *mdp = md; 633184588Sdfr *dposp = dpos; 634184588Sdfr error |= NFSERR_RETERR; 635184588Sdfr } else 636184588Sdfr m_freem(mrep); 637190220Srwatson goto nfsmout; 638184588Sdfr } 639184588Sdfr 640190293Srwatson#ifdef KDTRACE_HOOKS 641190293Srwatson if (dtrace_nfsclient_nfs23_done_probe != NULL) { 642190293Srwatson uint32_t probe_id; 643190293Srwatson int probe_procnum; 644190293Srwatson 645190293Srwatson if (nmp->nm_flag & NFSMNT_NFSV3) { 646190293Srwatson probe_id = nfsclient_nfs3_done_probes[procnum]; 647190293Srwatson probe_procnum = procnum; 648190293Srwatson } else { 649190293Srwatson probe_id = nfsclient_nfs2_done_probes[procnum]; 650190293Srwatson probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 651190293Srwatson procnum : nfsv2_procid[procnum]; 652190293Srwatson } 653190293Srwatson if (probe_id != 0) 654190293Srwatson (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 655190293Srwatson mreq, cred, probe_procnum, 0); 656190293Srwatson } 657190293Srwatson#endif 658184588Sdfr m_freem(mreq); 659184588Sdfr *mrp = mrep; 660184588Sdfr *mdp = md; 661184588Sdfr *dposp = dpos; 662184588Sdfr AUTH_DESTROY(auth); 663184588Sdfr return (0); 664184588Sdfr 665184588Sdfrnfsmout: 666190293Srwatson#ifdef KDTRACE_HOOKS 667190293Srwatson if (dtrace_nfsclient_nfs23_done_probe != NULL) { 668190293Srwatson uint32_t probe_id; 669190293Srwatson int probe_procnum; 670190293Srwatson 671190293Srwatson if (nmp->nm_flag & NFSMNT_NFSV3) { 672190293Srwatson probe_id = nfsclient_nfs3_done_probes[procnum]; 673190293Srwatson probe_procnum = procnum; 674190293Srwatson } else { 675190293Srwatson probe_id = nfsclient_nfs2_done_probes[procnum]; 676190293Srwatson probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ? 677190293Srwatson procnum : nfsv2_procid[procnum]; 678190293Srwatson } 679190293Srwatson if (probe_id != 0) 680190293Srwatson (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp, 681190293Srwatson mreq, cred, probe_procnum, error); 682190293Srwatson } 683190293Srwatson#endif 684184588Sdfr m_freem(mreq); 685184588Sdfr if (auth) 686184588Sdfr AUTH_DESTROY(auth); 687184588Sdfr return (error); 688184588Sdfr} 689184588Sdfr 690184588Sdfr/* 691184588Sdfr * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 692184588Sdfr * wait for all requests to complete. This is used by forced unmounts 693184588Sdfr * to terminate any outstanding RPCs. 694184588Sdfr */ 695184588Sdfrint 696184588Sdfrnfs_nmcancelreqs(struct nfsmount *nmp) 697184588Sdfr{ 698184588Sdfr 699184588Sdfr if (nmp->nm_client) 700184588Sdfr CLNT_CLOSE(nmp->nm_client); 701184588Sdfr return (0); 702184588Sdfr} 703184588Sdfr 704184588Sdfr/* 705184588Sdfr * Any signal that can interrupt an NFS operation in an intr mount 706184588Sdfr * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 707184588Sdfr */ 708184588Sdfrint nfs_sig_set[] = { 709184588Sdfr SIGINT, 710184588Sdfr SIGTERM, 711184588Sdfr SIGHUP, 712184588Sdfr SIGKILL, 713184588Sdfr SIGSTOP, 714184588Sdfr SIGQUIT 715184588Sdfr}; 716184588Sdfr 717184588Sdfr/* 718184588Sdfr * Check to see if one of the signals in our subset is pending on 719184588Sdfr * the process (in an intr mount). 720184588Sdfr */ 721184588Sdfrstatic int 722184588Sdfrnfs_sig_pending(sigset_t set) 723184588Sdfr{ 724184588Sdfr int i; 725184588Sdfr 726184588Sdfr for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) 727184588Sdfr if (SIGISMEMBER(set, nfs_sig_set[i])) 728184588Sdfr return (1); 729184588Sdfr return (0); 730184588Sdfr} 731184588Sdfr 732184588Sdfr/* 733184588Sdfr * The set/restore sigmask functions are used to (temporarily) overwrite 734184588Sdfr * the process p_sigmask during an RPC call (for example). These are also 735184588Sdfr * used in other places in the NFS client that might tsleep(). 736184588Sdfr */ 737184588Sdfrvoid 738184588Sdfrnfs_set_sigmask(struct thread *td, sigset_t *oldset) 739184588Sdfr{ 740184588Sdfr sigset_t newset; 741184588Sdfr int i; 742184588Sdfr struct proc *p; 743184588Sdfr 744184588Sdfr SIGFILLSET(newset); 745184588Sdfr if (td == NULL) 746184588Sdfr td = curthread; /* XXX */ 747184588Sdfr p = td->td_proc; 748184588Sdfr /* Remove the NFS set of signals from newset */ 749184588Sdfr PROC_LOCK(p); 750184588Sdfr mtx_lock(&p->p_sigacts->ps_mtx); 751184588Sdfr for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) { 752184588Sdfr /* 753184588Sdfr * But make sure we leave the ones already masked 754184588Sdfr * by the process, ie. remove the signal from the 755184588Sdfr * temporary signalmask only if it wasn't already 756184588Sdfr * in p_sigmask. 757184588Sdfr */ 758184588Sdfr if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) && 759184588Sdfr !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i])) 760184588Sdfr SIGDELSET(newset, nfs_sig_set[i]); 761184588Sdfr } 762184588Sdfr mtx_unlock(&p->p_sigacts->ps_mtx); 763184588Sdfr PROC_UNLOCK(p); 764184588Sdfr kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 765184588Sdfr} 766184588Sdfr 767184588Sdfrvoid 768184588Sdfrnfs_restore_sigmask(struct thread *td, sigset_t *set) 769184588Sdfr{ 770184588Sdfr if (td == NULL) 771184588Sdfr td = curthread; /* XXX */ 772184588Sdfr kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 773184588Sdfr} 774184588Sdfr 775184588Sdfr/* 776184588Sdfr * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 777184588Sdfr * old one after msleep() returns. 778184588Sdfr */ 779184588Sdfrint 780184588Sdfrnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) 781184588Sdfr{ 782184588Sdfr sigset_t oldset; 783184588Sdfr int error; 784184588Sdfr struct proc *p; 785184588Sdfr 786184588Sdfr if ((priority & PCATCH) == 0) 787184588Sdfr return msleep(ident, mtx, priority, wmesg, timo); 788184588Sdfr if (td == NULL) 789184588Sdfr td = curthread; /* XXX */ 790184588Sdfr nfs_set_sigmask(td, &oldset); 791184588Sdfr error = msleep(ident, mtx, priority, wmesg, timo); 792184588Sdfr nfs_restore_sigmask(td, &oldset); 793184588Sdfr p = td->td_proc; 794184588Sdfr return (error); 795184588Sdfr} 796184588Sdfr 797184588Sdfr/* 798184588Sdfr * Test for a termination condition pending on the process. 799184588Sdfr * This is used for NFSMNT_INT mounts. 800184588Sdfr */ 801184588Sdfrint 802195203Sdfrnfs_sigintr(struct nfsmount *nmp, struct thread *td) 803184588Sdfr{ 804184588Sdfr struct proc *p; 805184588Sdfr sigset_t tmpset; 806184588Sdfr 807184588Sdfr /* Terminate all requests while attempting a forced unmount. */ 808184588Sdfr if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 809184588Sdfr return (EIO); 810184588Sdfr if (!(nmp->nm_flag & NFSMNT_INT)) 811184588Sdfr return (0); 812184588Sdfr if (td == NULL) 813184588Sdfr return (0); 814184588Sdfr p = td->td_proc; 815184588Sdfr PROC_LOCK(p); 816184588Sdfr tmpset = p->p_siglist; 817184588Sdfr SIGSETOR(tmpset, td->td_siglist); 818184588Sdfr SIGSETNAND(tmpset, td->td_sigmask); 819184588Sdfr mtx_lock(&p->p_sigacts->ps_mtx); 820184588Sdfr SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 821184588Sdfr mtx_unlock(&p->p_sigacts->ps_mtx); 822184588Sdfr if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 823184588Sdfr && nfs_sig_pending(tmpset)) { 824184588Sdfr PROC_UNLOCK(p); 825184588Sdfr return (EINTR); 826184588Sdfr } 827184588Sdfr PROC_UNLOCK(p); 828184588Sdfr return (0); 829184588Sdfr} 830184588Sdfr 831184588Sdfrstatic int 832184588Sdfrnfs_msg(struct thread *td, const char *server, const char *msg, int error) 833184588Sdfr{ 834184588Sdfr struct proc *p; 835184588Sdfr 836184588Sdfr p = td ? td->td_proc : NULL; 837184588Sdfr if (error) { 838184588Sdfr tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 839184588Sdfr msg, error); 840184588Sdfr } else { 841184588Sdfr tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 842184588Sdfr } 843184588Sdfr return (0); 844184588Sdfr} 845184588Sdfr 846184588Sdfrstatic void 847184588Sdfrnfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 848184588Sdfr int error, int flags) 849184588Sdfr{ 850184588Sdfr if (nmp == NULL) 851184588Sdfr return; 852184588Sdfr mtx_lock(&nmp->nm_mtx); 853184588Sdfr if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 854184588Sdfr nmp->nm_state |= NFSSTA_TIMEO; 855184588Sdfr mtx_unlock(&nmp->nm_mtx); 856184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 857184588Sdfr VQ_NOTRESP, 0); 858184588Sdfr } else 859184588Sdfr mtx_unlock(&nmp->nm_mtx); 860184588Sdfr mtx_lock(&nmp->nm_mtx); 861184588Sdfr if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 862184588Sdfr nmp->nm_state |= NFSSTA_LOCKTIMEO; 863184588Sdfr mtx_unlock(&nmp->nm_mtx); 864184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 865184588Sdfr VQ_NOTRESPLOCK, 0); 866184588Sdfr } else 867184588Sdfr mtx_unlock(&nmp->nm_mtx); 868184588Sdfr nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 869184588Sdfr} 870184588Sdfr 871184588Sdfrstatic void 872184588Sdfrnfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 873184588Sdfr int flags, int tprintfmsg) 874184588Sdfr{ 875184588Sdfr if (nmp == NULL) 876184588Sdfr return; 877184588Sdfr if (tprintfmsg) { 878184588Sdfr nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 879184588Sdfr } 880184588Sdfr 881184588Sdfr mtx_lock(&nmp->nm_mtx); 882184588Sdfr if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 883184588Sdfr nmp->nm_state &= ~NFSSTA_TIMEO; 884184588Sdfr mtx_unlock(&nmp->nm_mtx); 885184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 886184588Sdfr VQ_NOTRESP, 1); 887184588Sdfr } else 888184588Sdfr mtx_unlock(&nmp->nm_mtx); 889184588Sdfr 890184588Sdfr mtx_lock(&nmp->nm_mtx); 891184588Sdfr if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 892184588Sdfr nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 893184588Sdfr mtx_unlock(&nmp->nm_mtx); 894184588Sdfr vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 895184588Sdfr VQ_NOTRESPLOCK, 1); 896184588Sdfr } else 897184588Sdfr mtx_unlock(&nmp->nm_mtx); 898184588Sdfr} 899