1191783Srmacklem/*- 2191783Srmacklem * Copyright (c) 1989, 1991, 1993, 1995 3191783Srmacklem * The Regents of the University of California. All rights reserved. 4191783Srmacklem * 5191783Srmacklem * This code is derived from software contributed to Berkeley by 6191783Srmacklem * Rick Macklem at The University of Guelph. 7191783Srmacklem * 8191783Srmacklem * Redistribution and use in source and binary forms, with or without 9191783Srmacklem * modification, are permitted provided that the following conditions 10191783Srmacklem * are met: 11191783Srmacklem * 1. Redistributions of source code must retain the above copyright 12191783Srmacklem * notice, this list of conditions and the following disclaimer. 13191783Srmacklem * 2. Redistributions in binary form must reproduce the above copyright 14191783Srmacklem * notice, this list of conditions and the following disclaimer in the 15191783Srmacklem * documentation and/or other materials provided with the distribution. 16191783Srmacklem * 4. Neither the name of the University nor the names of its contributors 17191783Srmacklem * may be used to endorse or promote products derived from this software 18191783Srmacklem * without specific prior written permission. 19191783Srmacklem * 20191783Srmacklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21191783Srmacklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22191783Srmacklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23191783Srmacklem * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24191783Srmacklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25191783Srmacklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26191783Srmacklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27191783Srmacklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28191783Srmacklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29191783Srmacklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30191783Srmacklem * SUCH DAMAGE. 31191783Srmacklem * 32191783Srmacklem */ 33191783Srmacklem 34191783Srmacklem#include <sys/cdefs.h> 35191783Srmacklem__FBSDID("$FreeBSD$"); 36191783Srmacklem 37191783Srmacklem/* 38191783Srmacklem * Socket operations for use by nfs 39191783Srmacklem */ 40191783Srmacklem 41223280Srmacklem#include "opt_kdtrace.h" 42191783Srmacklem#include "opt_kgssapi.h" 43191783Srmacklem#include "opt_nfs.h" 44191783Srmacklem 45191783Srmacklem#include <sys/param.h> 46191783Srmacklem#include <sys/systm.h> 47191783Srmacklem#include <sys/kernel.h> 48191783Srmacklem#include <sys/limits.h> 49191783Srmacklem#include <sys/lock.h> 50191783Srmacklem#include <sys/malloc.h> 51191783Srmacklem#include <sys/mbuf.h> 52191783Srmacklem#include <sys/mount.h> 53191783Srmacklem#include <sys/mutex.h> 54191783Srmacklem#include <sys/proc.h> 55191783Srmacklem#include <sys/signalvar.h> 56191783Srmacklem#include <sys/syscallsubr.h> 57191783Srmacklem#include <sys/sysctl.h> 58191783Srmacklem#include <sys/syslog.h> 59191783Srmacklem#include <sys/vnode.h> 60191783Srmacklem 61191783Srmacklem#include <rpc/rpc.h> 62191783Srmacklem 63191783Srmacklem#include <kgssapi/krb5/kcrypto.h> 64191783Srmacklem 65191783Srmacklem#include <fs/nfs/nfsport.h> 66191783Srmacklem 67223280Srmacklem#ifdef KDTRACE_HOOKS 68223280Srmacklem#include <sys/dtrace_bsd.h> 69223280Srmacklem 70223280Srmacklemdtrace_nfsclient_nfs23_start_probe_func_t 71223280Srmacklem dtrace_nfscl_nfs234_start_probe; 72223280Srmacklem 73223280Srmacklemdtrace_nfsclient_nfs23_done_probe_func_t 74223280Srmacklem dtrace_nfscl_nfs234_done_probe; 75223280Srmacklem 76223280Srmacklem/* 77223280Srmacklem * Registered probes by RPC type. 78223280Srmacklem */ 79244042Srmacklemuint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; 80244042Srmacklemuint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; 81223280Srmacklem 82244042Srmacklemuint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; 83244042Srmacklemuint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; 84223280Srmacklem 85244042Srmacklemuint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; 86244042Srmacklemuint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; 87223280Srmacklem#endif 88223280Srmacklem 89191783SrmacklemNFSSTATESPINLOCK; 90191783SrmacklemNFSREQSPINLOCK; 91244042SrmacklemNFSDLOCKMUTEX; 92191783Srmacklemextern struct nfsstats newnfsstats; 93191783Srmacklemextern struct nfsreqhead nfsd_reqq; 94191783Srmacklemextern int nfscl_ticks; 95191783Srmacklemextern void (*ncl_call_invalcaches)(struct vnode *); 96244042Srmacklemextern int nfs_numnfscbd; 97244042Srmacklemextern int nfscl_debuglevel; 98191783Srmacklem 99244042SrmacklemSVCPOOL *nfscbd_pool; 100191783Srmacklemstatic int nfsrv_gsscallbackson = 0; 101191783Srmacklemstatic int nfs_bufpackets = 4; 102191783Srmacklemstatic int nfs_reconnects; 103191783Srmacklemstatic int nfs3_jukebox_delay = 10; 104191783Srmacklemstatic int nfs_skip_wcc_data_onerr = 1; 105191783Srmacklem 106221973SrmacklemSYSCTL_DECL(_vfs_nfs); 107191783Srmacklem 108221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 109191783Srmacklem "Buffer reservation size 2 < x < 64"); 110221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 111191783Srmacklem "Number of times the nfs client has had to reconnect"); 112221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, 113191783Srmacklem "Number of seconds to delay a retry after receiving EJUKEBOX"); 114221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, 115191783Srmacklem "Disable weak cache consistency checking when server returns an error"); 116191783Srmacklem 117191783Srmacklemstatic void nfs_down(struct nfsmount *, struct thread *, const char *, 118191783Srmacklem int, int); 119191783Srmacklemstatic void nfs_up(struct nfsmount *, struct thread *, const char *, 120191783Srmacklem int, int); 121191783Srmacklemstatic int nfs_msg(struct thread *, const char *, const char *, int); 122191783Srmacklem 123191783Srmacklemstruct nfs_cached_auth { 124191783Srmacklem int ca_refs; /* refcount, including 1 from the cache */ 125191783Srmacklem uid_t ca_uid; /* uid that corresponds to this auth */ 126191783Srmacklem AUTH *ca_auth; /* RPC auth handle */ 127191783Srmacklem}; 128191783Srmacklem 129207764Srmacklemstatic int nfsv2_procid[NFS_V3NPROCS] = { 130207764Srmacklem NFSV2PROC_NULL, 131207764Srmacklem NFSV2PROC_GETATTR, 132207764Srmacklem NFSV2PROC_SETATTR, 133207764Srmacklem NFSV2PROC_LOOKUP, 134207764Srmacklem NFSV2PROC_NOOP, 135207764Srmacklem NFSV2PROC_READLINK, 136207764Srmacklem NFSV2PROC_READ, 137207764Srmacklem NFSV2PROC_WRITE, 138207764Srmacklem NFSV2PROC_CREATE, 139207764Srmacklem NFSV2PROC_MKDIR, 140207764Srmacklem NFSV2PROC_SYMLINK, 141207764Srmacklem NFSV2PROC_CREATE, 142207764Srmacklem NFSV2PROC_REMOVE, 143207764Srmacklem NFSV2PROC_RMDIR, 144207764Srmacklem NFSV2PROC_RENAME, 145207764Srmacklem NFSV2PROC_LINK, 146207764Srmacklem NFSV2PROC_READDIR, 147207764Srmacklem NFSV2PROC_NOOP, 148207764Srmacklem NFSV2PROC_STATFS, 149207764Srmacklem NFSV2PROC_NOOP, 150207764Srmacklem NFSV2PROC_NOOP, 151207764Srmacklem NFSV2PROC_NOOP, 152207764Srmacklem}; 153207764Srmacklem 154191783Srmacklem/* 155191783Srmacklem * Initialize sockets and congestion for a new NFS connection. 156191783Srmacklem * We do not free the sockaddr if error. 157191783Srmacklem */ 158191783Srmacklemint 159191783Srmacklemnewnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, 160191783Srmacklem struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) 161191783Srmacklem{ 162191783Srmacklem int rcvreserve, sndreserve; 163191783Srmacklem int pktscale; 164191783Srmacklem struct sockaddr *saddr; 165191783Srmacklem struct ucred *origcred; 166191783Srmacklem CLIENT *client; 167191783Srmacklem struct netconfig *nconf; 168191783Srmacklem struct socket *so; 169224086Szack int one = 1, retries, error = 0; 170191783Srmacklem struct thread *td = curthread; 171244042Srmacklem SVCXPRT *xprt; 172228757Srmacklem struct timeval timo; 173191783Srmacklem 174191783Srmacklem /* 175191783Srmacklem * We need to establish the socket using the credentials of 176191783Srmacklem * the mountpoint. Some parts of this process (such as 177191783Srmacklem * sobind() and soconnect()) will use the curent thread's 178191783Srmacklem * credential instead of the socket credential. To work 179191783Srmacklem * around this, temporarily change the current thread's 180191783Srmacklem * credential to that of the mountpoint. 181191783Srmacklem * 182191783Srmacklem * XXX: It would be better to explicitly pass the correct 183191783Srmacklem * credential to sobind() and soconnect(). 184191783Srmacklem */ 185191783Srmacklem origcred = td->td_ucred; 186191783Srmacklem 187191783Srmacklem /* 188191783Srmacklem * Use the credential in nr_cred, if not NULL. 189191783Srmacklem */ 190191783Srmacklem if (nrp->nr_cred != NULL) 191191783Srmacklem td->td_ucred = nrp->nr_cred; 192191783Srmacklem else 193191783Srmacklem td->td_ucred = cred; 194191783Srmacklem saddr = nrp->nr_nam; 195191783Srmacklem 196191783Srmacklem if (saddr->sa_family == AF_INET) 197191783Srmacklem if (nrp->nr_sotype == SOCK_DGRAM) 198191783Srmacklem nconf = getnetconfigent("udp"); 199191783Srmacklem else 200191783Srmacklem nconf = getnetconfigent("tcp"); 201191783Srmacklem else 202191783Srmacklem if (nrp->nr_sotype == SOCK_DGRAM) 203191783Srmacklem nconf = getnetconfigent("udp6"); 204191783Srmacklem else 205191783Srmacklem nconf = getnetconfigent("tcp6"); 206191783Srmacklem 207191783Srmacklem pktscale = nfs_bufpackets; 208191783Srmacklem if (pktscale < 2) 209191783Srmacklem pktscale = 2; 210191783Srmacklem if (pktscale > 64) 211191783Srmacklem pktscale = 64; 212191783Srmacklem /* 213191783Srmacklem * soreserve() can fail if sb_max is too small, so shrink pktscale 214191783Srmacklem * and try again if there is an error. 215191783Srmacklem * Print a log message suggesting increasing sb_max. 216191783Srmacklem * Creating a socket and doing this is necessary since, if the 217191783Srmacklem * reservation sizes are too large and will make soreserve() fail, 218191783Srmacklem * the connection will work until a large send is attempted and 219191783Srmacklem * then it will loop in the krpc code. 220191783Srmacklem */ 221191783Srmacklem so = NULL; 222191783Srmacklem saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); 223191783Srmacklem error = socreate(saddr->sa_family, &so, nrp->nr_sotype, 224191783Srmacklem nrp->nr_soproto, td->td_ucred, td); 225191783Srmacklem if (error) { 226191783Srmacklem td->td_ucred = origcred; 227224086Szack goto out; 228191783Srmacklem } 229191783Srmacklem do { 230220752Srmacklem if (error != 0 && pktscale > 2) 231191783Srmacklem pktscale--; 232191783Srmacklem if (nrp->nr_sotype == SOCK_DGRAM) { 233191783Srmacklem if (nmp != NULL) { 234191783Srmacklem sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 235191783Srmacklem pktscale; 236191783Srmacklem rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 237191783Srmacklem pktscale; 238191783Srmacklem } else { 239191783Srmacklem sndreserve = rcvreserve = 1024 * pktscale; 240191783Srmacklem } 241191783Srmacklem } else { 242191783Srmacklem if (nrp->nr_sotype != SOCK_STREAM) 243191783Srmacklem panic("nfscon sotype"); 244191783Srmacklem if (nmp != NULL) { 245191783Srmacklem sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 246191783Srmacklem sizeof (u_int32_t)) * pktscale; 247191783Srmacklem rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 248191783Srmacklem sizeof (u_int32_t)) * pktscale; 249191783Srmacklem } else { 250191783Srmacklem sndreserve = rcvreserve = 1024 * pktscale; 251191783Srmacklem } 252191783Srmacklem } 253191783Srmacklem error = soreserve(so, sndreserve, rcvreserve); 254191783Srmacklem } while (error != 0 && pktscale > 2); 255191783Srmacklem soclose(so); 256191783Srmacklem if (error) { 257191783Srmacklem td->td_ucred = origcred; 258224086Szack goto out; 259191783Srmacklem } 260191783Srmacklem 261191783Srmacklem client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, 262191783Srmacklem nrp->nr_vers, sndreserve, rcvreserve); 263191783Srmacklem CLNT_CONTROL(client, CLSET_WAITCHAN, "newnfsreq"); 264191783Srmacklem if (nmp != NULL) { 265191783Srmacklem if ((nmp->nm_flag & NFSMNT_INT)) 266191783Srmacklem CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 267191783Srmacklem if ((nmp->nm_flag & NFSMNT_RESVPORT)) 268191783Srmacklem CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 269228757Srmacklem if (NFSHASSOFT(nmp)) { 270228757Srmacklem if (nmp->nm_sotype == SOCK_DGRAM) 271228757Srmacklem /* 272228757Srmacklem * For UDP, the large timeout for a reconnect 273228757Srmacklem * will be set to "nm_retry * nm_timeo / 2", so 274228757Srmacklem * we only want to do 2 reconnect timeout 275228757Srmacklem * retries. 276228757Srmacklem */ 277228757Srmacklem retries = 2; 278228757Srmacklem else 279228757Srmacklem retries = nmp->nm_retry; 280228757Srmacklem } else 281191783Srmacklem retries = INT_MAX; 282244042Srmacklem if (NFSHASNFSV4N(nmp)) { 283244042Srmacklem /* 284244042Srmacklem * Make sure the nfscbd_pool doesn't get destroyed 285244042Srmacklem * while doing this. 286244042Srmacklem */ 287244042Srmacklem NFSD_LOCK(); 288244042Srmacklem if (nfs_numnfscbd > 0) { 289244042Srmacklem nfs_numnfscbd++; 290244042Srmacklem NFSD_UNLOCK(); 291244042Srmacklem xprt = svc_vc_create_backchannel(nfscbd_pool); 292244042Srmacklem CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt); 293244042Srmacklem NFSD_LOCK(); 294244042Srmacklem nfs_numnfscbd--; 295244042Srmacklem if (nfs_numnfscbd == 0) 296244042Srmacklem wakeup(&nfs_numnfscbd); 297244042Srmacklem } 298244042Srmacklem NFSD_UNLOCK(); 299244042Srmacklem } 300191783Srmacklem } else { 301191783Srmacklem /* 302191783Srmacklem * Three cases: 303191783Srmacklem * - Null RPC callback to client 304191783Srmacklem * - Non-Null RPC callback to client, wait a little longer 305191783Srmacklem * - upcalls to nfsuserd and gssd (clp == NULL) 306191783Srmacklem */ 307191783Srmacklem if (callback_retry_mult == 0) { 308191783Srmacklem retries = NFSV4_UPCALLRETRY; 309191783Srmacklem CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 310191783Srmacklem } else { 311191783Srmacklem retries = NFSV4_CALLBACKRETRY * callback_retry_mult; 312191783Srmacklem } 313191783Srmacklem } 314191783Srmacklem CLNT_CONTROL(client, CLSET_RETRIES, &retries); 315191783Srmacklem 316228757Srmacklem if (nmp != NULL) { 317228757Srmacklem /* 318228757Srmacklem * For UDP, there are 2 timeouts: 319228757Srmacklem * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer 320228757Srmacklem * that does a retransmit of an RPC request using the same 321228757Srmacklem * socket and xid. This is what you normally want to do, 322228757Srmacklem * since NFS servers depend on "same xid" for their 323228757Srmacklem * Duplicate Request Cache. 324228757Srmacklem * - timeout specified in CLNT_CALL_MBUF(), which specifies when 325228757Srmacklem * retransmits on the same socket should fail and a fresh 326228757Srmacklem * socket created. Each of these timeouts counts as one 327228757Srmacklem * CLSET_RETRIES as set above. 328228757Srmacklem * Set the initial retransmit timeout for UDP. This timeout 329228757Srmacklem * doesn't exist for TCP and the following call just fails, 330228757Srmacklem * which is ok. 331228757Srmacklem */ 332228757Srmacklem timo.tv_sec = nmp->nm_timeo / NFS_HZ; 333228757Srmacklem timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 334228757Srmacklem CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); 335228757Srmacklem } 336228757Srmacklem 337191783Srmacklem mtx_lock(&nrp->nr_mtx); 338191783Srmacklem if (nrp->nr_client != NULL) { 339191783Srmacklem /* 340191783Srmacklem * Someone else already connected. 341191783Srmacklem */ 342191783Srmacklem CLNT_RELEASE(client); 343191783Srmacklem } else { 344191783Srmacklem nrp->nr_client = client; 345191783Srmacklem } 346191783Srmacklem 347191783Srmacklem /* 348191783Srmacklem * Protocols that do not require connections may be optionally left 349191783Srmacklem * unconnected for servers that reply from a port other than NFS_PORT. 350191783Srmacklem */ 351191783Srmacklem if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { 352191783Srmacklem mtx_unlock(&nrp->nr_mtx); 353191783Srmacklem CLNT_CONTROL(client, CLSET_CONNECT, &one); 354191783Srmacklem } else { 355191783Srmacklem mtx_unlock(&nrp->nr_mtx); 356191783Srmacklem } 357191783Srmacklem 358191783Srmacklem /* Restore current thread's credentials. */ 359191783Srmacklem td->td_ucred = origcred; 360224086Szack 361224086Szackout: 362224086Szack NFSEXITCODE(error); 363224086Szack return (error); 364191783Srmacklem} 365191783Srmacklem 366191783Srmacklem/* 367191783Srmacklem * NFS disconnect. Clean up and unlink. 368191783Srmacklem */ 369191783Srmacklemvoid 370191783Srmacklemnewnfs_disconnect(struct nfssockreq *nrp) 371191783Srmacklem{ 372191783Srmacklem CLIENT *client; 373191783Srmacklem 374191783Srmacklem mtx_lock(&nrp->nr_mtx); 375191783Srmacklem if (nrp->nr_client != NULL) { 376191783Srmacklem client = nrp->nr_client; 377191783Srmacklem nrp->nr_client = NULL; 378191783Srmacklem mtx_unlock(&nrp->nr_mtx); 379223309Srmacklem rpc_gss_secpurge_call(client); 380191783Srmacklem CLNT_CLOSE(client); 381191783Srmacklem CLNT_RELEASE(client); 382191783Srmacklem } else { 383191783Srmacklem mtx_unlock(&nrp->nr_mtx); 384191783Srmacklem } 385191783Srmacklem} 386191783Srmacklem 387191783Srmacklemstatic AUTH * 388191783Srmacklemnfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, 389191783Srmacklem char *srv_principal, gss_OID mech_oid, struct ucred *cred) 390191783Srmacklem{ 391191783Srmacklem rpc_gss_service_t svc; 392191783Srmacklem AUTH *auth; 393191783Srmacklem 394191783Srmacklem switch (secflavour) { 395191783Srmacklem case RPCSEC_GSS_KRB5: 396191783Srmacklem case RPCSEC_GSS_KRB5I: 397191783Srmacklem case RPCSEC_GSS_KRB5P: 398191783Srmacklem if (!mech_oid) { 399223309Srmacklem if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) 400191783Srmacklem return (NULL); 401191783Srmacklem } 402191783Srmacklem if (secflavour == RPCSEC_GSS_KRB5) 403191783Srmacklem svc = rpc_gss_svc_none; 404191783Srmacklem else if (secflavour == RPCSEC_GSS_KRB5I) 405191783Srmacklem svc = rpc_gss_svc_integrity; 406191783Srmacklem else 407191783Srmacklem svc = rpc_gss_svc_privacy; 408191783Srmacklem 409192616Srmacklem if (clnt_principal == NULL) 410223309Srmacklem auth = rpc_gss_secfind_call(nrp->nr_client, cred, 411192616Srmacklem srv_principal, mech_oid, svc); 412253049Srmacklem else { 413253049Srmacklem auth = rpc_gss_seccreate_call(nrp->nr_client, cred, 414253049Srmacklem clnt_principal, srv_principal, "kerberosv5", 415253049Srmacklem svc, NULL, NULL, NULL); 416253049Srmacklem return (auth); 417253049Srmacklem } 418192675Srmacklem if (auth != NULL) 419192675Srmacklem return (auth); 420192675Srmacklem /* fallthrough */ 421191783Srmacklem case AUTH_SYS: 422191783Srmacklem default: 423191783Srmacklem return (authunix_create(cred)); 424191783Srmacklem 425191783Srmacklem } 426191783Srmacklem} 427191783Srmacklem 428191783Srmacklem/* 429191783Srmacklem * Callback from the RPC code to generate up/down notifications. 430191783Srmacklem */ 431191783Srmacklem 432191783Srmacklemstruct nfs_feedback_arg { 433191783Srmacklem struct nfsmount *nf_mount; 434191783Srmacklem int nf_lastmsg; /* last tprintf */ 435191783Srmacklem int nf_tprintfmsg; 436191783Srmacklem struct thread *nf_td; 437191783Srmacklem}; 438191783Srmacklem 439191783Srmacklemstatic void 440191783Srmacklemnfs_feedback(int type, int proc, void *arg) 441191783Srmacklem{ 442191783Srmacklem struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 443191783Srmacklem struct nfsmount *nmp = nf->nf_mount; 444245909Sjhb time_t now; 445191783Srmacklem 446191783Srmacklem switch (type) { 447191783Srmacklem case FEEDBACK_REXMIT2: 448191783Srmacklem case FEEDBACK_RECONNECT: 449245909Sjhb now = NFSD_MONOSEC; 450245909Sjhb if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) { 451191783Srmacklem nfs_down(nmp, nf->nf_td, 452191783Srmacklem "not responding", 0, NFSSTA_TIMEO); 453191783Srmacklem nf->nf_tprintfmsg = TRUE; 454245909Sjhb nf->nf_lastmsg = now; 455191783Srmacklem } 456191783Srmacklem break; 457191783Srmacklem 458191783Srmacklem case FEEDBACK_OK: 459191783Srmacklem nfs_up(nf->nf_mount, nf->nf_td, 460191783Srmacklem "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 461191783Srmacklem break; 462191783Srmacklem } 463191783Srmacklem} 464191783Srmacklem 465191783Srmacklem/* 466191783Srmacklem * newnfs_request - goes something like this 467191783Srmacklem * - does the rpc by calling the krpc layer 468191783Srmacklem * - break down rpc header and return with nfs reply 469191783Srmacklem * nb: always frees up nd_mreq mbuf list 470191783Srmacklem */ 471191783Srmacklemint 472191783Srmacklemnewnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, 473191783Srmacklem struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, 474191783Srmacklem struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, 475244042Srmacklem u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *sep) 476191783Srmacklem{ 477244042Srmacklem u_int32_t retseq, retval, *tl; 478191783Srmacklem time_t waituntil; 479244042Srmacklem int i = 0, j = 0, opcnt, set_sigset = 0, slot; 480191783Srmacklem int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS; 481244042Srmacklem int freeslot, timeo; 482191783Srmacklem u_int16_t procnum; 483191783Srmacklem u_int trylater_delay = 1; 484191783Srmacklem struct nfs_feedback_arg nf; 485245909Sjhb struct timeval timo; 486191783Srmacklem AUTH *auth; 487191783Srmacklem struct rpc_callextra ext; 488191783Srmacklem enum clnt_stat stat; 489191783Srmacklem struct nfsreq *rep = NULL; 490253049Srmacklem char *srv_principal = NULL, *clnt_principal = NULL; 491195642Srmacklem sigset_t oldset; 492230345Srmacklem struct ucred *authcred; 493191783Srmacklem 494191783Srmacklem if (xidp != NULL) 495191783Srmacklem *xidp = 0; 496191783Srmacklem /* Reject requests while attempting a forced unmount. */ 497191783Srmacklem if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) { 498191783Srmacklem m_freem(nd->nd_mreq); 499191783Srmacklem return (ESTALE); 500191783Srmacklem } 501191783Srmacklem 502230345Srmacklem /* 503230345Srmacklem * Set authcred, which is used to acquire RPC credentials to 504230345Srmacklem * the cred argument, by default. The crhold() should not be 505230345Srmacklem * necessary, but will ensure that some future code change 506230345Srmacklem * doesn't result in the credential being free'd prematurely. 507230345Srmacklem */ 508230345Srmacklem authcred = crhold(cred); 509230345Srmacklem 510195642Srmacklem /* For client side interruptible mounts, mask off the signals. */ 511195642Srmacklem if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { 512195642Srmacklem newnfs_set_sigmask(td, &oldset); 513195642Srmacklem set_sigset = 1; 514195642Srmacklem } 515195642Srmacklem 516191783Srmacklem /* 517192675Srmacklem * XXX if not already connected call nfs_connect now. Longer 518192675Srmacklem * term, change nfs_mount to call nfs_connect unconditionally 519192675Srmacklem * and let clnt_reconnect_create handle reconnects. 520192675Srmacklem */ 521192675Srmacklem if (nrp->nr_client == NULL) 522192675Srmacklem newnfs_connect(nmp, nrp, cred, td, 0); 523192675Srmacklem 524192675Srmacklem /* 525191783Srmacklem * For a client side mount, nmp is != NULL and clp == NULL. For 526191783Srmacklem * server calls (callbacks or upcalls), nmp == NULL. 527191783Srmacklem */ 528191783Srmacklem if (clp != NULL) { 529191783Srmacklem NFSLOCKSTATE(); 530191783Srmacklem if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { 531191783Srmacklem secflavour = RPCSEC_GSS_KRB5; 532191783Srmacklem if (nd->nd_procnum != NFSPROC_NULL) { 533191783Srmacklem if (clp->lc_flags & LCL_GSSINTEGRITY) 534191783Srmacklem secflavour = RPCSEC_GSS_KRB5I; 535191783Srmacklem else if (clp->lc_flags & LCL_GSSPRIVACY) 536191783Srmacklem secflavour = RPCSEC_GSS_KRB5P; 537191783Srmacklem } 538191783Srmacklem } 539191783Srmacklem NFSUNLOCKSTATE(); 540191783Srmacklem } else if (nmp != NULL && NFSHASKERB(nmp) && 541191783Srmacklem nd->nd_procnum != NFSPROC_NULL) { 542191783Srmacklem if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) 543191783Srmacklem nd->nd_flag |= ND_USEGSSNAME; 544192675Srmacklem if ((nd->nd_flag & ND_USEGSSNAME) != 0) { 545192675Srmacklem /* 546192675Srmacklem * If there is a client side host based credential, 547192675Srmacklem * use that, otherwise use the system uid, if set. 548230345Srmacklem * The system uid is in the nmp->nm_sockreq.nr_cred 549230345Srmacklem * credentials. 550192675Srmacklem */ 551192675Srmacklem if (nmp->nm_krbnamelen > 0) { 552192675Srmacklem usegssname = 1; 553253049Srmacklem clnt_principal = nmp->nm_krbname; 554192675Srmacklem } else if (nmp->nm_uid != (uid_t)-1) { 555230345Srmacklem KASSERT(nmp->nm_sockreq.nr_cred != NULL, 556230345Srmacklem ("newnfs_request: NULL nr_cred")); 557230345Srmacklem crfree(authcred); 558230345Srmacklem authcred = crhold(nmp->nm_sockreq.nr_cred); 559192675Srmacklem } 560192675Srmacklem } else if (nmp->nm_krbnamelen == 0 && 561192675Srmacklem nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { 562192675Srmacklem /* 563192675Srmacklem * If there is no host based principal name and 564192675Srmacklem * the system uid is set and this is root, use the 565192675Srmacklem * system uid, since root won't have user 566192675Srmacklem * credentials in a credentials cache file. 567230345Srmacklem * The system uid is in the nmp->nm_sockreq.nr_cred 568230345Srmacklem * credentials. 569192675Srmacklem */ 570230345Srmacklem KASSERT(nmp->nm_sockreq.nr_cred != NULL, 571230345Srmacklem ("newnfs_request: NULL nr_cred")); 572230345Srmacklem crfree(authcred); 573230345Srmacklem authcred = crhold(nmp->nm_sockreq.nr_cred); 574192675Srmacklem } 575191783Srmacklem if (NFSHASINTEGRITY(nmp)) 576191783Srmacklem secflavour = RPCSEC_GSS_KRB5I; 577191783Srmacklem else if (NFSHASPRIVACY(nmp)) 578191783Srmacklem secflavour = RPCSEC_GSS_KRB5P; 579191783Srmacklem else 580191783Srmacklem secflavour = RPCSEC_GSS_KRB5; 581191783Srmacklem srv_principal = NFSMNT_SRVKRBNAME(nmp); 582223436Srmacklem } else if (nmp != NULL && !NFSHASKERB(nmp) && 583223436Srmacklem nd->nd_procnum != NFSPROC_NULL && 584223436Srmacklem (nd->nd_flag & ND_USEGSSNAME) != 0) { 585223436Srmacklem /* 586223436Srmacklem * Use the uid that did the mount when the RPC is doing 587223436Srmacklem * NFSv4 system operations, as indicated by the 588223436Srmacklem * ND_USEGSSNAME flag, for the AUTH_SYS case. 589230345Srmacklem * The credentials in nm_sockreq.nr_cred were used for the 590230345Srmacklem * mount. 591223436Srmacklem */ 592230345Srmacklem KASSERT(nmp->nm_sockreq.nr_cred != NULL, 593230345Srmacklem ("newnfs_request: NULL nr_cred")); 594230345Srmacklem crfree(authcred); 595230345Srmacklem authcred = crhold(nmp->nm_sockreq.nr_cred); 596191783Srmacklem } 597191783Srmacklem 598191783Srmacklem if (nmp != NULL) { 599191783Srmacklem bzero(&nf, sizeof(struct nfs_feedback_arg)); 600191783Srmacklem nf.nf_mount = nmp; 601191783Srmacklem nf.nf_td = td; 602245909Sjhb nf.nf_lastmsg = NFSD_MONOSEC - 603191783Srmacklem ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); 604191783Srmacklem } 605191783Srmacklem 606192181Srmacklem if (nd->nd_procnum == NFSPROC_NULL) 607192181Srmacklem auth = authnone_create(); 608253049Srmacklem else if (usegssname) { 609253049Srmacklem /* 610253049Srmacklem * For this case, the authenticator is held in the 611253049Srmacklem * nfssockreq structure, so don't release the reference count 612253049Srmacklem * held on it. --> Don't AUTH_DESTROY() it in this function. 613253049Srmacklem */ 614253049Srmacklem if (nrp->nr_auth == NULL) 615253049Srmacklem nrp->nr_auth = nfs_getauth(nrp, secflavour, 616253049Srmacklem clnt_principal, srv_principal, NULL, authcred); 617253049Srmacklem else 618253049Srmacklem rpc_gss_refresh_auth_call(nrp->nr_auth); 619253049Srmacklem auth = nrp->nr_auth; 620253049Srmacklem } else 621191783Srmacklem auth = nfs_getauth(nrp, secflavour, NULL, 622230345Srmacklem srv_principal, NULL, authcred); 623230345Srmacklem crfree(authcred); 624191783Srmacklem if (auth == NULL) { 625191783Srmacklem m_freem(nd->nd_mreq); 626195642Srmacklem if (set_sigset) 627195642Srmacklem newnfs_restore_sigmask(td, &oldset); 628191783Srmacklem return (EACCES); 629191783Srmacklem } 630191783Srmacklem bzero(&ext, sizeof(ext)); 631191783Srmacklem ext.rc_auth = auth; 632191783Srmacklem if (nmp != NULL) { 633191783Srmacklem ext.rc_feedback = nfs_feedback; 634191783Srmacklem ext.rc_feedback_arg = &nf; 635191783Srmacklem } 636191783Srmacklem 637191783Srmacklem procnum = nd->nd_procnum; 638191783Srmacklem if ((nd->nd_flag & ND_NFSV4) && 639192181Srmacklem nd->nd_procnum != NFSPROC_NULL && 640191783Srmacklem nd->nd_procnum != NFSV4PROC_CBCOMPOUND) 641191783Srmacklem procnum = NFSV4PROC_COMPOUND; 642191783Srmacklem 643191783Srmacklem if (nmp != NULL) { 644191783Srmacklem NFSINCRGLOBAL(newnfsstats.rpcrequests); 645207764Srmacklem 646207764Srmacklem /* Map the procnum to the old NFSv2 one, as required. */ 647207764Srmacklem if ((nd->nd_flag & ND_NFSV2) != 0) { 648207764Srmacklem if (nd->nd_procnum < NFS_V3NPROCS) 649207764Srmacklem procnum = nfsv2_procid[nd->nd_procnum]; 650207764Srmacklem else 651207764Srmacklem procnum = NFSV2PROC_NOOP; 652207764Srmacklem } 653207764Srmacklem 654191783Srmacklem /* 655191783Srmacklem * Now only used for the R_DONTRECOVER case, but until that is 656191783Srmacklem * supported within the krpc code, I need to keep a queue of 657191783Srmacklem * outstanding RPCs for nfsv4 client requests. 658191783Srmacklem */ 659191783Srmacklem if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) 660191783Srmacklem MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), 661191783Srmacklem M_NFSDREQ, M_WAITOK); 662223280Srmacklem#ifdef KDTRACE_HOOKS 663223280Srmacklem if (dtrace_nfscl_nfs234_start_probe != NULL) { 664223280Srmacklem uint32_t probe_id; 665223280Srmacklem int probe_procnum; 666223280Srmacklem 667223280Srmacklem if (nd->nd_flag & ND_NFSV4) { 668223280Srmacklem probe_id = 669223280Srmacklem nfscl_nfs4_start_probes[nd->nd_procnum]; 670223280Srmacklem probe_procnum = nd->nd_procnum; 671223280Srmacklem } else if (nd->nd_flag & ND_NFSV3) { 672223280Srmacklem probe_id = nfscl_nfs3_start_probes[procnum]; 673223280Srmacklem probe_procnum = procnum; 674223280Srmacklem } else { 675223280Srmacklem probe_id = 676223280Srmacklem nfscl_nfs2_start_probes[nd->nd_procnum]; 677223280Srmacklem probe_procnum = procnum; 678223280Srmacklem } 679223280Srmacklem if (probe_id != 0) 680223280Srmacklem (dtrace_nfscl_nfs234_start_probe) 681223280Srmacklem (probe_id, vp, nd->nd_mreq, cred, 682223280Srmacklem probe_procnum); 683223280Srmacklem } 684223280Srmacklem#endif 685191783Srmacklem } 686191783Srmacklem trycnt = 0; 687244042Srmacklem freeslot = -1; /* Set to slot that needs to be free'd */ 688191783Srmacklemtryagain: 689244042Srmacklem slot = -1; /* Slot that needs a sequence# increment. */ 690228757Srmacklem /* 691228757Srmacklem * This timeout specifies when a new socket should be created, 692228757Srmacklem * along with new xid values. For UDP, this should be done 693228757Srmacklem * infrequently, since retransmits of RPC requests should normally 694228757Srmacklem * use the same xid. 695228757Srmacklem */ 696191783Srmacklem if (nmp == NULL) { 697191783Srmacklem timo.tv_usec = 0; 698191783Srmacklem if (clp == NULL) 699191783Srmacklem timo.tv_sec = NFSV4_UPCALLTIMEO; 700191783Srmacklem else 701191783Srmacklem timo.tv_sec = NFSV4_CALLBACKTIMEO; 702191783Srmacklem } else { 703191783Srmacklem if (nrp->nr_sotype != SOCK_DGRAM) { 704191783Srmacklem timo.tv_usec = 0; 705191783Srmacklem if ((nmp->nm_flag & NFSMNT_NFSV4)) 706191783Srmacklem timo.tv_sec = INT_MAX; 707191783Srmacklem else 708191783Srmacklem timo.tv_sec = NFS_TCPTIMEO; 709191783Srmacklem } else { 710228757Srmacklem if (NFSHASSOFT(nmp)) { 711228757Srmacklem /* 712228757Srmacklem * CLSET_RETRIES is set to 2, so this should be 713228757Srmacklem * half of the total timeout required. 714228757Srmacklem */ 715228757Srmacklem timeo = nmp->nm_retry * nmp->nm_timeo / 2; 716228757Srmacklem if (timeo < 1) 717228757Srmacklem timeo = 1; 718228757Srmacklem timo.tv_sec = timeo / NFS_HZ; 719228757Srmacklem timo.tv_usec = (timeo % NFS_HZ) * 1000000 / 720228757Srmacklem NFS_HZ; 721228757Srmacklem } else { 722228757Srmacklem /* For UDP hard mounts, use a large value. */ 723228757Srmacklem timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; 724228757Srmacklem timo.tv_usec = 0; 725228757Srmacklem } 726191783Srmacklem } 727191783Srmacklem 728191783Srmacklem if (rep != NULL) { 729191783Srmacklem rep->r_flags = 0; 730191783Srmacklem rep->r_nmp = nmp; 731191783Srmacklem /* 732191783Srmacklem * Chain request into list of outstanding requests. 733191783Srmacklem */ 734191783Srmacklem NFSLOCKREQ(); 735191783Srmacklem TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); 736191783Srmacklem NFSUNLOCKREQ(); 737191783Srmacklem } 738191783Srmacklem } 739191783Srmacklem 740191783Srmacklem nd->nd_mrep = NULL; 741191783Srmacklem stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, 742191783Srmacklem &nd->nd_mrep, timo); 743191783Srmacklem 744191783Srmacklem if (rep != NULL) { 745191783Srmacklem /* 746191783Srmacklem * RPC done, unlink the request. 747191783Srmacklem */ 748191783Srmacklem NFSLOCKREQ(); 749191783Srmacklem TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); 750191783Srmacklem NFSUNLOCKREQ(); 751191783Srmacklem } 752191783Srmacklem 753191783Srmacklem /* 754191783Srmacklem * If there was a successful reply and a tprintf msg. 755191783Srmacklem * tprintf a response. 756191783Srmacklem */ 757191783Srmacklem if (stat == RPC_SUCCESS) { 758191783Srmacklem error = 0; 759191783Srmacklem } else if (stat == RPC_TIMEDOUT) { 760245476Sjhb NFSINCRGLOBAL(newnfsstats.rpctimeouts); 761191783Srmacklem error = ETIMEDOUT; 762191783Srmacklem } else if (stat == RPC_VERSMISMATCH) { 763245476Sjhb NFSINCRGLOBAL(newnfsstats.rpcinvalid); 764191783Srmacklem error = EOPNOTSUPP; 765191783Srmacklem } else if (stat == RPC_PROGVERSMISMATCH) { 766245476Sjhb NFSINCRGLOBAL(newnfsstats.rpcinvalid); 767191783Srmacklem error = EPROTONOSUPPORT; 768245476Sjhb } else if (stat == RPC_INTR) { 769245476Sjhb error = EINTR; 770191783Srmacklem } else { 771245476Sjhb NFSINCRGLOBAL(newnfsstats.rpcinvalid); 772191783Srmacklem error = EACCES; 773191783Srmacklem } 774191783Srmacklem if (error) { 775191783Srmacklem m_freem(nd->nd_mreq); 776253049Srmacklem if (usegssname == 0) 777253049Srmacklem AUTH_DESTROY(auth); 778191783Srmacklem if (rep != NULL) 779191783Srmacklem FREE((caddr_t)rep, M_NFSDREQ); 780195642Srmacklem if (set_sigset) 781195642Srmacklem newnfs_restore_sigmask(td, &oldset); 782191783Srmacklem return (error); 783191783Srmacklem } 784191783Srmacklem 785191783Srmacklem KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 786191783Srmacklem 787192695Srmacklem /* 788192695Srmacklem * Search for any mbufs that are not a multiple of 4 bytes long 789192695Srmacklem * or with m_data not longword aligned. 790192695Srmacklem * These could cause pointer alignment problems, so copy them to 791192695Srmacklem * well aligned mbufs. 792192695Srmacklem */ 793249592Sken newnfs_realign(&nd->nd_mrep, M_WAITOK); 794191783Srmacklem nd->nd_md = nd->nd_mrep; 795191783Srmacklem nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); 796191783Srmacklem nd->nd_repstat = 0; 797191783Srmacklem if (nd->nd_procnum != NFSPROC_NULL) { 798244042Srmacklem /* If sep == NULL, set it to the default in nmp. */ 799244042Srmacklem if (sep == NULL && nmp != NULL) 800244042Srmacklem sep = NFSMNT_MDSSESSION(nmp); 801191783Srmacklem /* 802191783Srmacklem * and now the actual NFS xdr. 803191783Srmacklem */ 804191783Srmacklem NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 805191783Srmacklem nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); 806244042Srmacklem if (nd->nd_repstat >= 10000) 807244042Srmacklem NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum, 808244042Srmacklem (int)nd->nd_repstat); 809244042Srmacklem 810244042Srmacklem /* 811244042Srmacklem * Get rid of the tag, return count and SEQUENCE result for 812244042Srmacklem * NFSv4. 813244042Srmacklem */ 814244042Srmacklem if ((nd->nd_flag & ND_NFSV4) != 0) { 815244042Srmacklem NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 816244042Srmacklem i = fxdr_unsigned(int, *tl); 817244042Srmacklem error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 818244042Srmacklem if (error) 819244042Srmacklem goto nfsmout; 820244042Srmacklem NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 821244042Srmacklem opcnt = fxdr_unsigned(int, *tl++); 822244042Srmacklem i = fxdr_unsigned(int, *tl++); 823244042Srmacklem j = fxdr_unsigned(int, *tl); 824244042Srmacklem if (j >= 10000) 825244042Srmacklem NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j); 826244042Srmacklem /* 827244042Srmacklem * If the first op is Sequence, free up the slot. 828244042Srmacklem */ 829244042Srmacklem if (nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) 830244042Srmacklem NFSCL_DEBUG(1, "failed seq=%d\n", j); 831244042Srmacklem if (nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) { 832244042Srmacklem NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 833244042Srmacklem 5 * NFSX_UNSIGNED); 834244042Srmacklem mtx_lock(&sep->nfsess_mtx); 835244042Srmacklem tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; 836244042Srmacklem retseq = fxdr_unsigned(uint32_t, *tl++); 837244042Srmacklem slot = fxdr_unsigned(int, *tl++); 838244042Srmacklem freeslot = slot; 839244042Srmacklem if (retseq != sep->nfsess_slotseq[slot]) 840244042Srmacklem printf("retseq diff 0x%x\n", retseq); 841244042Srmacklem retval = fxdr_unsigned(uint32_t, *++tl); 842244042Srmacklem if ((retval + 1) < sep->nfsess_foreslots) 843244042Srmacklem sep->nfsess_foreslots = (retval + 1); 844244042Srmacklem else if ((retval + 1) > sep->nfsess_foreslots) 845244042Srmacklem sep->nfsess_foreslots = (retval < 64) ? 846244042Srmacklem (retval + 1) : 64; 847244042Srmacklem mtx_unlock(&sep->nfsess_mtx); 848244042Srmacklem 849244042Srmacklem /* Grab the op and status for the next one. */ 850244042Srmacklem if (opcnt > 1) { 851244042Srmacklem NFSM_DISSECT(tl, uint32_t *, 852244042Srmacklem 2 * NFSX_UNSIGNED); 853244042Srmacklem i = fxdr_unsigned(int, *tl++); 854244042Srmacklem j = fxdr_unsigned(int, *tl); 855244042Srmacklem } 856244042Srmacklem } 857244042Srmacklem } 858191783Srmacklem if (nd->nd_repstat != 0) { 859224117Srmacklem if (((nd->nd_repstat == NFSERR_DELAY || 860224117Srmacklem nd->nd_repstat == NFSERR_GRACE) && 861191783Srmacklem (nd->nd_flag & ND_NFSV4) && 862224117Srmacklem nd->nd_procnum != NFSPROC_DELEGRETURN && 863191783Srmacklem nd->nd_procnum != NFSPROC_SETATTR && 864191783Srmacklem nd->nd_procnum != NFSPROC_READ && 865244042Srmacklem nd->nd_procnum != NFSPROC_READDS && 866191783Srmacklem nd->nd_procnum != NFSPROC_WRITE && 867244042Srmacklem nd->nd_procnum != NFSPROC_WRITEDS && 868191783Srmacklem nd->nd_procnum != NFSPROC_OPEN && 869191783Srmacklem nd->nd_procnum != NFSPROC_CREATE && 870191783Srmacklem nd->nd_procnum != NFSPROC_OPENCONFIRM && 871191783Srmacklem nd->nd_procnum != NFSPROC_OPENDOWNGRADE && 872191783Srmacklem nd->nd_procnum != NFSPROC_CLOSE && 873191783Srmacklem nd->nd_procnum != NFSPROC_LOCK && 874191783Srmacklem nd->nd_procnum != NFSPROC_LOCKU) || 875191783Srmacklem (nd->nd_repstat == NFSERR_DELAY && 876191783Srmacklem (nd->nd_flag & ND_NFSV4) == 0) || 877191783Srmacklem nd->nd_repstat == NFSERR_RESOURCE) { 878191783Srmacklem if (trylater_delay > NFS_TRYLATERDEL) 879191783Srmacklem trylater_delay = NFS_TRYLATERDEL; 880191783Srmacklem waituntil = NFSD_MONOSEC + trylater_delay; 881191783Srmacklem while (NFSD_MONOSEC < waituntil) 882207170Srmacklem (void) nfs_catnap(PZERO, 0, "nfstry"); 883191783Srmacklem trylater_delay *= 2; 884244042Srmacklem if (slot != -1) { 885244042Srmacklem mtx_lock(&sep->nfsess_mtx); 886244042Srmacklem sep->nfsess_slotseq[slot]++; 887244042Srmacklem *nd->nd_slotseq = txdr_unsigned( 888244042Srmacklem sep->nfsess_slotseq[slot]); 889244042Srmacklem mtx_unlock(&sep->nfsess_mtx); 890244042Srmacklem } 891223441Srmacklem m_freem(nd->nd_mrep); 892223441Srmacklem nd->nd_mrep = NULL; 893191783Srmacklem goto tryagain; 894191783Srmacklem } 895191783Srmacklem 896191783Srmacklem /* 897191783Srmacklem * If the File Handle was stale, invalidate the 898191783Srmacklem * lookup cache, just in case. 899191783Srmacklem * (vp != NULL implies a client side call) 900191783Srmacklem */ 901191783Srmacklem if (nd->nd_repstat == ESTALE && vp != NULL) { 902191783Srmacklem cache_purge(vp); 903191783Srmacklem if (ncl_call_invalcaches != NULL) 904191783Srmacklem (*ncl_call_invalcaches)(vp); 905191783Srmacklem } 906191783Srmacklem } 907244042Srmacklem if ((nd->nd_flag & ND_NFSV4) != 0) { 908244042Srmacklem /* Free the slot, as required. */ 909244042Srmacklem if (freeslot != -1) 910244042Srmacklem nfsv4_freeslot(sep, freeslot); 911191783Srmacklem /* 912244042Srmacklem * If this op is Putfh, throw its results away. 913191783Srmacklem */ 914244042Srmacklem if (j >= 10000) 915244042Srmacklem NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j); 916244042Srmacklem if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) { 917191783Srmacklem NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); 918191783Srmacklem i = fxdr_unsigned(int, *tl++); 919191783Srmacklem j = fxdr_unsigned(int, *tl); 920244042Srmacklem if (j >= 10000) 921244042Srmacklem NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i, 922244042Srmacklem j); 923191783Srmacklem /* 924191783Srmacklem * All Compounds that do an Op that must 925191783Srmacklem * be in sequence consist of NFSV4OP_PUTFH 926191783Srmacklem * followed by one of these. As such, we 927191783Srmacklem * can determine if the seqid# should be 928191783Srmacklem * incremented, here. 929191783Srmacklem */ 930191783Srmacklem if ((i == NFSV4OP_OPEN || 931191783Srmacklem i == NFSV4OP_OPENCONFIRM || 932191783Srmacklem i == NFSV4OP_OPENDOWNGRADE || 933191783Srmacklem i == NFSV4OP_CLOSE || 934191783Srmacklem i == NFSV4OP_LOCK || 935191783Srmacklem i == NFSV4OP_LOCKU) && 936191783Srmacklem (j == 0 || 937191783Srmacklem (j != NFSERR_STALECLIENTID && 938191783Srmacklem j != NFSERR_STALESTATEID && 939191783Srmacklem j != NFSERR_BADSTATEID && 940191783Srmacklem j != NFSERR_BADSEQID && 941191783Srmacklem j != NFSERR_BADXDR && 942191783Srmacklem j != NFSERR_RESOURCE && 943191783Srmacklem j != NFSERR_NOFILEHANDLE))) 944191783Srmacklem nd->nd_flag |= ND_INCRSEQID; 945191783Srmacklem } 946244042Srmacklem /* 947244042Srmacklem * If this op's status is non-zero, mark 948244042Srmacklem * that there is no more data to process. 949244042Srmacklem */ 950244042Srmacklem if (j) 951244042Srmacklem nd->nd_flag |= ND_NOMOREDATA; 952191783Srmacklem 953191783Srmacklem /* 954191783Srmacklem * If R_DONTRECOVER is set, replace the stale error 955191783Srmacklem * reply, so that recovery isn't initiated. 956191783Srmacklem */ 957191783Srmacklem if ((nd->nd_repstat == NFSERR_STALECLIENTID || 958244042Srmacklem nd->nd_repstat == NFSERR_BADSESSION || 959191783Srmacklem nd->nd_repstat == NFSERR_STALESTATEID) && 960191783Srmacklem rep != NULL && (rep->r_flags & R_DONTRECOVER)) 961191783Srmacklem nd->nd_repstat = NFSERR_STALEDONTRECOVER; 962191783Srmacklem } 963192181Srmacklem } 964191783Srmacklem 965223280Srmacklem#ifdef KDTRACE_HOOKS 966223280Srmacklem if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { 967223280Srmacklem uint32_t probe_id; 968223280Srmacklem int probe_procnum; 969223280Srmacklem 970223280Srmacklem if (nd->nd_flag & ND_NFSV4) { 971223280Srmacklem probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; 972223280Srmacklem probe_procnum = nd->nd_procnum; 973223280Srmacklem } else if (nd->nd_flag & ND_NFSV3) { 974223280Srmacklem probe_id = nfscl_nfs3_done_probes[procnum]; 975223280Srmacklem probe_procnum = procnum; 976223280Srmacklem } else { 977223280Srmacklem probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; 978223280Srmacklem probe_procnum = procnum; 979223280Srmacklem } 980223280Srmacklem if (probe_id != 0) 981223280Srmacklem (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, 982223280Srmacklem nd->nd_mreq, cred, probe_procnum, 0); 983223280Srmacklem } 984223280Srmacklem#endif 985223280Srmacklem 986192181Srmacklem m_freem(nd->nd_mreq); 987253049Srmacklem if (usegssname == 0) 988253049Srmacklem AUTH_DESTROY(auth); 989192181Srmacklem if (rep != NULL) 990192181Srmacklem FREE((caddr_t)rep, M_NFSDREQ); 991195642Srmacklem if (set_sigset) 992195642Srmacklem newnfs_restore_sigmask(td, &oldset); 993192181Srmacklem return (0); 994191783Srmacklemnfsmout: 995191783Srmacklem mbuf_freem(nd->nd_mrep); 996191783Srmacklem mbuf_freem(nd->nd_mreq); 997253049Srmacklem if (usegssname == 0) 998253049Srmacklem AUTH_DESTROY(auth); 999191783Srmacklem if (rep != NULL) 1000191783Srmacklem FREE((caddr_t)rep, M_NFSDREQ); 1001195642Srmacklem if (set_sigset) 1002195642Srmacklem newnfs_restore_sigmask(td, &oldset); 1003191783Srmacklem return (error); 1004191783Srmacklem} 1005191783Srmacklem 1006191783Srmacklem/* 1007191783Srmacklem * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 1008191783Srmacklem * wait for all requests to complete. This is used by forced unmounts 1009191783Srmacklem * to terminate any outstanding RPCs. 1010191783Srmacklem */ 1011191783Srmacklemint 1012191783Srmacklemnewnfs_nmcancelreqs(struct nfsmount *nmp) 1013191783Srmacklem{ 1014191783Srmacklem 1015191783Srmacklem if (nmp->nm_sockreq.nr_client != NULL) 1016191783Srmacklem CLNT_CLOSE(nmp->nm_sockreq.nr_client); 1017191783Srmacklem return (0); 1018191783Srmacklem} 1019191783Srmacklem 1020191783Srmacklem/* 1021191783Srmacklem * Any signal that can interrupt an NFS operation in an intr mount 1022191783Srmacklem * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 1023191783Srmacklem */ 1024191783Srmacklemint newnfs_sig_set[] = { 1025191783Srmacklem SIGINT, 1026191783Srmacklem SIGTERM, 1027191783Srmacklem SIGHUP, 1028191783Srmacklem SIGKILL, 1029191783Srmacklem SIGQUIT 1030191783Srmacklem}; 1031191783Srmacklem 1032191783Srmacklem/* 1033191783Srmacklem * Check to see if one of the signals in our subset is pending on 1034191783Srmacklem * the process (in an intr mount). 1035191783Srmacklem */ 1036191783Srmacklemstatic int 1037191783Srmacklemnfs_sig_pending(sigset_t set) 1038191783Srmacklem{ 1039191783Srmacklem int i; 1040191783Srmacklem 1041191783Srmacklem for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) 1042191783Srmacklem if (SIGISMEMBER(set, newnfs_sig_set[i])) 1043191783Srmacklem return (1); 1044191783Srmacklem return (0); 1045191783Srmacklem} 1046191783Srmacklem 1047191783Srmacklem/* 1048191783Srmacklem * The set/restore sigmask functions are used to (temporarily) overwrite 1049246417Sjhb * the thread td_sigmask during an RPC call (for example). These are also 1050191783Srmacklem * used in other places in the NFS client that might tsleep(). 1051191783Srmacklem */ 1052191783Srmacklemvoid 1053191783Srmacklemnewnfs_set_sigmask(struct thread *td, sigset_t *oldset) 1054191783Srmacklem{ 1055191783Srmacklem sigset_t newset; 1056191783Srmacklem int i; 1057191783Srmacklem struct proc *p; 1058191783Srmacklem 1059191783Srmacklem SIGFILLSET(newset); 1060191783Srmacklem if (td == NULL) 1061191783Srmacklem td = curthread; /* XXX */ 1062191783Srmacklem p = td->td_proc; 1063191783Srmacklem /* Remove the NFS set of signals from newset */ 1064191783Srmacklem PROC_LOCK(p); 1065191783Srmacklem mtx_lock(&p->p_sigacts->ps_mtx); 1066191783Srmacklem for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) { 1067191783Srmacklem /* 1068191783Srmacklem * But make sure we leave the ones already masked 1069191783Srmacklem * by the process, ie. remove the signal from the 1070191783Srmacklem * temporary signalmask only if it wasn't already 1071191783Srmacklem * in p_sigmask. 1072191783Srmacklem */ 1073191783Srmacklem if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && 1074191783Srmacklem !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) 1075191783Srmacklem SIGDELSET(newset, newnfs_sig_set[i]); 1076191783Srmacklem } 1077191783Srmacklem mtx_unlock(&p->p_sigacts->ps_mtx); 1078246417Sjhb kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 1079246417Sjhb SIGPROCMASK_PROC_LOCKED); 1080191783Srmacklem PROC_UNLOCK(p); 1081191783Srmacklem} 1082191783Srmacklem 1083191783Srmacklemvoid 1084191783Srmacklemnewnfs_restore_sigmask(struct thread *td, sigset_t *set) 1085191783Srmacklem{ 1086191783Srmacklem if (td == NULL) 1087191783Srmacklem td = curthread; /* XXX */ 1088191783Srmacklem kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 1089191783Srmacklem} 1090191783Srmacklem 1091191783Srmacklem/* 1092191783Srmacklem * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 1093191783Srmacklem * old one after msleep() returns. 1094191783Srmacklem */ 1095191783Srmacklemint 1096191783Srmacklemnewnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) 1097191783Srmacklem{ 1098191783Srmacklem sigset_t oldset; 1099191783Srmacklem int error; 1100191783Srmacklem struct proc *p; 1101191783Srmacklem 1102191783Srmacklem if ((priority & PCATCH) == 0) 1103191783Srmacklem return msleep(ident, mtx, priority, wmesg, timo); 1104191783Srmacklem if (td == NULL) 1105191783Srmacklem td = curthread; /* XXX */ 1106191783Srmacklem newnfs_set_sigmask(td, &oldset); 1107191783Srmacklem error = msleep(ident, mtx, priority, wmesg, timo); 1108191783Srmacklem newnfs_restore_sigmask(td, &oldset); 1109191783Srmacklem p = td->td_proc; 1110191783Srmacklem return (error); 1111191783Srmacklem} 1112191783Srmacklem 1113191783Srmacklem/* 1114191783Srmacklem * Test for a termination condition pending on the process. 1115191783Srmacklem * This is used for NFSMNT_INT mounts. 1116191783Srmacklem */ 1117191783Srmacklemint 1118191783Srmacklemnewnfs_sigintr(struct nfsmount *nmp, struct thread *td) 1119191783Srmacklem{ 1120191783Srmacklem struct proc *p; 1121191783Srmacklem sigset_t tmpset; 1122191783Srmacklem 1123191783Srmacklem /* Terminate all requests while attempting a forced unmount. */ 1124191783Srmacklem if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 1125191783Srmacklem return (EIO); 1126191783Srmacklem if (!(nmp->nm_flag & NFSMNT_INT)) 1127191783Srmacklem return (0); 1128191783Srmacklem if (td == NULL) 1129191783Srmacklem return (0); 1130191783Srmacklem p = td->td_proc; 1131191783Srmacklem PROC_LOCK(p); 1132191783Srmacklem tmpset = p->p_siglist; 1133191783Srmacklem SIGSETOR(tmpset, td->td_siglist); 1134191783Srmacklem SIGSETNAND(tmpset, td->td_sigmask); 1135191783Srmacklem mtx_lock(&p->p_sigacts->ps_mtx); 1136191783Srmacklem SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 1137191783Srmacklem mtx_unlock(&p->p_sigacts->ps_mtx); 1138191783Srmacklem if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 1139191783Srmacklem && nfs_sig_pending(tmpset)) { 1140191783Srmacklem PROC_UNLOCK(p); 1141191783Srmacklem return (EINTR); 1142191783Srmacklem } 1143191783Srmacklem PROC_UNLOCK(p); 1144191783Srmacklem return (0); 1145191783Srmacklem} 1146191783Srmacklem 1147191783Srmacklemstatic int 1148191783Srmacklemnfs_msg(struct thread *td, const char *server, const char *msg, int error) 1149191783Srmacklem{ 1150191783Srmacklem struct proc *p; 1151191783Srmacklem 1152191783Srmacklem p = td ? td->td_proc : NULL; 1153191783Srmacklem if (error) { 1154191783Srmacklem tprintf(p, LOG_INFO, "newnfs server %s: %s, error %d\n", 1155191783Srmacklem server, msg, error); 1156191783Srmacklem } else { 1157191783Srmacklem tprintf(p, LOG_INFO, "newnfs server %s: %s\n", server, msg); 1158191783Srmacklem } 1159191783Srmacklem return (0); 1160191783Srmacklem} 1161191783Srmacklem 1162191783Srmacklemstatic void 1163191783Srmacklemnfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 1164191783Srmacklem int error, int flags) 1165191783Srmacklem{ 1166191783Srmacklem if (nmp == NULL) 1167191783Srmacklem return; 1168191783Srmacklem mtx_lock(&nmp->nm_mtx); 1169191783Srmacklem if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 1170191783Srmacklem nmp->nm_state |= NFSSTA_TIMEO; 1171191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1172191783Srmacklem vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1173191783Srmacklem VQ_NOTRESP, 0); 1174191783Srmacklem } else 1175191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1176191783Srmacklem mtx_lock(&nmp->nm_mtx); 1177191783Srmacklem if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1178191783Srmacklem nmp->nm_state |= NFSSTA_LOCKTIMEO; 1179191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1180191783Srmacklem vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1181191783Srmacklem VQ_NOTRESPLOCK, 0); 1182191783Srmacklem } else 1183191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1184191783Srmacklem nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 1185191783Srmacklem} 1186191783Srmacklem 1187191783Srmacklemstatic void 1188191783Srmacklemnfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 1189191783Srmacklem int flags, int tprintfmsg) 1190191783Srmacklem{ 1191191783Srmacklem if (nmp == NULL) 1192191783Srmacklem return; 1193191783Srmacklem if (tprintfmsg) { 1194191783Srmacklem nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 1195191783Srmacklem } 1196191783Srmacklem 1197191783Srmacklem mtx_lock(&nmp->nm_mtx); 1198191783Srmacklem if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 1199191783Srmacklem nmp->nm_state &= ~NFSSTA_TIMEO; 1200191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1201191783Srmacklem vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1202191783Srmacklem VQ_NOTRESP, 1); 1203191783Srmacklem } else 1204191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1205191783Srmacklem 1206191783Srmacklem mtx_lock(&nmp->nm_mtx); 1207191783Srmacklem if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1208191783Srmacklem nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 1209191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1210191783Srmacklem vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1211191783Srmacklem VQ_NOTRESPLOCK, 1); 1212191783Srmacklem } else 1213191783Srmacklem mtx_unlock(&nmp->nm_mtx); 1214191783Srmacklem} 1215191783Srmacklem 1216