1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1991, 1993, 1995 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Rick Macklem at The University of Guelph. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 
63 * 64 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 65 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $ 66 */ 67 68/* 69 * Socket operations for use by nfs 70 */ 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/proc.h> 75#include <sys/signalvar.h> 76#include <sys/kauth.h> 77#include <sys/mount_internal.h> 78#include <sys/kernel.h> 79#include <sys/kpi_mbuf.h> 80#include <sys/malloc.h> 81#include <sys/vnode.h> 82#include <sys/domain.h> 83#include <sys/protosw.h> 84#include <sys/socket.h> 85#include <sys/syslog.h> 86#include <sys/tprintf.h> 87#include <libkern/OSAtomic.h> 88 89#include <sys/time.h> 90#include <kern/clock.h> 91#include <kern/task.h> 92#include <kern/thread.h> 93#include <kern/thread_call.h> 94#include <sys/user.h> 95#include <sys/acct.h> 96 97#include <netinet/in.h> 98#include <netinet/tcp.h> 99 100#include <nfs/rpcv2.h> 101#include <nfs/krpc.h> 102#include <nfs/nfsproto.h> 103#include <nfs/nfs.h> 104#include <nfs/xdr_subs.h> 105#include <nfs/nfsm_subs.h> 106#include <nfs/nfs_gss.h> 107#include <nfs/nfsmount.h> 108#include <nfs/nfsnode.h> 109 110/* XXX */ 111boolean_t current_thread_aborted(void); 112kern_return_t thread_terminate(thread_t); 113 114 115#if NFSSERVER 116int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */ 117 118int nfsrv_getstream(struct nfsrv_sock *,int); 119int nfsrv_getreq(struct nfsrv_descript *); 120extern int nfsv3_procid[NFS_NPROCS]; 121#endif /* NFSSERVER */ 122 123/* 124 * compare two sockaddr structures 125 */ 126int 127nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2) 128{ 129 if (!sa1) 130 return (-1); 131 if (!sa2) 132 return (1); 133 if (sa1->sa_family != sa2->sa_family) 134 return ((sa1->sa_family < sa2->sa_family) ? -1 : 1); 135 if (sa1->sa_len != sa2->sa_len) 136 return ((sa1->sa_len < sa2->sa_len) ? 
-1 : 1); 137 if (sa1->sa_family == AF_INET) 138 return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr, 139 &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr))); 140 if (sa1->sa_family == AF_INET6) 141 return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr, 142 &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr))); 143 return (-1); 144} 145 146#if NFSCLIENT 147 148int nfs_reconnect(struct nfsmount *); 149int nfs_connect_setup(struct nfsmount *); 150void nfs_mount_sock_thread(void *, wait_result_t); 151void nfs_udp_rcv(socket_t, void*, int); 152void nfs_tcp_rcv(socket_t, void*, int); 153void nfs_sock_poke(struct nfsmount *); 154void nfs_request_match_reply(struct nfsmount *, mbuf_t); 155void nfs_reqdequeue(struct nfsreq *); 156void nfs_reqbusy(struct nfsreq *); 157struct nfsreq *nfs_reqnext(struct nfsreq *); 158int nfs_wait_reply(struct nfsreq *); 159void nfs_softterm(struct nfsreq *); 160int nfs_can_squish(struct nfsmount *); 161int nfs_is_squishy(struct nfsmount *); 162int nfs_is_dead(int, struct nfsmount *); 163 164#ifdef NFS_SOCKET_DEBUGGING 165#define NFS_SOCK_DBG(X) printf X 166#else 167#define NFS_SOCK_DBG(X) 168#endif 169 170/* 171 * Estimate rto for an nfs rpc sent via. an unreliable datagram. 172 * Use the mean and mean deviation of rtt for the appropriate type of rpc 173 * for the frequent rpcs and a default for the others. 174 * The justification for doing "other" this way is that these rpcs 175 * happen so infrequently that timer est. would probably be stale. 176 * Also, since many of these rpcs are 177 * non-idempotent, a conservative timeout is desired. 178 * getattr, lookup - A+2D 179 * read, write - A+4D 180 * other - nm_timeo 181 */ 182#define NFS_RTO(n, t) \ 183 ((t) == 0 ? (n)->nm_timeo : \ 184 ((t) < 3 ? 
\ 185 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 186 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 187#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 188#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 189 190/* 191 * Defines which timer to use for the procnum. 192 * 0 - default 193 * 1 - getattr 194 * 2 - lookup 195 * 3 - read 196 * 4 - write 197 */ 198static int proct[NFS_NPROCS] = { 199 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0 200}; 201 202/* 203 * There is a congestion window for outstanding rpcs maintained per mount 204 * point. The cwnd size is adjusted in roughly the way that: 205 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 206 * SIGCOMM '88". ACM, August 1988. 207 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 208 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 209 * of rpcs is in progress. 210 * (The sent count and cwnd are scaled for integer arith.) 211 * Variants of "slow start" were tried and were found to be too much of a 212 * performance hit (ave. rtt 3 times larger), 213 * I suspect due to the large rtt that nfs rpcs have. 214 */ 215#define NFS_CWNDSCALE 256 216#define NFS_MAXCWND (NFS_CWNDSCALE * 32) 217static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 218 219/* 220 * Increment location index to next address/server/location. 
221 */ 222void 223nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip) 224{ 225 uint8_t loc = nlip->nli_loc; 226 uint8_t serv = nlip->nli_serv; 227 uint8_t addr = nlip->nli_addr; 228 229 /* move to next address */ 230 addr++; 231 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) { 232 /* no more addresses on current server, go to first address of next server */ 233next_server: 234 addr = 0; 235 serv++; 236 if (serv >= nlp->nl_locations[loc]->nl_servcount) { 237 /* no more servers on current location, go to first server of next location */ 238 serv = 0; 239 loc++; 240 if (loc >= nlp->nl_numlocs) 241 loc = 0; /* after last location, wrap back around to first location */ 242 } 243 } 244 /* 245 * It's possible for this next server to not have any addresses. 246 * Check for that here and go to the next server. 247 * But bail out if we've managed to come back around to the original 248 * location that was passed in. (That would mean no servers had any 249 * addresses. And we don't want to spin here forever.) 250 */ 251 if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr)) 252 return; 253 if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) 254 goto next_server; 255 256 nlip->nli_loc = loc; 257 nlip->nli_serv = serv; 258 nlip->nli_addr = addr; 259} 260 261/* 262 * Compare two location indices. 263 */ 264int 265nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2) 266{ 267 if (nlip1->nli_loc != nlip2->nli_loc) 268 return (nlip1->nli_loc - nlip2->nli_loc); 269 if (nlip1->nli_serv != nlip2->nli_serv) 270 return (nlip1->nli_serv - nlip2->nli_serv); 271 return (nlip1->nli_addr - nlip2->nli_addr); 272} 273 274/* 275 * Get the mntfromname (or path portion only) for a given location. 
276 */ 277void 278nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly) 279{ 280 struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc]; 281 char *p; 282 int cnt, i; 283 284 p = s; 285 if (!pathonly) { 286 cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name); 287 p += cnt; 288 size -= cnt; 289 } 290 if (fsl->nl_path.np_compcount == 0) { 291 /* mounting root export on server */ 292 if (size > 0) { 293 *p++ = '/'; 294 *p++ = '\0'; 295 } 296 return; 297 } 298 /* append each server path component */ 299 for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) { 300 cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]); 301 p += cnt; 302 size -= cnt; 303 } 304} 305 306/* 307 * NFS client connect socket upcall. 308 * (Used only during socket connect/search.) 309 */ 310void 311nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag) 312{ 313 struct nfs_socket *nso = arg; 314 size_t rcvlen; 315 mbuf_t m; 316 int error = 0, recv = 1; 317 318 if (nso->nso_flags & NSO_CONNECTING) { 319 NFS_SOCK_DBG(("nfs connect - socket %p upcall - connecting\n", nso)); 320 wakeup(nso->nso_wake); 321 return; 322 } 323 324 lck_mtx_lock(&nso->nso_lock); 325 if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) { 326 NFS_SOCK_DBG(("nfs connect - socket %p upcall - nevermind\n", nso)); 327 lck_mtx_unlock(&nso->nso_lock); 328 return; 329 } 330 NFS_SOCK_DBG(("nfs connect - socket %p upcall\n", nso)); 331 nso->nso_flags |= NSO_UPCALL; 332 333 /* loop while we make error-free progress */ 334 while (!error && recv) { 335 /* make sure we're still interested in this socket */ 336 if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD)) 337 break; 338 lck_mtx_unlock(&nso->nso_lock); 339 m = NULL; 340 if (nso->nso_sotype == SOCK_STREAM) { 341 error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m); 342 } else { 343 rcvlen = 1000000; 
344 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen); 345 recv = m ? 1 : 0; 346 } 347 lck_mtx_lock(&nso->nso_lock); 348 if (m) { 349 /* match response with request */ 350 struct nfsm_chain nmrep; 351 uint32_t reply = 0, rxid = 0, verf_type, verf_len; 352 uint32_t reply_status, rejected_status, accepted_status; 353 354 nfsm_chain_dissect_init(error, &nmrep, m); 355 nfsm_chain_get_32(error, &nmrep, rxid); 356 nfsm_chain_get_32(error, &nmrep, reply); 357 if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid))) 358 error = EBADRPC; 359 nfsm_chain_get_32(error, &nmrep, reply_status); 360 if (!error && (reply_status == RPC_MSGDENIED)) { 361 nfsm_chain_get_32(error, &nmrep, rejected_status); 362 if (!error) 363 error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES; 364 } 365 nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */ 366 nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */ 367 nfsmout_if(error); 368 if (verf_len) 369 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len)); 370 nfsm_chain_get_32(error, &nmrep, accepted_status); 371 nfsmout_if(error); 372 if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) { 373 uint32_t minvers, maxvers; 374 nfsm_chain_get_32(error, &nmrep, minvers); 375 nfsm_chain_get_32(error, &nmrep, maxvers); 376 nfsmout_if(error); 377 if (nso->nso_protocol == PMAPPROG) { 378 if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS)) 379 error = EPROGMISMATCH; 380 else if ((nso->nso_saddr->sa_family == AF_INET) && 381 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers)) 382 nso->nso_version = PMAPVERS; 383 else if (nso->nso_saddr->sa_family == AF_INET6) { 384 if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers)) 385 nso->nso_version = RPCBVERS4; 386 else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers)) 387 nso->nso_version = RPCBVERS3; 388 } 389 } else if (nso->nso_protocol == NFS_PROG) { 390 if ((minvers > NFS_VER4) || (maxvers < NFS_VER2)) 391 error = EPROGMISMATCH; 392 else if 
((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers)) 393 nso->nso_version = NFS_VER3; 394 else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers)) 395 nso->nso_version = NFS_VER2; 396 else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers)) 397 nso->nso_version = NFS_VER4; 398 } 399 if (!error && nso->nso_version) 400 accepted_status = RPC_SUCCESS; 401 } 402 if (!error) { 403 switch (accepted_status) { 404 case RPC_SUCCESS: 405 error = 0; 406 break; 407 case RPC_PROGUNAVAIL: 408 error = EPROGUNAVAIL; 409 break; 410 case RPC_PROGMISMATCH: 411 error = EPROGMISMATCH; 412 break; 413 case RPC_PROCUNAVAIL: 414 error = EPROCUNAVAIL; 415 break; 416 case RPC_GARBAGE: 417 error = EBADRPC; 418 break; 419 case RPC_SYSTEM_ERR: 420 default: 421 error = EIO; 422 break; 423 } 424 } 425nfsmout: 426 nso->nso_flags &= ~NSO_PINGING; 427 if (error) { 428 nso->nso_error = error; 429 nso->nso_flags |= NSO_DEAD; 430 } else { 431 nso->nso_flags |= NSO_VERIFIED; 432 } 433 mbuf_freem(m); 434 /* wake up search thread */ 435 wakeup(nso->nso_wake); 436 break; 437 } 438 } 439 440 nso->nso_flags &= ~NSO_UPCALL; 441 if ((error != EWOULDBLOCK) && (error || !recv)) { 442 /* problems with the socket... */ 443 nso->nso_error = error ? error : EPIPE; 444 nso->nso_flags |= NSO_DEAD; 445 wakeup(nso->nso_wake); 446 } 447 if (nso->nso_flags & NSO_DISCONNECTING) 448 wakeup(&nso->nso_flags); 449 lck_mtx_unlock(&nso->nso_lock); 450} 451 452/* 453 * Create/initialize an nfs_socket structure. 
454 */ 455int 456nfs_socket_create( 457 __unused struct nfsmount *nmp, 458 struct sockaddr *sa, 459 int sotype, 460 in_port_t port, 461 uint32_t protocol, 462 uint32_t vers, 463 int resvport, 464 struct nfs_socket **nsop) 465{ 466 struct nfs_socket *nso; 467 struct timeval now; 468 int error; 469#ifdef NFS_SOCKET_DEBUGGING 470 char naddr[MAX_IPv6_STR_LEN]; 471 void *sinaddr; 472 473 if (sa->sa_family == AF_INET) 474 sinaddr = &((struct sockaddr_in*)sa)->sin_addr; 475 else 476 sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr; 477 if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr) 478 strlcpy(naddr, "<unknown>", sizeof(naddr)); 479#endif 480 481 *nsop = NULL; 482 483 /* Create the socket. */ 484 MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO); 485 if (nso) 486 MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO); 487 if (!nso || !nso->nso_saddr) { 488 if (nso) 489 FREE(nso, M_TEMP); 490 return (ENOMEM); 491 } 492 lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL); 493 nso->nso_sotype = sotype; 494 if (nso->nso_sotype == SOCK_STREAM) 495 nfs_rpc_record_state_init(&nso->nso_rrs); 496 microuptime(&now); 497 nso->nso_timestamp = now.tv_sec; 498 bcopy(sa, nso->nso_saddr, sa->sa_len); 499 if (sa->sa_family == AF_INET) 500 ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port); 501 else if (sa->sa_family == AF_INET6) 502 ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); 503 nso->nso_protocol = protocol; 504 nso->nso_version = vers; 505 506 error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so); 507 508 /* Some servers require that the client port be a reserved port number. */ 509 if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) { 510 struct sockaddr_storage ss; 511 int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6; 512 int optname = (sa->sa_family == AF_INET) ? 
IP_PORTRANGE : IPV6_PORTRANGE; 513 int portrange = IP_PORTRANGE_LOW; 514 515 error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange)); 516 if (!error) { /* bind now to check for failure */ 517 ss.ss_len = sa->sa_len; 518 ss.ss_family = sa->sa_family; 519 if (ss.ss_family == AF_INET) { 520 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY; 521 ((struct sockaddr_in*)&ss)->sin_port = htons(0); 522 } else if (ss.ss_family == AF_INET6) { 523 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any; 524 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); 525 } else { 526 error = EINVAL; 527 } 528 if (!error) 529 error = sock_bind(nso->nso_so, (struct sockaddr*)&ss); 530 } 531 } 532 533 if (error) { 534 NFS_SOCK_DBG(("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n", 535 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype, 536 resvport ? "r" : "", port, protocol, vers)); 537 nfs_socket_destroy(nso); 538 } else { 539 NFS_SOCK_DBG(("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n", 540 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr, 541 sotype, resvport ? "r" : "", port, protocol, vers)); 542 *nsop = nso; 543 } 544 return (error); 545} 546 547/* 548 * Destroy an nfs_socket structure. 
549 */ 550void 551nfs_socket_destroy(struct nfs_socket *nso) 552{ 553 struct timespec ts = { 4, 0 }; 554 555 lck_mtx_lock(&nso->nso_lock); 556 nso->nso_flags |= NSO_DISCONNECTING; 557 if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */ 558 msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts); 559 lck_mtx_unlock(&nso->nso_lock); 560 sock_shutdown(nso->nso_so, SHUT_RDWR); 561 sock_close(nso->nso_so); 562 if (nso->nso_sotype == SOCK_STREAM) 563 nfs_rpc_record_state_cleanup(&nso->nso_rrs); 564 lck_mtx_destroy(&nso->nso_lock, nfs_request_grp); 565 if (nso->nso_saddr) 566 FREE(nso->nso_saddr, M_SONAME); 567 if (nso->nso_saddr2) 568 FREE(nso->nso_saddr2, M_SONAME); 569 NFS_SOCK_DBG(("nfs connect - socket %p destroyed\n", nso)); 570 FREE(nso, M_TEMP); 571} 572 573/* 574 * Set common socket options on an nfs_socket. 575 */ 576void 577nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso) 578{ 579 /* 580 * Set socket send/receive timeouts 581 * - Receive timeout shouldn't matter because most receives are performed 582 * in the socket upcall non-blocking. 583 * - Send timeout should allow us to react to a blocked socket. 584 * Soft mounts will want to abort sooner. 585 */ 586 struct timeval timeo; 587 int on = 1, proto; 588 589 timeo.tv_usec = 0; 590 timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 
5 : 60; 591 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); 592 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); 593 if (nso->nso_sotype == SOCK_STREAM) { 594 /* Assume that SOCK_STREAM always requires a connection */ 595 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); 596 /* set nodelay for TCP */ 597 sock_gettype(nso->nso_so, NULL, NULL, &proto); 598 if (proto == IPPROTO_TCP) 599 sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); 600 } 601 if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ 602 int reserve = NFS_UDPSOCKBUF; 603 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); 604 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); 605 } 606 /* set SO_NOADDRERR to detect network changes ASAP */ 607 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); 608 /* just playin' it safe with upcalls */ 609 sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); 610 /* socket should be interruptible if the mount is */ 611 if (!NMFLAG(nmp, INTR)) 612 sock_nointerrupt(nso->nso_so, 1); 613} 614 615/* 616 * Release resources held in an nfs_socket_search. 617 */ 618void 619nfs_socket_search_cleanup(struct nfs_socket_search *nss) 620{ 621 struct nfs_socket *nso, *nsonext; 622 623 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) { 624 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); 625 nss->nss_sockcnt--; 626 nfs_socket_destroy(nso); 627 } 628 if (nss->nss_sock) { 629 nfs_socket_destroy(nss->nss_sock); 630 nss->nss_sock = NULL; 631 } 632} 633 634/* 635 * Prefer returning certain errors over others. 636 * This function returns a ranking of the given error. 
637 */ 638int 639nfs_connect_error_class(int error) 640{ 641 switch (error) { 642 case 0: 643 return (0); 644 case ETIMEDOUT: 645 case EAGAIN: 646 return (1); 647 case EPIPE: 648 case EADDRNOTAVAIL: 649 case ENETDOWN: 650 case ENETUNREACH: 651 case ENETRESET: 652 case ECONNABORTED: 653 case ECONNRESET: 654 case EISCONN: 655 case ENOTCONN: 656 case ESHUTDOWN: 657 case ECONNREFUSED: 658 case EHOSTDOWN: 659 case EHOSTUNREACH: 660 return (2); 661 case ERPCMISMATCH: 662 case EPROCUNAVAIL: 663 case EPROGMISMATCH: 664 case EPROGUNAVAIL: 665 return (3); 666 case EBADRPC: 667 return (4); 668 default: 669 return (5); 670 } 671} 672 673/* 674 * Make sure a socket search returns the best error. 675 */ 676void 677nfs_socket_search_update_error(struct nfs_socket_search *nss, int error) 678{ 679 if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error)) 680 nss->nss_error = error; 681} 682 683/* 684 * Continue the socket search until we have something to report. 685 */ 686int 687nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss) 688{ 689 struct nfs_socket *nso, *nsonext; 690 struct timeval now; 691 struct nfs_fs_location *fsl; 692 struct nfs_fs_server *fss; 693 struct sockaddr_storage ss; 694 char *addrstr; 695 int error, nomore = 0; 696 697loop: 698 microuptime(&now); 699 NFS_SOCK_DBG(("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec)); 700 701 /* Time to start another socket? */ 702 while ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) || 703 ((nss->nss_sockcnt < 4) && (now.tv_sec >= (nss->nss_last + 2)))) { 704 if (nmp->nm_sockflags & NMSOCK_UNMOUNT) 705 return (EINTR); 706 /* Find the next address to try... */ 707 /* Have we run out of locations? 
*/ 708 if (!nomore && (nss->nss_last != -1) && !nfs_location_index_cmp(&nss->nss_nextloc, &nss->nss_startloc)) 709 nomore = 1; 710 if (nomore) { 711 if (nss->nss_last < 0) 712 nss->nss_last = now.tv_sec; 713 break; 714 } 715 /* Can we convert the address to a sockaddr? */ 716 fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc]; 717 fss = fsl->nl_servers[nss->nss_nextloc.nli_serv]; 718 addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr]; 719 if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) { 720 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); 721 nss->nss_last = -2; 722 continue; 723 } 724 /* Check that socket family is acceptable. */ 725 if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) { 726 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); 727 nss->nss_last = -2; 728 continue; 729 } 730 731 /* Create the socket. */ 732 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype, 733 nss->nss_port, nss->nss_protocol, nss->nss_version, 734 ((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso); 735 if (error) 736 return (error); 737 738 nso->nso_location = nss->nss_nextloc; 739 nso->nso_wake = nss; 740 error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso); 741 if (error) { 742 lck_mtx_lock(&nso->nso_lock); 743 nso->nso_error = error; 744 nso->nso_flags |= NSO_DEAD; 745 lck_mtx_unlock(&nso->nso_lock); 746 } 747 748 TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link); 749 nss->nss_sockcnt++; 750 nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); 751 752 nss->nss_last = now.tv_sec; 753 } 754 755 /* check each active socket and try to push it along */ 756 TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) { 757 lck_mtx_lock(&nso->nso_lock); 758 if (!(nso->nso_flags & NSO_CONNECTED)) { 759 if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) { 760 /* no connection needed, just say it's already connected */ 761 nso->nso_flags |= NSO_CONNECTED; 762 NFS_SOCK_DBG(("nfs connect %s 
UDP socket %p noconnect\n", 763 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 764 } else if (!(nso->nso_flags & NSO_CONNECTING)) { 765 /* initiate the connection */ 766 nso->nso_flags |= NSO_CONNECTING; 767 lck_mtx_unlock(&nso->nso_lock); 768 NFS_SOCK_DBG(("nfs connect %s connecting socket %p\n", 769 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 770 error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT); 771 lck_mtx_lock(&nso->nso_lock); 772 if (error && (error != EINPROGRESS)) { 773 nso->nso_error = error; 774 nso->nso_flags |= NSO_DEAD; 775 lck_mtx_unlock(&nso->nso_lock); 776 continue; 777 } 778 } 779 if (nso->nso_flags & NSO_CONNECTING) { 780 /* check the connection */ 781 if (sock_isconnected(nso->nso_so)) { 782 NFS_SOCK_DBG(("nfs connect %s socket %p is connected\n", 783 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 784 nso->nso_flags &= ~NSO_CONNECTING; 785 nso->nso_flags |= NSO_CONNECTED; 786 } else { 787 int optlen = sizeof(error); 788 error = 0; 789 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen); 790 if (error) { /* we got an error on the socket */ 791 NFS_SOCK_DBG(("nfs connect %s socket %p connection error %d\n", 792 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); 793 if (nss->nss_flags & NSS_VERBOSE) 794 log(LOG_INFO, "nfs_connect: socket error %d for %s\n", 795 error, vfs_statfs(nmp->nm_mountp)->f_mntfromname); 796 nso->nso_error = error; 797 nso->nso_flags |= NSO_DEAD; 798 lck_mtx_unlock(&nso->nso_lock); 799 continue; 800 } 801 } 802 } 803 if (nso->nso_flags & NSO_CONNECTED) 804 nfs_socket_options(nmp, nso); 805 } 806 if (!(nso->nso_flags & NSO_CONNECTED)) { 807 lck_mtx_unlock(&nso->nso_lock); 808 continue; 809 } 810 if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) || 811 ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) { 812 /* initiate a NULL RPC request */ 813 uint64_t xid = nso->nso_pingxid; 814 mbuf_t m, mreq = NULL; 815 struct msghdr msg; 816 size_t reqlen, 
sentlen; 817 uint32_t vers; 818 819 if (!(vers = nso->nso_version)) { 820 if (nso->nso_protocol == PMAPPROG) 821 vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4; 822 else if (nso->nso_protocol == NFS_PROG) 823 vers = NFS_VER3; 824 } 825 lck_mtx_unlock(&nso->nso_lock); 826 error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS, 827 vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq); 828 lck_mtx_lock(&nso->nso_lock); 829 if (!error) { 830 nso->nso_flags |= NSO_PINGING; 831 nso->nso_pingxid = R_XID32(xid); 832 nso->nso_reqtimestamp = now.tv_sec; 833 bzero(&msg, sizeof(msg)); 834 if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) { 835 msg.msg_name = nso->nso_saddr; 836 msg.msg_namelen = nso->nso_saddr->sa_len; 837 } 838 for (reqlen=0, m=mreq; m; m = mbuf_next(m)) 839 reqlen += mbuf_len(m); 840 lck_mtx_unlock(&nso->nso_lock); 841 error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen); 842 NFS_SOCK_DBG(("nfs connect %s verifying socket %p send rv %d\n", 843 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); 844 lck_mtx_lock(&nso->nso_lock); 845 if (!error && (sentlen != reqlen)) 846 error = ETIMEDOUT; 847 } 848 if (error) { 849 nso->nso_error = error; 850 nso->nso_flags |= NSO_DEAD; 851 lck_mtx_unlock(&nso->nso_lock); 852 continue; 853 } 854 } 855 if (nso->nso_flags & NSO_VERIFIED) { 856 /* WOOHOO!! This socket looks good! */ 857 NFS_SOCK_DBG(("nfs connect %s socket %p verified\n", 858 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 859 if (!nso->nso_version) { 860 /* If the version isn't set, the default must have worked. */ 861 if (nso->nso_protocol == PMAPPROG) 862 nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? 
PMAPVERS : RPCBVERS4; 863 if (nso->nso_protocol == NFS_PROG) 864 nso->nso_version = NFS_VER3; 865 } 866 lck_mtx_unlock(&nso->nso_lock); 867 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); 868 nss->nss_sockcnt--; 869 nss->nss_sock = nso; 870 break; 871 } 872 lck_mtx_unlock(&nso->nso_lock); 873 } 874 875 TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) { 876 lck_mtx_lock(&nso->nso_lock); 877 if (now.tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) { 878 /* took too long */ 879 NFS_SOCK_DBG(("nfs connect %s socket %p timed out\n", 880 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 881 nso->nso_error = ETIMEDOUT; 882 nso->nso_flags |= NSO_DEAD; 883 } 884 if (!(nso->nso_flags & NSO_DEAD)) { 885 lck_mtx_unlock(&nso->nso_lock); 886 continue; 887 } 888 lck_mtx_unlock(&nso->nso_lock); 889 NFS_SOCK_DBG(("nfs connect %s reaping socket %p %d\n", 890 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error)); 891 nfs_socket_search_update_error(nss, nso->nso_error); 892 TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); 893 nss->nss_sockcnt--; 894 nfs_socket_destroy(nso); 895 if (!nomore) 896 nss->nss_last = -2; 897 } 898 899 /* 900 * Keep looping if we haven't found a socket yet and we have more 901 * sockets to (continue to) try. 
902 */ 903 error = 0; 904 if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || !nomore)) { 905 /* log a warning if connect is taking a while */ 906 if (((now.tv_sec - nss->nss_timestamp) >= 30) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) { 907 log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n", 908 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 909 nss->nss_flags |= NSS_WARNED; 910 } 911 if (nmp->nm_sockflags & NMSOCK_UNMOUNT) 912 return (EINTR); 913 if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0))) 914 return (error); 915 if (nss->nss_last >= 0) 916 tsleep(nss, PSOCK, "nfs_connect_search_wait", hz); 917 goto loop; 918 } 919 920 NFS_SOCK_DBG(("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error)); 921 return (error); 922} 923 924/* 925 * Initialize a new NFS connection. 926 * 927 * Search for a location to connect a socket to and initialize the connection. 928 * 929 * An NFS mount may have multiple locations/servers/addresses available. 930 * We attempt to connect to each one asynchronously and will start 931 * several sockets in parallel if other locations are slow to answer. 932 * We'll use the first NFS socket we can successfully set up. 933 * 934 * The search may involve contacting the portmapper service first. 935 * 936 * A mount's initial connection may require negotiating some parameters such 937 * as socket type and NFS version. 938 */ 939int 940nfs_connect(struct nfsmount *nmp, int verbose, int timeo) 941{ 942 struct nfs_socket_search nss; 943 struct nfs_socket *nso, *nsonfs; 944 struct sockaddr_storage ss; 945 struct sockaddr *saddr, *oldsaddr; 946 sock_upcall upcall; 947 struct timeval now, start; 948 int error, savederror, nfsvers; 949 uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM; 950 fhandle_t *fh = NULL; 951 char *path = NULL; 952 in_port_t port; 953 954 /* paranoia... 
check that we have at least one address in the locations */ 955 uint32_t loc, serv; 956 for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) { 957 for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) { 958 if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount) 959 break; 960 NFS_SOCK_DBG(("nfs connect %s search, server %s has no addresses\n", 961 vfs_statfs(nmp->nm_mountp)->f_mntfromname, 962 nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name)); 963 } 964 if (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount) 965 break; 966 } 967 if (loc >= nmp->nm_locations.nl_numlocs) { 968 NFS_SOCK_DBG(("nfs connect %s search failed, no addresses\n", 969 vfs_statfs(nmp->nm_mountp)->f_mntfromname)); 970 return (EINVAL); 971 } 972 973 lck_mtx_lock(&nmp->nm_lock); 974 nmp->nm_sockflags |= NMSOCK_CONNECTING; 975 nmp->nm_nss = &nss; 976 lck_mtx_unlock(&nmp->nm_lock); 977 microuptime(&start); 978 savederror = error = 0; 979 980tryagain: 981 /* initialize socket search state */ 982 bzero(&nss, sizeof(nss)); 983 nss.nss_error = savederror; 984 TAILQ_INIT(&nss.nss_socklist); 985 nss.nss_sotype = sotype; 986 nss.nss_startloc = nmp->nm_locations.nl_current; 987 nss.nss_timestamp = start.tv_sec; 988 nss.nss_timeo = timeo; 989 if (verbose) 990 nss.nss_flags |= NSS_VERBOSE; 991 992 /* First time connecting, we may need to negotiate some things */ 993 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 994 if (!nmp->nm_vers) { 995 /* No NFS version specified... */ 996 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { 997 /* ...connect to portmapper first if we (may) need any ports. */ 998 nss.nss_port = PMAPPORT; 999 nss.nss_protocol = PMAPPROG; 1000 nss.nss_version = 0; 1001 } else { 1002 /* ...connect to NFS port first. 
*/ 1003 nss.nss_port = nmp->nm_nfsport; 1004 nss.nss_protocol = NFS_PROG; 1005 nss.nss_version = 0; 1006 } 1007 } else if (nmp->nm_vers >= NFS_VER4) { 1008 /* For NFSv4, we use the given (or default) port. */ 1009 nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT; 1010 nss.nss_protocol = NFS_PROG; 1011 nss.nss_version = 4; 1012 } else { 1013 /* For NFSv3/v2... */ 1014 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { 1015 /* ...connect to portmapper first if we need any ports. */ 1016 nss.nss_port = PMAPPORT; 1017 nss.nss_protocol = PMAPPROG; 1018 nss.nss_version = 0; 1019 } else { 1020 /* ...connect to NFS port first. */ 1021 nss.nss_port = nmp->nm_nfsport; 1022 nss.nss_protocol = NFS_PROG; 1023 nss.nss_version = nmp->nm_vers; 1024 } 1025 } 1026 NFS_SOCK_DBG(("nfs connect first %s, so type %d port %d prot %d %d\n", 1027 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, 1028 nss.nss_protocol, nss.nss_version)); 1029 } else { 1030 /* we've connected before, just connect to NFS port */ 1031 if (!nmp->nm_nfsport) { 1032 /* need to ask portmapper which port that would be */ 1033 nss.nss_port = PMAPPORT; 1034 nss.nss_protocol = PMAPPROG; 1035 nss.nss_version = 0; 1036 } else { 1037 nss.nss_port = nmp->nm_nfsport; 1038 nss.nss_protocol = NFS_PROG; 1039 nss.nss_version = nmp->nm_vers; 1040 } 1041 NFS_SOCK_DBG(("nfs connect %s, so type %d port %d prot %d %d\n", 1042 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, 1043 nss.nss_protocol, nss.nss_version)); 1044 } 1045 1046 /* Set next location to first valid location. */ 1047 /* If start location is invalid, find next location. 
*/ 1048 nss.nss_nextloc = nss.nss_startloc; 1049 if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) || 1050 (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) { 1051 nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc); 1052 if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) { 1053 NFS_SOCK_DBG(("nfs connect %s search failed, couldn't find a valid location index\n", 1054 vfs_statfs(nmp->nm_mountp)->f_mntfromname)); 1055 return (ENOENT); 1056 } 1057 } 1058 nss.nss_last = -1; 1059 1060keepsearching: 1061 1062 error = nfs_connect_search_loop(nmp, &nss); 1063 if (error || !nss.nss_sock) { 1064 /* search failed */ 1065 nfs_socket_search_cleanup(&nss); 1066 if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) { 1067 /* Try using UDP */ 1068 sotype = SOCK_DGRAM; 1069 savederror = nss.nss_error; 1070 NFS_SOCK_DBG(("nfs connect %s TCP failed %d %d, trying UDP\n", 1071 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error)); 1072 goto tryagain; 1073 } 1074 if (!error) 1075 error = nss.nss_error ? nss.nss_error : ETIMEDOUT; 1076 lck_mtx_lock(&nmp->nm_lock); 1077 nmp->nm_sockflags &= ~NMSOCK_CONNECTING; 1078 nmp->nm_nss = NULL; 1079 lck_mtx_unlock(&nmp->nm_lock); 1080 if (nss.nss_flags & NSS_WARNED) 1081 log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n", 1082 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1083 if (fh) 1084 FREE(fh, M_TEMP); 1085 if (path) 1086 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1087 NFS_SOCK_DBG(("nfs connect %s search failed, returning %d\n", 1088 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error)); 1089 return (error); 1090 } 1091 1092 /* try to use nss_sock */ 1093 nso = nss.nss_sock; 1094 nss.nss_sock = NULL; 1095 1096 /* We may be speaking to portmap first... to determine port(s). 
*/ 1097 if (nso->nso_saddr->sa_family == AF_INET) 1098 port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port); 1099 else 1100 port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port); 1101 if (port == PMAPPORT) { 1102 /* Use this portmapper port to get the port #s we need. */ 1103 NFS_SOCK_DBG(("nfs connect %s got portmapper socket %p\n", 1104 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 1105 1106 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */ 1107 sock_setupcall(nso->nso_so, NULL, NULL); 1108 1109 /* Set up socket address and port for NFS socket. */ 1110 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1111 1112 /* If NFS version not set, try NFSv3 then NFSv2. */ 1113 nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3; 1114 1115 if (!(port = nmp->nm_nfsport)) { 1116 if (ss.ss_family == AF_INET) 1117 ((struct sockaddr_in*)&ss)->sin_port = htons(0); 1118 else if (ss.ss_family == AF_INET6) 1119 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); 1120 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1121 nso->nso_so, NFS_PROG, nfsvers, 1122 (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo); 1123 if (!error) { 1124 if (ss.ss_family == AF_INET) 1125 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1126 else if (ss.ss_family == AF_INET6) 1127 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1128 if (!port) 1129 error = EPROGUNAVAIL; 1130 } 1131 if (error && !nmp->nm_vers) { 1132 nfsvers = NFS_VER2; 1133 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1134 nso->nso_so, NFS_PROG, nfsvers, 1135 (nso->nso_sotype == SOCK_DGRAM) ? 
IPPROTO_UDP : IPPROTO_TCP, timeo); 1136 if (!error) { 1137 if (ss.ss_family == AF_INET) 1138 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1139 else if (ss.ss_family == AF_INET6) 1140 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1141 if (!port) 1142 error = EPROGUNAVAIL; 1143 } 1144 } 1145 if (error) { 1146 nfs_socket_search_update_error(&nss, error); 1147 nfs_socket_destroy(nso); 1148 goto keepsearching; 1149 } 1150 } 1151 /* Create NFS protocol socket and add it to the list of sockets. */ 1152 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port, 1153 NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs); 1154 if (error) { 1155 nfs_socket_search_update_error(&nss, error); 1156 nfs_socket_destroy(nso); 1157 goto keepsearching; 1158 } 1159 nsonfs->nso_location = nso->nso_location; 1160 nsonfs->nso_wake = &nss; 1161 error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs); 1162 if (error) { 1163 nfs_socket_search_update_error(&nss, error); 1164 nfs_socket_destroy(nsonfs); 1165 nfs_socket_destroy(nso); 1166 goto keepsearching; 1167 } 1168 TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link); 1169 nss.nss_sockcnt++; 1170 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { 1171 /* Set up socket address and port for MOUNT socket. */ 1172 error = 0; 1173 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1174 port = nmp->nm_mountport; 1175 if (ss.ss_family == AF_INET) 1176 ((struct sockaddr_in*)&ss)->sin_port = htons(port); 1177 else if (ss.ss_family == AF_INET6) 1178 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); 1179 if (!port) { 1180 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ 1181 /* If NFS version is unknown, optimistically choose for NFSv3. */ 1182 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3; 1183 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? 
IPPROTO_UDP : IPPROTO_TCP; 1184 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1185 nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo); 1186 } 1187 if (!error) { 1188 if (ss.ss_family == AF_INET) 1189 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1190 else if (ss.ss_family == AF_INET6) 1191 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1192 if (!port) 1193 error = EPROGUNAVAIL; 1194 } 1195 /* create sockaddr for MOUNT */ 1196 if (!error) 1197 MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO); 1198 if (!error && !nsonfs->nso_saddr2) 1199 error = ENOMEM; 1200 if (!error) 1201 bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len); 1202 if (error) { 1203 lck_mtx_lock(&nsonfs->nso_lock); 1204 nsonfs->nso_error = error; 1205 nsonfs->nso_flags |= NSO_DEAD; 1206 lck_mtx_unlock(&nsonfs->nso_lock); 1207 } 1208 } 1209 nfs_socket_destroy(nso); 1210 goto keepsearching; 1211 } 1212 1213 /* nso is an NFS socket */ 1214 NFS_SOCK_DBG(("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); 1215 1216 /* If NFS version wasn't specified, it was determined during the connect. */ 1217 nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version; 1218 1219 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */ 1220 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { 1221 error = 0; 1222 saddr = nso->nso_saddr2; 1223 if (!saddr) { 1224 /* Need sockaddr for MOUNT port */ 1225 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1226 port = nmp->nm_mountport; 1227 if (ss.ss_family == AF_INET) 1228 ((struct sockaddr_in*)&ss)->sin_port = htons(port); 1229 else if (ss.ss_family == AF_INET6) 1230 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); 1231 if (!port) { 1232 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ 1233 int mntvers = (nfsvers == NFS_VER2) ? 
RPCMNT_VER1 : RPCMNT_VER3; 1234 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP; 1235 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1236 NULL, RPCPROG_MNT, mntvers, mntproto, timeo); 1237 if (ss.ss_family == AF_INET) 1238 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1239 else if (ss.ss_family == AF_INET6) 1240 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1241 } 1242 if (!error) { 1243 if (port) 1244 saddr = (struct sockaddr*)&ss; 1245 else 1246 error = EPROGUNAVAIL; 1247 } 1248 } 1249 if (saddr) 1250 MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO); 1251 if (saddr && fh) 1252 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 1253 if (!saddr || !fh || !path) { 1254 if (!error) 1255 error = ENOMEM; 1256 if (fh) 1257 FREE(fh, M_TEMP); 1258 if (path) 1259 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1260 fh = NULL; 1261 path = NULL; 1262 nfs_socket_search_update_error(&nss, error); 1263 nfs_socket_destroy(nso); 1264 goto keepsearching; 1265 } 1266 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1); 1267 error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers, 1268 path, vfs_context_current(), timeo, fh, &nmp->nm_servsec); 1269 NFS_SOCK_DBG(("nfs connect %s socket %p mount %d\n", 1270 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); 1271 if (!error) { 1272 /* Make sure we can agree on a security flavor. */ 1273 int o, s; /* indices into mount option and server security flavor lists */ 1274 int found = 0; 1275 1276 if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) { 1277 /* Some servers return an empty list to indicate RPCAUTH_SYS? */ 1278 nmp->nm_servsec.count = 1; 1279 nmp->nm_servsec.flavors[0] = RPCAUTH_SYS; 1280 } 1281 if (nmp->nm_sec.count) { 1282 /* Choose the first flavor in our list that the server supports. 
*/ 1283 if (!nmp->nm_servsec.count) { 1284 /* we don't know what the server supports, just use our first choice */ 1285 nmp->nm_auth = nmp->nm_sec.flavors[0]; 1286 found = 1; 1287 } 1288 for (o=0; !found && (o < nmp->nm_sec.count); o++) 1289 for (s=0; !found && (s < nmp->nm_servsec.count); s++) 1290 if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) { 1291 nmp->nm_auth = nmp->nm_sec.flavors[o]; 1292 found = 1; 1293 } 1294 } else { 1295 /* Choose the first one we support from the server's list. */ 1296 if (!nmp->nm_servsec.count) { 1297 nmp->nm_auth = RPCAUTH_SYS; 1298 found = 1; 1299 } 1300 for (s=0; s < nmp->nm_servsec.count; s++) 1301 switch (nmp->nm_servsec.flavors[s]) { 1302 case RPCAUTH_SYS: 1303 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */ 1304 if (found && (nmp->nm_auth == RPCAUTH_NONE)) 1305 found = 0; 1306 case RPCAUTH_NONE: 1307 case RPCAUTH_KRB5: 1308 case RPCAUTH_KRB5I: 1309 case RPCAUTH_KRB5P: 1310 if (!found) { 1311 nmp->nm_auth = nmp->nm_servsec.flavors[s]; 1312 found = 1; 1313 } 1314 break; 1315 } 1316 } 1317 error = !found ? EAUTH : 0; 1318 } 1319 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1320 path = NULL; 1321 if (error) { 1322 nfs_socket_search_update_error(&nss, error); 1323 FREE(fh, M_TEMP); 1324 fh = NULL; 1325 nfs_socket_destroy(nso); 1326 goto keepsearching; 1327 } 1328 if (nmp->nm_fh) 1329 FREE(nmp->nm_fh, M_TEMP); 1330 nmp->nm_fh = fh; 1331 fh = NULL; 1332 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT); 1333 } 1334 1335 /* put the real upcall in place */ 1336 upcall = (nso->nso_sotype == SOCK_STREAM) ? 
nfs_tcp_rcv : nfs_udp_rcv; 1337 error = sock_setupcall(nso->nso_so, upcall, nmp); 1338 if (error) { 1339 nfs_socket_search_update_error(&nss, error); 1340 nfs_socket_destroy(nso); 1341 goto keepsearching; 1342 } 1343 1344 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1345 /* set mntfromname to this location */ 1346 if (!NM_OMATTR_GIVEN(nmp, MNTFROM)) 1347 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, 1348 vfs_statfs(nmp->nm_mountp)->f_mntfromname, 1349 sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0); 1350 /* some negotiated values need to remain unchanged for the life of the mount */ 1351 if (!nmp->nm_sotype) 1352 nmp->nm_sotype = nso->nso_sotype; 1353 if (!nmp->nm_vers) { 1354 nmp->nm_vers = nfsvers; 1355 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */ 1356 if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) { 1357 if (nso->nso_saddr->sa_family == AF_INET) 1358 port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port); 1359 else if (nso->nso_saddr->sa_family == AF_INET6) 1360 port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); 1361 else 1362 port = 0; 1363 if (port == NFS_PORT) 1364 nmp->nm_nfsport = NFS_PORT; 1365 } 1366 } 1367 /* do some version-specific pre-mount set up */ 1368 if (nmp->nm_vers >= NFS_VER4) { 1369 microtime(&now); 1370 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec; 1371 if (!NMFLAG(nmp, NOCALLBACK)) 1372 nfs4_mount_callback_setup(nmp); 1373 } 1374 } 1375 1376 /* Initialize NFS socket state variables */ 1377 lck_mtx_lock(&nmp->nm_lock); 1378 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = 1379 nmp->nm_srtt[3] = (NFS_TIMEO << 3); 1380 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 1381 nmp->nm_sdrtt[3] = 0; 1382 if (nso->nso_sotype == SOCK_DGRAM) { 1383 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 1384 nmp->nm_sent = 0; 1385 } else if (nso->nso_sotype == SOCK_STREAM) { 1386 
nmp->nm_timeouts = 0; 1387 } 1388 nmp->nm_sockflags &= ~NMSOCK_CONNECTING; 1389 nmp->nm_sockflags |= NMSOCK_SETUP; 1390 /* move the socket to the mount structure */ 1391 nmp->nm_nso = nso; 1392 oldsaddr = nmp->nm_saddr; 1393 nmp->nm_saddr = nso->nso_saddr; 1394 lck_mtx_unlock(&nmp->nm_lock); 1395 error = nfs_connect_setup(nmp); 1396 lck_mtx_lock(&nmp->nm_lock); 1397 nmp->nm_sockflags &= ~NMSOCK_SETUP; 1398 if (!error) { 1399 nmp->nm_sockflags |= NMSOCK_READY; 1400 wakeup(&nmp->nm_sockflags); 1401 } 1402 if (error) { 1403 NFS_SOCK_DBG(("nfs connect %s socket %p setup failed %d\n", 1404 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); 1405 nfs_socket_search_update_error(&nss, error); 1406 nmp->nm_saddr = oldsaddr; 1407 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1408 /* undo settings made prior to setup */ 1409 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE)) 1410 nmp->nm_sotype = 0; 1411 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) { 1412 if (nmp->nm_vers >= NFS_VER4) { 1413 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) 1414 nmp->nm_nfsport = 0; 1415 if (nmp->nm_cbid) 1416 nfs4_mount_callback_shutdown(nmp); 1417 if (IS_VALID_CRED(nmp->nm_mcred)) 1418 kauth_cred_unref(&nmp->nm_mcred); 1419 bzero(&nmp->nm_un, sizeof(nmp->nm_un)); 1420 } 1421 nmp->nm_vers = 0; 1422 } 1423 } 1424 lck_mtx_unlock(&nmp->nm_lock); 1425 nmp->nm_nso = NULL; 1426 nfs_socket_destroy(nso); 1427 goto keepsearching; 1428 } 1429 1430 /* update current location */ 1431 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) && 1432 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) { 1433 /* server has changed, we should initiate failover/recovery */ 1434 // XXX 1435 } 1436 nmp->nm_locations.nl_current = nso->nso_location; 1437 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID; 1438 1439 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1440 /* We have now successfully connected... make a note of it. 
*/ 1441 nmp->nm_sockflags |= NMSOCK_HASCONNECTED; 1442 } 1443 1444 lck_mtx_unlock(&nmp->nm_lock); 1445 if (oldsaddr) 1446 FREE(oldsaddr, M_SONAME); 1447 1448 if (nss.nss_flags & NSS_WARNED) 1449 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n", 1450 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1451 1452 nmp->nm_nss = NULL; 1453 nfs_socket_search_cleanup(&nss); 1454 if (fh) 1455 FREE(fh, M_TEMP); 1456 if (path) 1457 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1458 NFS_SOCK_DBG(("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname)); 1459 return (0); 1460} 1461 1462 1463/* setup & confirm socket connection is functional */ 1464int 1465nfs_connect_setup(struct nfsmount *nmp) 1466{ 1467 int error = 0; 1468 1469 if (nmp->nm_vers >= NFS_VER4) { 1470 if (nmp->nm_state & NFSSTA_CLIENTID) { 1471 /* first, try to renew our current state */ 1472 error = nfs4_renew(nmp, R_SETUP); 1473 if ((error == NFSERR_ADMIN_REVOKED) || 1474 (error == NFSERR_CB_PATH_DOWN) || 1475 (error == NFSERR_EXPIRED) || 1476 (error == NFSERR_LEASE_MOVED) || 1477 (error == NFSERR_STALE_CLIENTID)) { 1478 lck_mtx_lock(&nmp->nm_lock); 1479 nfs_need_recover(nmp, error); 1480 lck_mtx_unlock(&nmp->nm_lock); 1481 } 1482 } 1483 error = nfs4_setclientid(nmp); 1484 } 1485 return (error); 1486} 1487 1488/* 1489 * NFS socket reconnect routine: 1490 * Called when a connection is broken. 1491 * - disconnect the old socket 1492 * - nfs_connect() again 1493 * - set R_MUSTRESEND for all outstanding requests on mount point 1494 * If this fails the mount point is DEAD! 
1495 */ 1496int 1497nfs_reconnect(struct nfsmount *nmp) 1498{ 1499 struct nfsreq *rq; 1500 struct timeval now; 1501 thread_t thd = current_thread(); 1502 int error, wentdown = 0, verbose = 1; 1503 time_t lastmsg; 1504 int timeo; 1505 1506 microuptime(&now); 1507 lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay); 1508 1509 nfs_disconnect(nmp); 1510 1511 1512 lck_mtx_lock(&nmp->nm_lock); 1513 timeo = nfs_is_squishy(nmp) ? 8 : 30; 1514 lck_mtx_unlock(&nmp->nm_lock); 1515 1516 while ((error = nfs_connect(nmp, verbose, timeo))) { 1517 verbose = 0; 1518 nfs_disconnect(nmp); 1519 if ((error == EINTR) || (error == ERESTART)) 1520 return (EINTR); 1521 if (error == EIO) 1522 return (EIO); 1523 microuptime(&now); 1524 if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) { 1525 lastmsg = now.tv_sec; 1526 nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect"); 1527 wentdown = 1; 1528 } 1529 lck_mtx_lock(&nmp->nm_lock); 1530 if (!(nmp->nm_state & NFSSTA_MOUNTED)) { 1531 /* we're not yet completely mounted and */ 1532 /* we can't reconnect, so we fail */ 1533 lck_mtx_unlock(&nmp->nm_lock); 1534 return (error); 1535 } 1536 nfs_mount_check_dead_timeout(nmp); 1537 if ((error = nfs_sigintr(nmp, NULL, thd, 1))) { 1538 lck_mtx_unlock(&nmp->nm_lock); 1539 return (error); 1540 } 1541 lck_mtx_unlock(&nmp->nm_lock); 1542 tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0); 1543 if ((error = nfs_sigintr(nmp, NULL, thd, 0))) 1544 return (error); 1545 } 1546 1547 if (wentdown) 1548 nfs_up(nmp, thd, NFSSTA_TIMEO, "connected"); 1549 1550 /* 1551 * Loop through outstanding request list and mark all requests 1552 * as needing a resend. (Though nfs_need_reconnect() probably 1553 * marked them all already.) 
1554 */ 1555 lck_mtx_lock(nfs_request_mutex); 1556 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) { 1557 if (rq->r_nmp == nmp) { 1558 lck_mtx_lock(&rq->r_mtx); 1559 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) { 1560 rq->r_flags |= R_MUSTRESEND; 1561 rq->r_rtt = -1; 1562 wakeup(rq); 1563 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) 1564 nfs_asyncio_resend(rq); 1565 } 1566 lck_mtx_unlock(&rq->r_mtx); 1567 } 1568 } 1569 lck_mtx_unlock(nfs_request_mutex); 1570 return (0); 1571} 1572 1573/* 1574 * NFS disconnect. Clean up and unlink. 1575 */ 1576void 1577nfs_disconnect(struct nfsmount *nmp) 1578{ 1579 struct nfs_socket *nso; 1580 1581 lck_mtx_lock(&nmp->nm_lock); 1582tryagain: 1583 if (nmp->nm_nso) { 1584 struct timespec ts = { 1, 0 }; 1585 if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */ 1586 nmp->nm_state |= NFSSTA_WANTSND; 1587 msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts); 1588 goto tryagain; 1589 } 1590 if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */ 1591 msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts); 1592 goto tryagain; 1593 } 1594 nmp->nm_sockflags |= NMSOCK_DISCONNECTING; 1595 nmp->nm_sockflags &= ~NMSOCK_READY; 1596 nso = nmp->nm_nso; 1597 nmp->nm_nso = NULL; 1598 if (nso->nso_saddr == nmp->nm_saddr) 1599 nso->nso_saddr = NULL; 1600 lck_mtx_unlock(&nmp->nm_lock); 1601 nfs_socket_destroy(nso); 1602 lck_mtx_lock(&nmp->nm_lock); 1603 nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING; 1604 lck_mtx_unlock(&nmp->nm_lock); 1605 } else { 1606 lck_mtx_unlock(&nmp->nm_lock); 1607 } 1608} 1609 1610/* 1611 * mark an NFS mount as needing a reconnect/resends. 
1612 */ 1613void 1614nfs_need_reconnect(struct nfsmount *nmp) 1615{ 1616 struct nfsreq *rq; 1617 1618 lck_mtx_lock(&nmp->nm_lock); 1619 nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP); 1620 lck_mtx_unlock(&nmp->nm_lock); 1621 1622 /* 1623 * Loop through outstanding request list and 1624 * mark all requests as needing a resend. 1625 */ 1626 lck_mtx_lock(nfs_request_mutex); 1627 TAILQ_FOREACH(rq, &nfs_reqq, r_chain) { 1628 if (rq->r_nmp == nmp) { 1629 lck_mtx_lock(&rq->r_mtx); 1630 if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) { 1631 rq->r_flags |= R_MUSTRESEND; 1632 rq->r_rtt = -1; 1633 wakeup(rq); 1634 if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) 1635 nfs_asyncio_resend(rq); 1636 } 1637 lck_mtx_unlock(&rq->r_mtx); 1638 } 1639 } 1640 lck_mtx_unlock(nfs_request_mutex); 1641} 1642 1643 1644/* 1645 * thread to handle miscellaneous async NFS socket work (reconnects/resends) 1646 */ 1647void 1648nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) 1649{ 1650 struct nfsmount *nmp = arg; 1651 struct timespec ts = { 30, 0 }; 1652 thread_t thd = current_thread(); 1653 struct nfsreq *req; 1654 struct timeval now; 1655 int error, dofinish; 1656 nfsnode_t np; 1657 1658 lck_mtx_lock(&nmp->nm_lock); 1659 1660 while (!(nmp->nm_sockflags & NMSOCK_READY) || 1661 !TAILQ_EMPTY(&nmp->nm_resendq) || 1662 !LIST_EMPTY(&nmp->nm_monlist) || 1663 nmp->nm_deadto_start || 1664 (nmp->nm_state & NFSSTA_RECOVER) || 1665 ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq))) 1666 { 1667 if (nmp->nm_sockflags & NMSOCK_UNMOUNT) 1668 break; 1669 /* do reconnect, if necessary */ 1670 if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_FORCE)) { 1671 if (nmp->nm_reconnect_start <= 0) { 1672 microuptime(&now); 1673 nmp->nm_reconnect_start = now.tv_sec; 1674 } 1675 lck_mtx_unlock(&nmp->nm_lock); 1676 NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname)); 1677 if (nfs_reconnect(nmp) == 0) 1678 
nmp->nm_reconnect_start = 0; 1679 lck_mtx_lock(&nmp->nm_lock); 1680 } 1681 if ((nmp->nm_sockflags & NMSOCK_READY) && 1682 (nmp->nm_state & NFSSTA_RECOVER) && 1683 !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && 1684 !(nmp->nm_state & NFSSTA_FORCE)) { 1685 /* perform state recovery */ 1686 lck_mtx_unlock(&nmp->nm_lock); 1687 nfs_recover(nmp); 1688 lck_mtx_lock(&nmp->nm_lock); 1689 } 1690 /* handle NFSv4 delegation returns */ 1691 while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & NFSSTA_FORCE) && 1692 (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) && 1693 ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) { 1694 lck_mtx_unlock(&nmp->nm_lock); 1695 nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred); 1696 lck_mtx_lock(&nmp->nm_lock); 1697 } 1698 /* do resends, if necessary/possible */ 1699 while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || (nmp->nm_state & NFSSTA_FORCE)) && 1700 ((req = TAILQ_FIRST(&nmp->nm_resendq)))) { 1701 if (req->r_resendtime) 1702 microuptime(&now); 1703 while (req && !(nmp->nm_state & NFSSTA_FORCE) && req->r_resendtime && (now.tv_sec < req->r_resendtime)) 1704 req = TAILQ_NEXT(req, r_rchain); 1705 if (!req) 1706 break; 1707 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); 1708 req->r_rchain.tqe_next = NFSREQNOLIST; 1709 lck_mtx_unlock(&nmp->nm_lock); 1710 lck_mtx_lock(&req->r_mtx); 1711 if (req->r_error || req->r_nmrep.nmc_mhead) { 1712 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1713 req->r_flags &= ~R_RESENDQ; 1714 wakeup(req); 1715 lck_mtx_unlock(&req->r_mtx); 1716 if (dofinish) 1717 nfs_asyncio_finish(req); 1718 lck_mtx_lock(&nmp->nm_lock); 1719 continue; 1720 } 1721 if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) { 1722 req->r_flags &= ~R_RESTART; 1723 req->r_resendtime = 0; 1724 lck_mtx_unlock(&req->r_mtx); 1725 /* async RPCs on GSS mounts need to be rebuilt and resent. 
*/ 1726 nfs_reqdequeue(req); 1727 if (nfs_request_using_gss(req)) { 1728 nfs_gss_clnt_rpcdone(req); 1729 error = nfs_gss_clnt_args_restore(req); 1730 if (error == ENEEDAUTH) 1731 req->r_xid = 0; 1732 } 1733 NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n", 1734 nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid, 1735 req->r_flags, req->r_rtt)); 1736 error = !req->r_nmp ? ENXIO : 0; /* unmounted? */ 1737 if (!error) 1738 error = nfs_sigintr(nmp, req, req->r_thread, 0); 1739 if (!error) 1740 error = nfs_request_add_header(req); 1741 if (!error) 1742 error = nfs_request_send(req, 0); 1743 lck_mtx_lock(&req->r_mtx); 1744 if (req->r_flags & R_RESENDQ) 1745 req->r_flags &= ~R_RESENDQ; 1746 if (error) 1747 req->r_error = error; 1748 wakeup(req); 1749 dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1750 lck_mtx_unlock(&req->r_mtx); 1751 if (dofinish) 1752 nfs_asyncio_finish(req); 1753 lck_mtx_lock(&nmp->nm_lock); 1754 error = 0; 1755 continue; 1756 } 1757 NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n", 1758 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt)); 1759 error = !req->r_nmp ? ENXIO : 0; /* unmounted? 
*/ 1760 if (!error) 1761 error = nfs_sigintr(nmp, req, req->r_thread, 0); 1762 if (!error) { 1763 req->r_flags |= R_SENDING; 1764 lck_mtx_unlock(&req->r_mtx); 1765 error = nfs_send(req, 0); 1766 lck_mtx_lock(&req->r_mtx); 1767 if (!error) { 1768 if (req->r_flags & R_RESENDQ) 1769 req->r_flags &= ~R_RESENDQ; 1770 wakeup(req); 1771 lck_mtx_unlock(&req->r_mtx); 1772 lck_mtx_lock(&nmp->nm_lock); 1773 continue; 1774 } 1775 } 1776 req->r_error = error; 1777 if (req->r_flags & R_RESENDQ) 1778 req->r_flags &= ~R_RESENDQ; 1779 wakeup(req); 1780 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1781 lck_mtx_unlock(&req->r_mtx); 1782 if (dofinish) 1783 nfs_asyncio_finish(req); 1784 lck_mtx_lock(&nmp->nm_lock); 1785 } 1786 if (nmp->nm_deadto_start) 1787 nfs_mount_check_dead_timeout(nmp); 1788 if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) 1789 break; 1790 /* check monitored nodes, if necessary/possible */ 1791 if (!LIST_EMPTY(&nmp->nm_monlist)) { 1792 nmp->nm_state |= NFSSTA_MONITOR_SCAN; 1793 LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) { 1794 if (!(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE))) 1795 break; 1796 np->n_mflag |= NMMONSCANINPROG; 1797 lck_mtx_unlock(&nmp->nm_lock); 1798 error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR)); 1799 if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */ 1800 nfs_data_update_size(np, 0); 1801 lck_mtx_lock(&nmp->nm_lock); 1802 np->n_mflag &= ~NMMONSCANINPROG; 1803 if (np->n_mflag & NMMONSCANWANT) { 1804 np->n_mflag &= ~NMMONSCANWANT; 1805 wakeup(&np->n_mflag); 1806 } 1807 if (error || !(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE))) 1808 break; 1809 } 1810 nmp->nm_state &= ~NFSSTA_MONITOR_SCAN; 1811 if (nmp->nm_state & NFSSTA_UNMOUNTING) 1812 wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */ 1813 } 1814 if 
((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) { 1815 if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) || 1816 (nmp->nm_state & NFSSTA_RECOVER)) 1817 ts.tv_sec = 1; 1818 else 1819 ts.tv_sec = 5; 1820 msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts); 1821 } 1822 } 1823 1824 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */ 1825 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && 1826 (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) && 1827 (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { 1828 lck_mtx_unlock(&nmp->nm_lock); 1829 nfs3_umount_rpc(nmp, vfs_context_kernel(), 1830 (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2); 1831 lck_mtx_lock(&nmp->nm_lock); 1832 } 1833 1834 if (nmp->nm_sockthd == thd) 1835 nmp->nm_sockthd = NULL; 1836 lck_mtx_unlock(&nmp->nm_lock); 1837 wakeup(&nmp->nm_sockthd); 1838 thread_terminate(thd); 1839} 1840 1841/* start or wake a mount's socket thread */ 1842void 1843nfs_mount_sock_thread_wake(struct nfsmount *nmp) 1844{ 1845 if (nmp->nm_sockthd) 1846 wakeup(&nmp->nm_sockthd); 1847 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS) 1848 thread_deallocate(nmp->nm_sockthd); 1849} 1850 1851/* 1852 * Check if we should mark the mount dead because the 1853 * unresponsive mount has reached the dead timeout. 1854 * (must be called with nmp locked) 1855 */ 1856void 1857nfs_mount_check_dead_timeout(struct nfsmount *nmp) 1858{ 1859 struct timeval now; 1860 1861 if (nmp->nm_deadto_start == 0) 1862 return; 1863 if (nmp->nm_state & NFSSTA_DEAD) 1864 return; 1865 nfs_is_squishy(nmp); 1866 if (nmp->nm_curdeadtimeout <= 0) 1867 return; 1868 microuptime(&now); 1869 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout) 1870 return; 1871 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, 1872 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? 
"squished " : ""); 1873 nmp->nm_state |= NFSSTA_DEAD; 1874 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); 1875} 1876 1877/* 1878 * NFS callback channel socket state 1879 */ 1880struct nfs_callback_socket 1881{ 1882 TAILQ_ENTRY(nfs_callback_socket) ncbs_link; 1883 socket_t ncbs_so; /* the socket */ 1884 struct sockaddr_storage ncbs_saddr; /* socket address */ 1885 struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */ 1886 time_t ncbs_stamp; /* last accessed at */ 1887 uint32_t ncbs_flags; /* see below */ 1888}; 1889#define NCBSOCK_UPCALL 0x0001 1890#define NCBSOCK_UPCALLWANT 0x0002 1891#define NCBSOCK_DEAD 0x0004 1892 1893/* 1894 * NFS callback channel state 1895 * 1896 * One listening socket for accepting socket connections from servers and 1897 * a list of connected sockets to handle callback requests on. 1898 * Mounts registered with the callback channel are assigned IDs and 1899 * put on a list so that the callback request handling code can match 1900 * the requests up with mounts. 1901 */ 1902socket_t nfs4_cb_so = NULL; 1903socket_t nfs4_cb_so6 = NULL; 1904in_port_t nfs4_cb_port = 0; 1905in_port_t nfs4_cb_port6 = 0; 1906uint32_t nfs4_cb_id = 0; 1907uint32_t nfs4_cb_so_usecount = 0; 1908TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks; 1909TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts; 1910 1911int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t); 1912 1913/* 1914 * Set up the callback channel for the NFS mount. 1915 * 1916 * Initializes the callback channel socket state and 1917 * assigns a callback ID to the mount. 
1918 */ 1919void 1920nfs4_mount_callback_setup(struct nfsmount *nmp) 1921{ 1922 struct sockaddr_in sin; 1923 struct sockaddr_in6 sin6; 1924 socket_t so = NULL; 1925 socket_t so6 = NULL; 1926 struct timeval timeo; 1927 int error, on = 1; 1928 in_port_t port; 1929 1930 lck_mtx_lock(nfs_global_mutex); 1931 if (nfs4_cb_id == 0) { 1932 TAILQ_INIT(&nfs4_cb_mounts); 1933 TAILQ_INIT(&nfs4_cb_socks); 1934 nfs4_cb_id++; 1935 } 1936 nmp->nm_cbid = nfs4_cb_id++; 1937 if (nmp->nm_cbid == 0) 1938 nmp->nm_cbid = nfs4_cb_id++; 1939 nfs4_cb_so_usecount++; 1940 TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink); 1941 1942 if (nfs4_cb_so) { 1943 lck_mtx_unlock(nfs_global_mutex); 1944 return; 1945 } 1946 1947 /* IPv4 */ 1948 error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so); 1949 if (error) { 1950 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error); 1951 goto fail; 1952 } 1953 so = nfs4_cb_so; 1954 1955 sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); 1956 sin.sin_len = sizeof(struct sockaddr_in); 1957 sin.sin_family = AF_INET; 1958 sin.sin_addr.s_addr = htonl(INADDR_ANY); 1959 sin.sin_port = htons(nfs_callback_port); /* try to use specified port */ 1960 error = sock_bind(so, (struct sockaddr *)&sin); 1961 if (error) { 1962 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error); 1963 goto fail; 1964 } 1965 error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len); 1966 if (error) { 1967 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error); 1968 goto fail; 1969 } 1970 nfs4_cb_port = ntohs(sin.sin_port); 1971 1972 error = sock_listen(so, 32); 1973 if (error) { 1974 log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error); 1975 goto fail; 1976 } 1977 1978 /* receive timeout shouldn't matter. 
If timeout on send, we'll want to drop the socket */ 1979 timeo.tv_usec = 0; 1980 timeo.tv_sec = 60; 1981 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); 1982 if (error) 1983 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error); 1984 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); 1985 if (error) 1986 log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error); 1987 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); 1988 sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); 1989 sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); 1990 error = 0; 1991 1992 /* IPv6 */ 1993 error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6); 1994 if (error) { 1995 log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error); 1996 goto fail; 1997 } 1998 so6 = nfs4_cb_so6; 1999 2000 sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); 2001 sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)); 2002 /* try to use specified port or same port as IPv4 */ 2003 port = nfs_callback_port ? 
nfs_callback_port : nfs4_cb_port; 2004ipv6_bind_again: 2005 sin6.sin6_len = sizeof(struct sockaddr_in6); 2006 sin6.sin6_family = AF_INET6; 2007 sin6.sin6_addr = in6addr_any; 2008 sin6.sin6_port = htons(port); 2009 error = sock_bind(so6, (struct sockaddr *)&sin6); 2010 if (error) { 2011 if (port != nfs_callback_port) { 2012 /* if we simply tried to match the IPv4 port, then try any port */ 2013 port = 0; 2014 goto ipv6_bind_again; 2015 } 2016 log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error); 2017 goto fail; 2018 } 2019 error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len); 2020 if (error) { 2021 log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error); 2022 goto fail; 2023 } 2024 nfs4_cb_port6 = ntohs(sin6.sin6_port); 2025 2026 error = sock_listen(so6, 32); 2027 if (error) { 2028 log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error); 2029 goto fail; 2030 } 2031 2032 /* receive timeout shouldn't matter. 
If timeout on send, we'll want to drop the socket */ 2033 timeo.tv_usec = 0; 2034 timeo.tv_sec = 60; 2035 error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); 2036 if (error) 2037 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error); 2038 error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); 2039 if (error) 2040 log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error); 2041 sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); 2042 sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); 2043 sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); 2044 error = 0; 2045 2046fail: 2047 if (error) { 2048 nfs4_cb_so = nfs4_cb_so6 = NULL; 2049 lck_mtx_unlock(nfs_global_mutex); 2050 if (so) { 2051 sock_shutdown(so, SHUT_RDWR); 2052 sock_close(so); 2053 } 2054 if (so6) { 2055 sock_shutdown(so6, SHUT_RDWR); 2056 sock_close(so6); 2057 } 2058 } else { 2059 lck_mtx_unlock(nfs_global_mutex); 2060 } 2061} 2062 2063/* 2064 * Shut down the callback channel for the NFS mount. 2065 * 2066 * Clears the mount's callback ID and releases the mounts 2067 * reference on the callback socket. Last reference dropped 2068 * will also shut down the callback socket(s). 
2069 */ 2070void 2071nfs4_mount_callback_shutdown(struct nfsmount *nmp) 2072{ 2073 struct nfs_callback_socket *ncbsp; 2074 socket_t so, so6; 2075 struct nfs4_cb_sock_list cb_socks; 2076 struct timespec ts = {1,0}; 2077 2078 lck_mtx_lock(nfs_global_mutex); 2079 TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink); 2080 /* wait for any callbacks in progress to complete */ 2081 while (nmp->nm_cbrefs) 2082 msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts); 2083 nmp->nm_cbid = 0; 2084 if (--nfs4_cb_so_usecount) { 2085 lck_mtx_unlock(nfs_global_mutex); 2086 return; 2087 } 2088 so = nfs4_cb_so; 2089 so6 = nfs4_cb_so6; 2090 nfs4_cb_so = nfs4_cb_so6 = NULL; 2091 TAILQ_INIT(&cb_socks); 2092 TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link); 2093 lck_mtx_unlock(nfs_global_mutex); 2094 if (so) { 2095 sock_shutdown(so, SHUT_RDWR); 2096 sock_close(so); 2097 } 2098 if (so6) { 2099 sock_shutdown(so6, SHUT_RDWR); 2100 sock_close(so6); 2101 } 2102 while ((ncbsp = TAILQ_FIRST(&cb_socks))) { 2103 TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link); 2104 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR); 2105 sock_close(ncbsp->ncbs_so); 2106 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs); 2107 FREE(ncbsp, M_TEMP); 2108 } 2109} 2110 2111/* 2112 * Check periodically for stale/unused nfs callback sockets 2113 */ 2114#define NFS4_CB_TIMER_PERIOD 30 2115#define NFS4_CB_IDLE_MAX 300 2116void 2117nfs4_callback_timer(__unused void *param0, __unused void *param1) 2118{ 2119 struct nfs_callback_socket *ncbsp, *nextncbsp; 2120 struct timeval now; 2121 2122loop: 2123 lck_mtx_lock(nfs_global_mutex); 2124 if (TAILQ_EMPTY(&nfs4_cb_socks)) { 2125 nfs4_callback_timer_on = 0; 2126 lck_mtx_unlock(nfs_global_mutex); 2127 return; 2128 } 2129 microuptime(&now); 2130 TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) { 2131 if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) && 2132 (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX))) 2133 continue; 2134 TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link); 2135 
lck_mtx_unlock(nfs_global_mutex); 2136 sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR); 2137 sock_close(ncbsp->ncbs_so); 2138 nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs); 2139 FREE(ncbsp, M_TEMP); 2140 goto loop; 2141 } 2142 nfs4_callback_timer_on = 1; 2143 nfs_interval_timer_start(nfs4_callback_timer_call, 2144 NFS4_CB_TIMER_PERIOD * 1000); 2145 lck_mtx_unlock(nfs_global_mutex); 2146} 2147 2148/* 2149 * Accept a new callback socket. 2150 */ 2151void 2152nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag) 2153{ 2154 socket_t newso = NULL; 2155 struct nfs_callback_socket *ncbsp; 2156 struct nfsmount *nmp; 2157 struct timeval timeo, now; 2158 int error, on = 1, ip; 2159 2160 if (so == nfs4_cb_so) 2161 ip = 4; 2162 else if (so == nfs4_cb_so6) 2163 ip = 6; 2164 else 2165 return; 2166 2167 /* allocate/initialize a new nfs_callback_socket */ 2168 MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK); 2169 if (!ncbsp) { 2170 log(LOG_ERR, "nfs callback accept: no memory for new socket\n"); 2171 return; 2172 } 2173 bzero(ncbsp, sizeof(*ncbsp)); 2174 ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); 2175 nfs_rpc_record_state_init(&ncbsp->ncbs_rrs); 2176 2177 /* accept a new socket */ 2178 error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr, 2179 ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT, 2180 nfs4_cb_rcv, ncbsp, &newso); 2181 if (error) { 2182 log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip); 2183 FREE(ncbsp, M_TEMP); 2184 return; 2185 } 2186 2187 /* set up the new socket */ 2188 /* receive timeout shouldn't matter. 
If timeout on send, we'll want to drop the socket */ 2189 timeo.tv_usec = 0; 2190 timeo.tv_sec = 60; 2191 error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); 2192 if (error) 2193 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip); 2194 error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); 2195 if (error) 2196 log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip); 2197 sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); 2198 sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); 2199 sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); 2200 sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); 2201 2202 ncbsp->ncbs_so = newso; 2203 microuptime(&now); 2204 ncbsp->ncbs_stamp = now.tv_sec; 2205 2206 lck_mtx_lock(nfs_global_mutex); 2207 2208 /* add it to the list */ 2209 TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link); 2210 2211 /* verify it's from a host we have mounted */ 2212 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { 2213 /* check if socket's source address matches this mount's server address */ 2214 if (!nmp->nm_saddr) 2215 continue; 2216 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) 2217 break; 2218 } 2219 if (!nmp) /* we don't want this socket, mark it dead */ 2220 ncbsp->ncbs_flags |= NCBSOCK_DEAD; 2221 2222 /* make sure the callback socket cleanup timer is running */ 2223 /* (shorten the timer if we've got a socket we don't want) */ 2224 if (!nfs4_callback_timer_on) { 2225 nfs4_callback_timer_on = 1; 2226 nfs_interval_timer_start(nfs4_callback_timer_call, 2227 !nmp ? 
500 : (NFS4_CB_TIMER_PERIOD * 1000)); 2228 } else if (!nmp && (nfs4_callback_timer_on < 2)) { 2229 nfs4_callback_timer_on = 2; 2230 thread_call_cancel(nfs4_callback_timer_call); 2231 nfs_interval_timer_start(nfs4_callback_timer_call, 500); 2232 } 2233 2234 lck_mtx_unlock(nfs_global_mutex); 2235} 2236 2237/* 2238 * Receive mbufs from callback sockets into RPC records and process each record. 2239 * Detect connection has been closed and shut down. 2240 */ 2241void 2242nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag) 2243{ 2244 struct nfs_callback_socket *ncbsp = arg; 2245 struct timespec ts = {1,0}; 2246 struct timeval now; 2247 mbuf_t m; 2248 int error = 0, recv = 1; 2249 2250 lck_mtx_lock(nfs_global_mutex); 2251 while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) { 2252 /* wait if upcall is already in progress */ 2253 ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT; 2254 msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts); 2255 } 2256 ncbsp->ncbs_flags |= NCBSOCK_UPCALL; 2257 lck_mtx_unlock(nfs_global_mutex); 2258 2259 /* loop while we make error-free progress */ 2260 while (!error && recv) { 2261 error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m); 2262 if (m) /* handle the request */ 2263 error = nfs4_cb_handler(ncbsp, m); 2264 } 2265 2266 /* note: no error and no data indicates server closed its end */ 2267 if ((error != EWOULDBLOCK) && (error || !recv)) { 2268 /* 2269 * Socket is either being closed or should be. 2270 * We can't close the socket in the context of the upcall. 2271 * So we mark it as dead and leave it for the cleanup timer to reap. 2272 */ 2273 ncbsp->ncbs_stamp = 0; 2274 ncbsp->ncbs_flags |= NCBSOCK_DEAD; 2275 } else { 2276 microuptime(&now); 2277 ncbsp->ncbs_stamp = now.tv_sec; 2278 } 2279 2280 lck_mtx_lock(nfs_global_mutex); 2281 ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL; 2282 lck_mtx_unlock(nfs_global_mutex); 2283 wakeup(ncbsp); 2284} 2285 2286/* 2287 * Handle an NFS callback channel request. 
2288 */ 2289int 2290nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq) 2291{ 2292 socket_t so = ncbsp->ncbs_so; 2293 struct nfsm_chain nmreq, nmrep; 2294 mbuf_t mhead = NULL, mrest = NULL, m; 2295 struct msghdr msg; 2296 struct nfsmount *nmp; 2297 fhandle_t fh; 2298 nfsnode_t np; 2299 nfs_stateid stateid; 2300 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes; 2301 uint32_t val, xid, procnum, taglen, cbid, numops, op, status; 2302 uint32_t auth_type, auth_len; 2303 uint32_t numres, *pnumres; 2304 int error = 0, replen, len; 2305 size_t sentlen = 0; 2306 2307 xid = numops = op = status = procnum = taglen = cbid = 0; 2308 2309 nfsm_chain_dissect_init(error, &nmreq, mreq); 2310 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID 2311 nfsm_chain_get_32(error, &nmreq, val); // RPC Call 2312 nfsm_assert(error, (val == RPC_CALL), EBADRPC); 2313 nfsm_chain_get_32(error, &nmreq, val); // RPC Version 2314 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH); 2315 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number 2316 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL); 2317 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number 2318 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH); 2319 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number 2320 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL); 2321 2322 /* Handle authentication */ 2323 /* XXX just ignore auth for now - handling kerberos may be tricky */ 2324 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor 2325 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length 2326 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); 2327 if (!error && (auth_len > 0)) 2328 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); 2329 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE) 2330 nfsm_chain_get_32(error, 
&nmreq, auth_len); // verifier length 2331 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); 2332 if (!error && (auth_len > 0)) 2333 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); 2334 if (error) { 2335 status = error; 2336 error = 0; 2337 goto nfsmout; 2338 } 2339 2340 switch (procnum) { 2341 case NFSPROC4_CB_NULL: 2342 status = NFSERR_RETVOID; 2343 break; 2344 case NFSPROC4_CB_COMPOUND: 2345 /* tag, minorversion, cb ident, numops, op array */ 2346 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */ 2347 nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC); 2348 2349 /* start building the body of the response */ 2350 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED); 2351 nfsm_chain_init(&nmrep, mrest); 2352 2353 /* copy tag from request to response */ 2354 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */ 2355 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) { 2356 nfsm_chain_get_32(error, &nmreq, val); 2357 nfsm_chain_add_32(error, &nmrep, val); 2358 } 2359 2360 /* insert number of results placeholder */ 2361 numres = 0; 2362 nfsm_chain_add_32(error, &nmrep, numres); 2363 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED); 2364 2365 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */ 2366 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH); 2367 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */ 2368 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */ 2369 if (error) { 2370 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH)) 2371 status = error; 2372 else if ((error == ENOBUFS) || (error == ENOMEM)) 2373 status = NFSERR_RESOURCE; 2374 else 2375 status = NFSERR_SERVERFAULT; 2376 error = 0; 2377 nfsm_chain_null(&nmrep); 2378 goto nfsmout; 2379 } 2380 /* match the callback ID to a registered mount */ 2381 lck_mtx_lock(nfs_global_mutex); 2382 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { 2383 if (nmp->nm_cbid != cbid) 2384 continue; 2385 /* 
verify socket's source address matches this mount's server address */ 2386 if (!nmp->nm_saddr) 2387 continue; 2388 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) 2389 break; 2390 } 2391 /* mark the NFS mount as busy */ 2392 if (nmp) 2393 nmp->nm_cbrefs++; 2394 lck_mtx_unlock(nfs_global_mutex); 2395 if (!nmp) { 2396 /* if no mount match, just drop socket. */ 2397 error = EPERM; 2398 nfsm_chain_null(&nmrep); 2399 goto out; 2400 } 2401 2402 /* process ops, adding results to mrest */ 2403 while (numops > 0) { 2404 numops--; 2405 nfsm_chain_get_32(error, &nmreq, op); 2406 if (error) 2407 break; 2408 switch (op) { 2409 case NFS_OP_CB_GETATTR: 2410 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS) 2411 np = NULL; 2412 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); 2413 bmlen = NFS_ATTR_BITMAP_LEN; 2414 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen); 2415 if (error) { 2416 status = error; 2417 error = 0; 2418 numops = 0; /* don't process any more ops */ 2419 } else { 2420 /* find the node for the file handle */ 2421 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); 2422 if (error || !np) { 2423 status = NFSERR_BADHANDLE; 2424 error = 0; 2425 np = NULL; 2426 numops = 0; /* don't process any more ops */ 2427 } 2428 } 2429 nfsm_chain_add_32(error, &nmrep, op); 2430 nfsm_chain_add_32(error, &nmrep, status); 2431 if (!error && (status == EBADRPC)) 2432 error = status; 2433 if (np) { 2434 /* only allow returning size, change, and mtime attrs */ 2435 NFS_CLEAR_ATTRIBUTES(&rbitmap); 2436 attrbytes = 0; 2437 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) { 2438 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE); 2439 attrbytes += 2 * NFSX_UNSIGNED; 2440 } 2441 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) { 2442 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE); 2443 attrbytes += 2 * NFSX_UNSIGNED; 2444 } 2445 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { 2446 NFS_BITMAP_SET(&rbitmap, 
NFS_FATTR_TIME_MODIFY); 2447 attrbytes += 3 * NFSX_UNSIGNED; 2448 } 2449 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN); 2450 nfsm_chain_add_32(error, &nmrep, attrbytes); 2451 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) 2452 nfsm_chain_add_64(error, &nmrep, 2453 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0)); 2454 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) 2455 nfsm_chain_add_64(error, &nmrep, np->n_size); 2456 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { 2457 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]); 2458 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]); 2459 } 2460 nfs_node_unlock(np); 2461 vnode_put(NFSTOV(np)); 2462 np = NULL; 2463 } 2464 /* 2465 * If we hit an error building the reply, we can't easily back up. 2466 * So we'll just update the status and hope the server ignores the 2467 * extra garbage. 2468 */ 2469 break; 2470 case NFS_OP_CB_RECALL: 2471 // (STATEID, TRUNCATE, FH) -> (STATUS) 2472 np = NULL; 2473 nfsm_chain_get_stateid(error, &nmreq, &stateid); 2474 nfsm_chain_get_32(error, &nmreq, truncate); 2475 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); 2476 if (error) { 2477 status = error; 2478 error = 0; 2479 numops = 0; /* don't process any more ops */ 2480 } else { 2481 /* find the node for the file handle */ 2482 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); 2483 if (error || !np) { 2484 status = NFSERR_BADHANDLE; 2485 error = 0; 2486 np = NULL; 2487 numops = 0; /* don't process any more ops */ 2488 } else if (!(np->n_openflags & N_DELEG_MASK) || 2489 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) { 2490 /* delegation stateid state doesn't match */ 2491 status = NFSERR_BAD_STATEID; 2492 numops = 0; /* don't process any more ops */ 2493 } 2494 if (!status) /* add node to recall queue, and wake socket thread */ 2495 nfs4_delegation_return_enqueue(np); 2496 if (np) { 
2497 nfs_node_unlock(np); 2498 vnode_put(NFSTOV(np)); 2499 } 2500 } 2501 nfsm_chain_add_32(error, &nmrep, op); 2502 nfsm_chain_add_32(error, &nmrep, status); 2503 if (!error && (status == EBADRPC)) 2504 error = status; 2505 break; 2506 case NFS_OP_CB_ILLEGAL: 2507 default: 2508 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL); 2509 status = NFSERR_OP_ILLEGAL; 2510 nfsm_chain_add_32(error, &nmrep, status); 2511 numops = 0; /* don't process any more ops */ 2512 break; 2513 } 2514 numres++; 2515 } 2516 2517 if (!status && error) { 2518 if (error == EBADRPC) 2519 status = error; 2520 else if ((error == ENOBUFS) || (error == ENOMEM)) 2521 status = NFSERR_RESOURCE; 2522 else 2523 status = NFSERR_SERVERFAULT; 2524 error = 0; 2525 } 2526 2527 /* Now, set the numres field */ 2528 *pnumres = txdr_unsigned(numres); 2529 nfsm_chain_build_done(error, &nmrep); 2530 nfsm_chain_null(&nmrep); 2531 2532 /* drop the callback reference on the mount */ 2533 lck_mtx_lock(nfs_global_mutex); 2534 nmp->nm_cbrefs--; 2535 if (!nmp->nm_cbid) 2536 wakeup(&nmp->nm_cbrefs); 2537 lck_mtx_unlock(nfs_global_mutex); 2538 break; 2539 } 2540 2541nfsmout: 2542 if (status == EBADRPC) 2543 OSAddAtomic64(1, &nfsstats.rpcinvalid); 2544 2545 /* build reply header */ 2546 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead); 2547 nfsm_chain_init(&nmrep, mhead); 2548 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */ 2549 nfsm_chain_add_32(error, &nmrep, xid); 2550 nfsm_chain_add_32(error, &nmrep, RPC_REPLY); 2551 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) { 2552 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); 2553 if (status & NFSERR_AUTHERR) { 2554 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); 2555 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR)); 2556 } else { 2557 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); 2558 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 2559 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 2560 } 2561 } else { 2562 /* reply 
status */ 2563 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED); 2564 /* XXX RPCAUTH_NULL verifier */ 2565 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL); 2566 nfsm_chain_add_32(error, &nmrep, 0); 2567 /* accepted status */ 2568 switch (status) { 2569 case EPROGUNAVAIL: 2570 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL); 2571 break; 2572 case EPROGMISMATCH: 2573 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH); 2574 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION); 2575 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION); 2576 break; 2577 case EPROCUNAVAIL: 2578 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL); 2579 break; 2580 case EBADRPC: 2581 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE); 2582 break; 2583 default: 2584 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS); 2585 if (status != NFSERR_RETVOID) 2586 nfsm_chain_add_32(error, &nmrep, status); 2587 break; 2588 } 2589 } 2590 nfsm_chain_build_done(error, &nmrep); 2591 if (error) { 2592 nfsm_chain_null(&nmrep); 2593 goto out; 2594 } 2595 error = mbuf_setnext(nmrep.nmc_mcur, mrest); 2596 if (error) { 2597 printf("nfs cb: mbuf_setnext failed %d\n", error); 2598 goto out; 2599 } 2600 mrest = NULL; 2601 /* Calculate the size of the reply */ 2602 replen = 0; 2603 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m)) 2604 replen += mbuf_len(m); 2605 mbuf_pkthdr_setlen(mhead, replen); 2606 error = mbuf_pkthdr_setrcvif(mhead, NULL); 2607 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000); 2608 nfsm_chain_null(&nmrep); 2609 2610 /* send the reply */ 2611 bzero(&msg, sizeof(msg)); 2612 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen); 2613 mhead = NULL; 2614 if (!error && ((int)sentlen != replen)) 2615 error = EWOULDBLOCK; 2616 if (error == EWOULDBLOCK) /* inability to send response is considered fatal */ 2617 error = ETIMEDOUT; 2618out: 2619 if (error) 2620 nfsm_chain_cleanup(&nmrep); 2621 if (mhead) 2622 mbuf_freem(mhead); 2623 if (mrest) 2624 mbuf_freem(mrest); 2625 if 
(mreq) 2626 mbuf_freem(mreq); 2627 return (error); 2628} 2629 2630 2631/* 2632 * Initialize an nfs_rpc_record_state structure. 2633 */ 2634void 2635nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp) 2636{ 2637 bzero(nrrsp, sizeof(*nrrsp)); 2638 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft); 2639} 2640 2641/* 2642 * Clean up an nfs_rpc_record_state structure. 2643 */ 2644void 2645nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp) 2646{ 2647 if (nrrsp->nrrs_m) { 2648 mbuf_freem(nrrsp->nrrs_m); 2649 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL; 2650 } 2651} 2652 2653/* 2654 * Read the next (marked) RPC record from the socket. 2655 * 2656 * *recvp returns if any data was received. 2657 * *mp returns the next complete RPC record 2658 */ 2659int 2660nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp) 2661{ 2662 struct iovec aio; 2663 struct msghdr msg; 2664 size_t rcvlen; 2665 int error = 0; 2666 mbuf_t m; 2667 2668 *recvp = 0; 2669 *mp = NULL; 2670 2671 /* read the TCP RPC record marker */ 2672 while (!error && nrrsp->nrrs_markerleft) { 2673 aio.iov_base = ((char*)&nrrsp->nrrs_fragleft + 2674 sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft); 2675 aio.iov_len = nrrsp->nrrs_markerleft; 2676 bzero(&msg, sizeof(msg)); 2677 msg.msg_iov = &aio; 2678 msg.msg_iovlen = 1; 2679 error = sock_receive(so, &msg, flags, &rcvlen); 2680 if (error || !rcvlen) 2681 break; 2682 *recvp = 1; 2683 nrrsp->nrrs_markerleft -= rcvlen; 2684 if (nrrsp->nrrs_markerleft) 2685 continue; 2686 /* record marker complete */ 2687 nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft); 2688 if (nrrsp->nrrs_fragleft & 0x80000000) { 2689 nrrsp->nrrs_lastfrag = 1; 2690 nrrsp->nrrs_fragleft &= ~0x80000000; 2691 } 2692 nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft; 2693 if (nrrsp->nrrs_reclen > NFS_MAXPACKET) { 2694 /* This is SERIOUS! We are out of sync with the sender. 
*/ 2695 log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen); 2696 error = EFBIG; 2697 } 2698 } 2699 2700 /* read the TCP RPC record fragment */ 2701 while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) { 2702 m = NULL; 2703 rcvlen = nrrsp->nrrs_fragleft; 2704 error = sock_receivembuf(so, NULL, &m, flags, &rcvlen); 2705 if (error || !rcvlen || !m) 2706 break; 2707 *recvp = 1; 2708 /* append mbufs to list */ 2709 nrrsp->nrrs_fragleft -= rcvlen; 2710 if (!nrrsp->nrrs_m) { 2711 nrrsp->nrrs_m = m; 2712 } else { 2713 error = mbuf_setnext(nrrsp->nrrs_mlast, m); 2714 if (error) { 2715 printf("nfs tcp rcv: mbuf_setnext failed %d\n", error); 2716 mbuf_freem(m); 2717 break; 2718 } 2719 } 2720 while (mbuf_next(m)) 2721 m = mbuf_next(m); 2722 nrrsp->nrrs_mlast = m; 2723 } 2724 2725 /* done reading fragment? */ 2726 if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) { 2727 /* reset socket fragment parsing state */ 2728 nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft); 2729 if (nrrsp->nrrs_lastfrag) { 2730 /* RPC record complete */ 2731 *mp = nrrsp->nrrs_m; 2732 /* reset socket record parsing state */ 2733 nrrsp->nrrs_reclen = 0; 2734 nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL; 2735 nrrsp->nrrs_lastfrag = 0; 2736 } 2737 } 2738 2739 return (error); 2740} 2741 2742 2743 2744/* 2745 * The NFS client send routine. 2746 * 2747 * Send the given NFS request out the mount's socket. 2748 * Holds nfs_sndlock() for the duration of this call. 2749 * 2750 * - check for request termination (sigintr) 2751 * - wait for reconnect, if necessary 2752 * - UDP: check the congestion window 2753 * - make a copy of the request to send 2754 * - UDP: update the congestion window 2755 * - send the request 2756 * 2757 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared. 2758 * rexmit count is also updated if this isn't the first send. 2759 * 2760 * If the send is not successful, make sure R_MUSTRESEND is set. 
2761 * If this wasn't the first transmit, set R_RESENDERR. 2762 * Also, undo any UDP congestion window changes made. 2763 * 2764 * If the error appears to indicate that the socket should 2765 * be reconnected, mark the socket for reconnection. 2766 * 2767 * Only return errors when the request should be aborted. 2768 */ 2769int 2770nfs_send(struct nfsreq *req, int wait) 2771{ 2772 struct nfsmount *nmp; 2773 struct nfs_socket *nso; 2774 int error, error2, sotype, rexmit, slpflag = 0, needrecon; 2775 struct msghdr msg; 2776 struct sockaddr *sendnam; 2777 mbuf_t mreqcopy; 2778 size_t sentlen = 0; 2779 struct timespec ts = { 2, 0 }; 2780 2781again: 2782 error = nfs_sndlock(req); 2783 if (error) { 2784 lck_mtx_lock(&req->r_mtx); 2785 req->r_error = error; 2786 req->r_flags &= ~R_SENDING; 2787 lck_mtx_unlock(&req->r_mtx); 2788 return (error); 2789 } 2790 2791 error = nfs_sigintr(req->r_nmp, req, NULL, 0); 2792 if (error) { 2793 nfs_sndunlock(req); 2794 lck_mtx_lock(&req->r_mtx); 2795 req->r_error = error; 2796 req->r_flags &= ~R_SENDING; 2797 lck_mtx_unlock(&req->r_mtx); 2798 return (error); 2799 } 2800 nmp = req->r_nmp; 2801 sotype = nmp->nm_sotype; 2802 2803 /* 2804 * If it's a setup RPC but we're not in SETUP... must need reconnect. 2805 * If it's a recovery RPC but the socket's not ready... must need reconnect. 2806 */ 2807 if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) || 2808 ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) { 2809 error = ETIMEDOUT; 2810 nfs_sndunlock(req); 2811 lck_mtx_lock(&req->r_mtx); 2812 req->r_error = error; 2813 req->r_flags &= ~R_SENDING; 2814 lck_mtx_unlock(&req->r_mtx); 2815 return (error); 2816 } 2817 2818 /* If the socket needs reconnection, do that now. 
*/ 2819 /* wait until socket is ready - unless this request is part of setup */ 2820 lck_mtx_lock(&nmp->nm_lock); 2821 if (!(nmp->nm_sockflags & NMSOCK_READY) && 2822 !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) { 2823 if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) 2824 slpflag |= PCATCH; 2825 lck_mtx_unlock(&nmp->nm_lock); 2826 nfs_sndunlock(req); 2827 if (!wait) { 2828 lck_mtx_lock(&req->r_mtx); 2829 req->r_flags &= ~R_SENDING; 2830 req->r_flags |= R_MUSTRESEND; 2831 req->r_rtt = 0; 2832 lck_mtx_unlock(&req->r_mtx); 2833 return (0); 2834 } 2835 NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid)); 2836 lck_mtx_lock(&req->r_mtx); 2837 req->r_flags &= ~R_MUSTRESEND; 2838 req->r_rtt = 0; 2839 lck_mtx_unlock(&req->r_mtx); 2840 lck_mtx_lock(&nmp->nm_lock); 2841 while (!(nmp->nm_sockflags & NMSOCK_READY)) { 2842 /* don't bother waiting if the socket thread won't be reconnecting it */ 2843 if (nmp->nm_state & NFSSTA_FORCE) { 2844 error = EIO; 2845 break; 2846 } 2847 if (NMFLAG(nmp, SOFT) && (nmp->nm_reconnect_start > 0)) { 2848 struct timeval now; 2849 microuptime(&now); 2850 if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) { 2851 /* soft mount in reconnect for a while... 
terminate ASAP */ 2852 OSAddAtomic64(1, &nfsstats.rpctimeouts); 2853 req->r_flags |= R_SOFTTERM; 2854 req->r_error = error = ETIMEDOUT; 2855 break; 2856 } 2857 } 2858 /* make sure socket thread is running, then wait */ 2859 nfs_mount_sock_thread_wake(nmp); 2860 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1))) 2861 break; 2862 msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts); 2863 slpflag = 0; 2864 } 2865 lck_mtx_unlock(&nmp->nm_lock); 2866 if (error) { 2867 lck_mtx_lock(&req->r_mtx); 2868 req->r_error = error; 2869 req->r_flags &= ~R_SENDING; 2870 lck_mtx_unlock(&req->r_mtx); 2871 return (error); 2872 } 2873 goto again; 2874 } 2875 nso = nmp->nm_nso; 2876 /* note that we're using the mount's socket to do the send */ 2877 nmp->nm_state |= NFSSTA_SENDING; /* will be cleared by nfs_sndunlock() */ 2878 lck_mtx_unlock(&nmp->nm_lock); 2879 if (!nso) { 2880 nfs_sndunlock(req); 2881 lck_mtx_lock(&req->r_mtx); 2882 req->r_flags &= ~R_SENDING; 2883 req->r_flags |= R_MUSTRESEND; 2884 req->r_rtt = 0; 2885 lck_mtx_unlock(&req->r_mtx); 2886 return (0); 2887 } 2888 2889 lck_mtx_lock(&req->r_mtx); 2890 rexmit = (req->r_flags & R_SENT); 2891 2892 if (sotype == SOCK_DGRAM) { 2893 lck_mtx_lock(&nmp->nm_lock); 2894 if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) { 2895 /* if we can't send this out yet, wait on the cwnd queue */ 2896 slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? 
PCATCH : 0; 2897 lck_mtx_unlock(&nmp->nm_lock); 2898 nfs_sndunlock(req); 2899 req->r_flags &= ~R_SENDING; 2900 req->r_flags |= R_MUSTRESEND; 2901 lck_mtx_unlock(&req->r_mtx); 2902 if (!wait) { 2903 req->r_rtt = 0; 2904 return (0); 2905 } 2906 lck_mtx_lock(&nmp->nm_lock); 2907 while (nmp->nm_sent >= nmp->nm_cwnd) { 2908 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1))) 2909 break; 2910 TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain); 2911 msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts); 2912 slpflag = 0; 2913 if ((req->r_cchain.tqe_next != NFSREQNOLIST)) { 2914 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain); 2915 req->r_cchain.tqe_next = NFSREQNOLIST; 2916 } 2917 } 2918 lck_mtx_unlock(&nmp->nm_lock); 2919 goto again; 2920 } 2921 /* 2922 * We update these *before* the send to avoid racing 2923 * against others who may be looking to send requests. 2924 */ 2925 if (!rexmit) { 2926 /* first transmit */ 2927 req->r_flags |= R_CWND; 2928 nmp->nm_sent += NFS_CWNDSCALE; 2929 } else { 2930 /* 2931 * When retransmitting, turn timing off 2932 * and divide congestion window by 2. 2933 */ 2934 req->r_flags &= ~R_TIMING; 2935 nmp->nm_cwnd >>= 1; 2936 if (nmp->nm_cwnd < NFS_CWNDSCALE) 2937 nmp->nm_cwnd = NFS_CWNDSCALE; 2938 } 2939 lck_mtx_unlock(&nmp->nm_lock); 2940 } 2941 2942 req->r_flags &= ~R_MUSTRESEND; 2943 lck_mtx_unlock(&req->r_mtx); 2944 2945 error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL, 2946 wait ? 
MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy); 2947 if (error) { 2948 if (wait) 2949 log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error); 2950 nfs_sndunlock(req); 2951 lck_mtx_lock(&req->r_mtx); 2952 req->r_flags &= ~R_SENDING; 2953 req->r_flags |= R_MUSTRESEND; 2954 req->r_rtt = 0; 2955 lck_mtx_unlock(&req->r_mtx); 2956 return (0); 2957 } 2958 2959 bzero(&msg, sizeof(msg)); 2960 if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) { 2961 msg.msg_name = (caddr_t)sendnam; 2962 msg.msg_namelen = sendnam->sa_len; 2963 } 2964 error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen); 2965#ifdef NFS_SOCKET_DEBUGGING 2966 if (error || (sentlen != req->r_mreqlen)) 2967 NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n", 2968 req->r_xid, (int)sentlen, (int)req->r_mreqlen, error)); 2969#endif 2970 if (!error && (sentlen != req->r_mreqlen)) 2971 error = EWOULDBLOCK; 2972 needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen)); 2973 2974 lck_mtx_lock(&req->r_mtx); 2975 req->r_flags &= ~R_SENDING; 2976 req->r_rtt = 0; 2977 if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT)) 2978 req->r_rexmit = NFS_MAXREXMIT; 2979 2980 if (!error) { 2981 /* SUCCESS */ 2982 req->r_flags &= ~R_RESENDERR; 2983 if (rexmit) 2984 OSAddAtomic64(1, &nfsstats.rpcretries); 2985 req->r_flags |= R_SENT; 2986 if (req->r_flags & R_WAITSENT) { 2987 req->r_flags &= ~R_WAITSENT; 2988 wakeup(req); 2989 } 2990 nfs_sndunlock(req); 2991 lck_mtx_unlock(&req->r_mtx); 2992 return (0); 2993 } 2994 2995 /* send failed */ 2996 req->r_flags |= R_MUSTRESEND; 2997 if (rexmit) 2998 req->r_flags |= R_RESENDERR; 2999 if ((error == EINTR) || (error == ERESTART)) 3000 req->r_error = error; 3001 lck_mtx_unlock(&req->r_mtx); 3002 3003 if (sotype == SOCK_DGRAM) { 3004 /* 3005 * Note: even though a first send may fail, we consider 3006 * the request sent for congestion window purposes. 3007 * So we don't need to undo any of the changes made above. 
3008 */ 3009 /* 3010 * Socket errors ignored for connectionless sockets?? 3011 * For now, ignore them all 3012 */ 3013 if ((error != EINTR) && (error != ERESTART) && 3014 (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) { 3015 int clearerror = 0, optlen = sizeof(clearerror); 3016 sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen); 3017#ifdef NFS_SOCKET_DEBUGGING 3018 if (clearerror) 3019 NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n", 3020 error, clearerror)); 3021#endif 3022 } 3023 } 3024 3025 /* check if it appears we should reconnect the socket */ 3026 switch (error) { 3027 case EWOULDBLOCK: 3028 /* if send timed out, reconnect if on TCP */ 3029 if (sotype != SOCK_STREAM) 3030 break; 3031 case EPIPE: 3032 case EADDRNOTAVAIL: 3033 case ENETDOWN: 3034 case ENETUNREACH: 3035 case ENETRESET: 3036 case ECONNABORTED: 3037 case ECONNRESET: 3038 case ENOTCONN: 3039 case ESHUTDOWN: 3040 case ECONNREFUSED: 3041 case EHOSTDOWN: 3042 case EHOSTUNREACH: 3043 needrecon = 1; 3044 break; 3045 } 3046 if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */ 3047 NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error)); 3048 nfs_need_reconnect(nmp); 3049 } 3050 3051 nfs_sndunlock(req); 3052 3053 /* 3054 * Don't log some errors: 3055 * EPIPE errors may be common with servers that drop idle connections. 3056 * EADDRNOTAVAIL may occur on network transitions. 3057 * ENOTCONN may occur under some network conditions. 3058 */ 3059 if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN)) 3060 error = 0; 3061 if (error && (error != EINTR) && (error != ERESTART)) 3062 log(LOG_INFO, "nfs send error %d for server %s\n", error, 3063 !req->r_nmp ? 
"<unmounted>" : 3064 vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname); 3065 3066 if (nfs_is_dead(error, nmp)) 3067 error = EIO; 3068 3069 /* prefer request termination error over other errors */ 3070 error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0); 3071 if (error2) 3072 error = error2; 3073 3074 /* only allow the following errors to be returned */ 3075 if ((error != EINTR) && (error != ERESTART) && (error != EIO) && 3076 (error != ENXIO) && (error != ETIMEDOUT)) 3077 error = 0; 3078 return (error); 3079} 3080 3081/* 3082 * NFS client socket upcalls 3083 * 3084 * Pull RPC replies out of an NFS mount's socket and match them 3085 * up with the pending request. 3086 * 3087 * The datagram code is simple because we always get whole 3088 * messages out of the socket. 3089 * 3090 * The stream code is more involved because we have to parse 3091 * the RPC records out of the stream. 3092 */ 3093 3094/* NFS client UDP socket upcall */ 3095void 3096nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag) 3097{ 3098 struct nfsmount *nmp = arg; 3099 struct nfs_socket *nso = nmp->nm_nso; 3100 size_t rcvlen; 3101 mbuf_t m; 3102 int error = 0; 3103 3104 if (nmp->nm_sockflags & NMSOCK_CONNECTING) 3105 return; 3106 3107 do { 3108 /* make sure we're on the current socket */ 3109 if (!nso || (nso->nso_so != so)) 3110 return; 3111 3112 m = NULL; 3113 rcvlen = 1000000; 3114 error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen); 3115 if (m) 3116 nfs_request_match_reply(nmp, m); 3117 } while (m && !error); 3118 3119 if (error && (error != EWOULDBLOCK)) { 3120 /* problems with the socket... 
mark for reconnection */ 3121 NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error)); 3122 nfs_need_reconnect(nmp); 3123 } 3124} 3125 3126/* NFS client TCP socket upcall */ 3127void 3128nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) 3129{ 3130 struct nfsmount *nmp = arg; 3131 struct nfs_socket *nso = nmp->nm_nso; 3132 struct nfs_rpc_record_state nrrs; 3133 mbuf_t m; 3134 int error = 0; 3135 int recv = 1; 3136 3137 if (nmp->nm_sockflags & NMSOCK_CONNECTING) 3138 return; 3139 3140 /* make sure we're on the current socket */ 3141 lck_mtx_lock(&nmp->nm_lock); 3142 nso = nmp->nm_nso; 3143 if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) { 3144 lck_mtx_unlock(&nmp->nm_lock); 3145 return; 3146 } 3147 lck_mtx_unlock(&nmp->nm_lock); 3148 3149 /* make sure this upcall should be trying to do work */ 3150 lck_mtx_lock(&nso->nso_lock); 3151 if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) { 3152 lck_mtx_unlock(&nso->nso_lock); 3153 return; 3154 } 3155 nso->nso_flags |= NSO_UPCALL; 3156 nrrs = nso->nso_rrs; 3157 lck_mtx_unlock(&nso->nso_lock); 3158 3159 /* loop while we make error-free progress */ 3160 while (!error && recv) { 3161 error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m); 3162 if (m) /* match completed response with request */ 3163 nfs_request_match_reply(nmp, m); 3164 } 3165 3166 lck_mtx_lock(&nmp->nm_lock); 3167 if (nmp->nm_nso == nso) { 3168 /* still the same socket, so update socket's RPC parsing state */ 3169 lck_mtx_unlock(&nmp->nm_lock); 3170 lck_mtx_lock(&nso->nso_lock); 3171 nso->nso_rrs = nrrs; 3172 nso->nso_flags &= ~NSO_UPCALL; 3173 lck_mtx_unlock(&nso->nso_lock); 3174 if (nmp->nm_sockflags & NMSOCK_DISCONNECTING) 3175 wakeup(&nmp->nm_sockflags); 3176 } else { 3177 lck_mtx_unlock(&nmp->nm_lock); 3178 } 3179#ifdef NFS_SOCKET_DEBUGGING 3180 if (!recv && (error != EWOULDBLOCK)) 3181 NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error)); 3182#endif 3183 /* note: no error 
and no data indicates server closed its end */ 3184 if ((error != EWOULDBLOCK) && (error || !recv)) { 3185 /* problems with the socket... mark for reconnection */ 3186 NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error)); 3187 nfs_need_reconnect(nmp); 3188 } 3189} 3190 3191/* 3192 * "poke" a socket to try to provoke any pending errors 3193 */ 3194void 3195nfs_sock_poke(struct nfsmount *nmp) 3196{ 3197 struct iovec aio; 3198 struct msghdr msg; 3199 size_t len; 3200 int error = 0; 3201 int dummy; 3202 3203 lck_mtx_lock(&nmp->nm_lock); 3204 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || 3205 !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) { 3206 lck_mtx_unlock(&nmp->nm_lock); 3207 return; 3208 } 3209 lck_mtx_unlock(&nmp->nm_lock); 3210 aio.iov_base = &dummy; 3211 aio.iov_len = 0; 3212 len = 0; 3213 bzero(&msg, sizeof(msg)); 3214 msg.msg_iov = &aio; 3215 msg.msg_iovlen = 1; 3216 error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len); 3217 NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error)); 3218 nfs_is_dead(error, nmp); 3219} 3220 3221/* 3222 * Match an RPC reply with the corresponding request 3223 */ 3224void 3225nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep) 3226{ 3227 struct nfsreq *req; 3228 struct nfsm_chain nmrep; 3229 u_int32_t reply = 0, rxid = 0; 3230 int error = 0, asyncioq, t1; 3231 3232 /* Get the xid and check that it is an rpc reply */ 3233 nfsm_chain_dissect_init(error, &nmrep, mrep); 3234 nfsm_chain_get_32(error, &nmrep, rxid); 3235 nfsm_chain_get_32(error, &nmrep, reply); 3236 if (error || (reply != RPC_REPLY)) { 3237 OSAddAtomic64(1, &nfsstats.rpcinvalid); 3238 mbuf_freem(mrep); 3239 return; 3240 } 3241 3242 /* 3243 * Loop through the request list to match up the reply 3244 * Iff no match, just drop it. 
3245 */ 3246 lck_mtx_lock(nfs_request_mutex); 3247 TAILQ_FOREACH(req, &nfs_reqq, r_chain) { 3248 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) 3249 continue; 3250 /* looks like we have it, grab lock and double check */ 3251 lck_mtx_lock(&req->r_mtx); 3252 if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) { 3253 lck_mtx_unlock(&req->r_mtx); 3254 continue; 3255 } 3256 /* Found it.. */ 3257 req->r_nmrep = nmrep; 3258 lck_mtx_lock(&nmp->nm_lock); 3259 if (nmp->nm_sotype == SOCK_DGRAM) { 3260 /* 3261 * Update congestion window. 3262 * Do the additive increase of one rpc/rtt. 3263 */ 3264 FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd); 3265 if (nmp->nm_cwnd <= nmp->nm_sent) { 3266 nmp->nm_cwnd += 3267 ((NFS_CWNDSCALE * NFS_CWNDSCALE) + 3268 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 3269 if (nmp->nm_cwnd > NFS_MAXCWND) 3270 nmp->nm_cwnd = NFS_MAXCWND; 3271 } 3272 if (req->r_flags & R_CWND) { 3273 nmp->nm_sent -= NFS_CWNDSCALE; 3274 req->r_flags &= ~R_CWND; 3275 } 3276 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) { 3277 /* congestion window is open, poke the cwnd queue */ 3278 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq); 3279 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain); 3280 req2->r_cchain.tqe_next = NFSREQNOLIST; 3281 wakeup(req2); 3282 } 3283 } 3284 /* 3285 * Update rtt using a gain of 0.125 on the mean 3286 * and a gain of 0.25 on the deviation. 3287 */ 3288 if (req->r_flags & R_TIMING) { 3289 /* 3290 * Since the timer resolution of 3291 * NFS_HZ is so course, it can often 3292 * result in r_rtt == 0. Since 3293 * r_rtt == N means that the actual 3294 * rtt is between N+dt and N+2-dt ticks, 3295 * add 1. 
3296 */ 3297 if (proct[req->r_procnum] == 0) 3298 panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum); 3299 t1 = req->r_rtt + 1; 3300 t1 -= (NFS_SRTT(req) >> 3); 3301 NFS_SRTT(req) += t1; 3302 if (t1 < 0) 3303 t1 = -t1; 3304 t1 -= (NFS_SDRTT(req) >> 2); 3305 NFS_SDRTT(req) += t1; 3306 } 3307 nmp->nm_timeouts = 0; 3308 lck_mtx_unlock(&nmp->nm_lock); 3309 /* signal anyone waiting on this request */ 3310 wakeup(req); 3311 asyncioq = (req->r_callback.rcb_func != NULL); 3312 if (nfs_request_using_gss(req)) 3313 nfs_gss_clnt_rpcdone(req); 3314 lck_mtx_unlock(&req->r_mtx); 3315 lck_mtx_unlock(nfs_request_mutex); 3316 /* if it's an async RPC with a callback, queue it up */ 3317 if (asyncioq) 3318 nfs_asyncio_finish(req); 3319 break; 3320 } 3321 3322 if (!req) { 3323 /* not matched to a request, so drop it. */ 3324 lck_mtx_unlock(nfs_request_mutex); 3325 OSAddAtomic64(1, &nfsstats.rpcunexpected); 3326 mbuf_freem(mrep); 3327 } 3328} 3329 3330/* 3331 * Wait for the reply for a given request... 3332 * ...potentially resending the request if necessary. 3333 */ 3334int 3335nfs_wait_reply(struct nfsreq *req) 3336{ 3337 struct timespec ts = { 2, 0 }; 3338 int error = 0, slpflag, first = 1; 3339 3340 if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) 3341 slpflag = PCATCH; 3342 else 3343 slpflag = 0; 3344 3345 lck_mtx_lock(&req->r_mtx); 3346 while (!req->r_nmrep.nmc_mhead) { 3347 if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0))) 3348 break; 3349 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) 3350 break; 3351 /* check if we need to resend */ 3352 if (req->r_flags & R_MUSTRESEND) { 3353 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n", 3354 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt)); 3355 req->r_flags |= R_SENDING; 3356 lck_mtx_unlock(&req->r_mtx); 3357 if (nfs_request_using_gss(req)) { 3358 /* 3359 * It's an RPCSEC_GSS request. 
3360 * Can't just resend the original request 3361 * without bumping the cred sequence number. 3362 * Go back and re-build the request. 3363 */ 3364 lck_mtx_lock(&req->r_mtx); 3365 req->r_flags &= ~R_SENDING; 3366 lck_mtx_unlock(&req->r_mtx); 3367 return (EAGAIN); 3368 } 3369 error = nfs_send(req, 1); 3370 lck_mtx_lock(&req->r_mtx); 3371 NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n", 3372 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error)); 3373 if (error) 3374 break; 3375 if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) 3376 break; 3377 } 3378 /* need to poll if we're P_NOREMOTEHANG */ 3379 if (nfs_noremotehang(req->r_thread)) 3380 ts.tv_sec = 1; 3381 msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts); 3382 first = slpflag = 0; 3383 } 3384 lck_mtx_unlock(&req->r_mtx); 3385 3386 return (error); 3387} 3388 3389/* 3390 * An NFS request goes something like this: 3391 * (nb: always frees up mreq mbuf list) 3392 * nfs_request_create() 3393 * - allocates a request struct if one is not provided 3394 * - initial fill-in of the request struct 3395 * nfs_request_add_header() 3396 * - add the RPC header 3397 * nfs_request_send() 3398 * - link it into list 3399 * - call nfs_send() for first transmit 3400 * nfs_request_wait() 3401 * - call nfs_wait_reply() to wait for the reply 3402 * nfs_request_finish() 3403 * - break down rpc header and return with error or nfs reply 3404 * pointed to by nmrep. 3405 * nfs_request_rele() 3406 * nfs_request_destroy() 3407 * - clean up the request struct 3408 * - free the request struct if it was allocated by nfs_request_create() 3409 */ 3410 3411/* 3412 * Set up an NFS request struct (allocating if no request passed in). 
3413 */ 3414int 3415nfs_request_create( 3416 nfsnode_t np, 3417 mount_t mp, /* used only if !np */ 3418 struct nfsm_chain *nmrest, 3419 int procnum, 3420 thread_t thd, 3421 kauth_cred_t cred, 3422 struct nfsreq **reqp) 3423{ 3424 struct nfsreq *req, *newreq = NULL; 3425 struct nfsmount *nmp; 3426 3427 req = *reqp; 3428 if (!req) { 3429 /* allocate a new NFS request structure */ 3430 MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK); 3431 if (!newreq) { 3432 mbuf_freem(nmrest->nmc_mhead); 3433 nmrest->nmc_mhead = NULL; 3434 return (ENOMEM); 3435 } 3436 req = newreq; 3437 } 3438 3439 bzero(req, sizeof(*req)); 3440 if (req == newreq) 3441 req->r_flags = R_ALLOCATED; 3442 3443 nmp = VFSTONFS(np ? NFSTOMP(np) : mp); 3444 if (!nmp) { 3445 if (newreq) 3446 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ); 3447 return (ENXIO); 3448 } 3449 lck_mtx_lock(&nmp->nm_lock); 3450 if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) == 3451 (NFSSTA_FORCE|NFSSTA_TIMEO)) { 3452 lck_mtx_unlock(&nmp->nm_lock); 3453 mbuf_freem(nmrest->nmc_mhead); 3454 nmrest->nmc_mhead = NULL; 3455 if (newreq) 3456 FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ); 3457 return (ENXIO); 3458 } 3459 3460 if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS)) 3461 OSAddAtomic64(1, &nfsstats.rpccnt[procnum]); 3462 if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL)) 3463 panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum); 3464 3465 lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL); 3466 req->r_nmp = nmp; 3467 req->r_np = np; 3468 req->r_thread = thd; 3469 if (!thd) 3470 req->r_flags |= R_NOINTR; 3471 if (IS_VALID_CRED(cred)) { 3472 kauth_cred_ref(cred); 3473 req->r_cred = cred; 3474 } 3475 req->r_procnum = procnum; 3476 if (proct[procnum] > 0) 3477 req->r_flags |= R_TIMING; 3478 req->r_nmrep.nmc_mhead = NULL; 3479 SLIST_INIT(&req->r_gss_seqlist); 3480 req->r_achain.tqe_next = NFSREQNOLIST; 3481 
req->r_rchain.tqe_next = NFSREQNOLIST; 3482 req->r_cchain.tqe_next = NFSREQNOLIST; 3483 3484 /* set auth flavor to use for request */ 3485 if (!req->r_cred) 3486 req->r_auth = RPCAUTH_NONE; 3487 else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID)) 3488 req->r_auth = req->r_np->n_auth; 3489 else 3490 req->r_auth = nmp->nm_auth; 3491 3492 lck_mtx_unlock(&nmp->nm_lock); 3493 3494 /* move the request mbuf chain to the nfsreq */ 3495 req->r_mrest = nmrest->nmc_mhead; 3496 nmrest->nmc_mhead = NULL; 3497 3498 req->r_flags |= R_INITTED; 3499 req->r_refs = 1; 3500 if (newreq) 3501 *reqp = req; 3502 return (0); 3503} 3504 3505/* 3506 * Clean up and free an NFS request structure. 3507 */ 3508void 3509nfs_request_destroy(struct nfsreq *req) 3510{ 3511 struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; 3512 struct gss_seq *gsp, *ngsp; 3513 struct timespec ts = { 1, 0 }; 3514 int clearjbtimeo = 0; 3515 3516 if (!req || !(req->r_flags & R_INITTED)) 3517 return; 3518 req->r_flags &= ~R_INITTED; 3519 if (req->r_lflags & RL_QUEUED) 3520 nfs_reqdequeue(req); 3521 if (req->r_achain.tqe_next != NFSREQNOLIST) { 3522 /* still on an async I/O queue? */ 3523 lck_mtx_lock(nfsiod_mutex); 3524 if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) { 3525 TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain); 3526 req->r_achain.tqe_next = NFSREQNOLIST; 3527 } 3528 lck_mtx_unlock(nfsiod_mutex); 3529 } 3530 lck_mtx_lock(&req->r_mtx); 3531 if (nmp) { 3532 lck_mtx_lock(&nmp->nm_lock); 3533 if (req->r_flags & R_CWND) { 3534 /* Decrement the outstanding request count. 
*/ 3535 req->r_flags &= ~R_CWND; 3536 nmp->nm_sent -= NFS_CWNDSCALE; 3537 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) { 3538 /* congestion window is open, poke the cwnd queue */ 3539 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq); 3540 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain); 3541 req2->r_cchain.tqe_next = NFSREQNOLIST; 3542 wakeup(req2); 3543 } 3544 } 3545 if (req->r_rchain.tqe_next != NFSREQNOLIST) { 3546 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); 3547 req->r_rchain.tqe_next = NFSREQNOLIST; 3548 if (req->r_flags & R_RESENDQ) 3549 req->r_flags &= ~R_RESENDQ; 3550 } 3551 if (req->r_cchain.tqe_next != NFSREQNOLIST) { 3552 TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain); 3553 req->r_cchain.tqe_next = NFSREQNOLIST; 3554 } 3555 if (req->r_flags & R_JBTPRINTFMSG) { 3556 req->r_flags &= ~R_JBTPRINTFMSG; 3557 nmp->nm_jbreqs--; 3558 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0; 3559 } 3560 lck_mtx_unlock(&nmp->nm_lock); 3561 } 3562 while (req->r_flags & R_RESENDQ) 3563 msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts); 3564 lck_mtx_unlock(&req->r_mtx); 3565 if (clearjbtimeo) 3566 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL); 3567 if (req->r_mhead) 3568 mbuf_freem(req->r_mhead); 3569 else if (req->r_mrest) 3570 mbuf_freem(req->r_mrest); 3571 if (req->r_nmrep.nmc_mhead) 3572 mbuf_freem(req->r_nmrep.nmc_mhead); 3573 if (IS_VALID_CRED(req->r_cred)) 3574 kauth_cred_unref(&req->r_cred); 3575 if (nfs_request_using_gss(req)) 3576 nfs_gss_clnt_rpcdone(req); 3577 SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp) 3578 FREE(gsp, M_TEMP); 3579 if (req->r_gss_ctx) 3580 nfs_gss_clnt_ctx_unref(req); 3581 if (req->r_wrongsec) 3582 FREE(req->r_wrongsec, M_TEMP); 3583 3584 lck_mtx_destroy(&req->r_mtx, nfs_request_grp); 3585 if (req->r_flags & R_ALLOCATED) 3586 FREE_ZONE(req, sizeof(*req), M_NFSREQ); 3587} 3588 3589void 3590nfs_request_ref(struct nfsreq *req, int locked) 3591{ 3592 if (!locked) 3593 
lck_mtx_lock(&req->r_mtx); 3594 if (req->r_refs <= 0) 3595 panic("nfsreq reference error"); 3596 req->r_refs++; 3597 if (!locked) 3598 lck_mtx_unlock(&req->r_mtx); 3599} 3600 3601void 3602nfs_request_rele(struct nfsreq *req) 3603{ 3604 int destroy; 3605 3606 lck_mtx_lock(&req->r_mtx); 3607 if (req->r_refs <= 0) 3608 panic("nfsreq reference underflow"); 3609 req->r_refs--; 3610 destroy = (req->r_refs == 0); 3611 lck_mtx_unlock(&req->r_mtx); 3612 if (destroy) 3613 nfs_request_destroy(req); 3614} 3615 3616 3617/* 3618 * Add an (updated) RPC header with authorization to an NFS request. 3619 */ 3620int 3621nfs_request_add_header(struct nfsreq *req) 3622{ 3623 struct nfsmount *nmp; 3624 int error = 0; 3625 mbuf_t m; 3626 3627 /* free up any previous header */ 3628 if ((m = req->r_mhead)) { 3629 while (m && (m != req->r_mrest)) 3630 m = mbuf_free(m); 3631 req->r_mhead = NULL; 3632 } 3633 3634 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; 3635 if (!nmp) 3636 return (ENXIO); 3637 3638 error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead); 3639 if (error) 3640 return (error); 3641 3642 req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead); 3643 nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; 3644 if (!nmp) 3645 return (ENXIO); 3646 lck_mtx_lock(&nmp->nm_lock); 3647 if (NMFLAG(nmp, SOFT)) 3648 req->r_retry = nmp->nm_retry; 3649 else 3650 req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 3651 lck_mtx_unlock(&nmp->nm_lock); 3652 3653 return (error); 3654} 3655 3656 3657/* 3658 * Queue an NFS request up and send it out. 3659 */ 3660int 3661nfs_request_send(struct nfsreq *req, int wait) 3662{ 3663 struct nfsmount *nmp; 3664 struct timeval now; 3665 3666 lck_mtx_lock(&req->r_mtx); 3667 req->r_flags |= R_SENDING; 3668 lck_mtx_unlock(&req->r_mtx); 3669 3670 lck_mtx_lock(nfs_request_mutex); 3671 3672 nmp = req->r_np ? 
NFSTONMP(req->r_np) : req->r_nmp; 3673 if (!nmp) { 3674 lck_mtx_unlock(nfs_request_mutex); 3675 return (ENXIO); 3676 } 3677 3678 microuptime(&now); 3679 if (!req->r_start) { 3680 req->r_start = now.tv_sec; 3681 req->r_lastmsg = now.tv_sec - 3682 ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 3683 } 3684 3685 OSAddAtomic64(1, &nfsstats.rpcrequests); 3686 3687 /* 3688 * Chain request into list of outstanding requests. Be sure 3689 * to put it LAST so timer finds oldest requests first. 3690 * Make sure that the request queue timer is running 3691 * to check for possible request timeout. 3692 */ 3693 TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain); 3694 req->r_lflags |= RL_QUEUED; 3695 if (!nfs_request_timer_on) { 3696 nfs_request_timer_on = 1; 3697 nfs_interval_timer_start(nfs_request_timer_call, 3698 NFS_REQUESTDELAY); 3699 } 3700 lck_mtx_unlock(nfs_request_mutex); 3701 3702 /* Send the request... */ 3703 return (nfs_send(req, wait)); 3704} 3705 3706/* 3707 * Call nfs_wait_reply() to wait for the reply. 3708 */ 3709void 3710nfs_request_wait(struct nfsreq *req) 3711{ 3712 req->r_error = nfs_wait_reply(req); 3713} 3714 3715/* 3716 * Finish up an NFS request by dequeueing it and 3717 * doing the initial NFS request reply processing. 3718 */ 3719int 3720nfs_request_finish( 3721 struct nfsreq *req, 3722 struct nfsm_chain *nmrepp, 3723 int *status) 3724{ 3725 struct nfsmount *nmp; 3726 mbuf_t mrep; 3727 int verf_type = 0; 3728 uint32_t verf_len = 0; 3729 uint32_t reply_status = 0; 3730 uint32_t rejected_status = 0; 3731 uint32_t auth_status = 0; 3732 uint32_t accepted_status = 0; 3733 struct nfsm_chain nmrep; 3734 int error, clearjbtimeo; 3735 3736 error = req->r_error; 3737 3738 if (nmrepp) 3739 nmrepp->nmc_mhead = NULL; 3740 3741 /* RPC done, unlink the request. */ 3742 nfs_reqdequeue(req); 3743 3744 mrep = req->r_nmrep.nmc_mhead; 3745 3746 nmp = req->r_np ? 
NFSTONMP(req->r_np) : req->r_nmp; 3747 3748 if ((req->r_flags & R_CWND) && nmp) { 3749 /* 3750 * Decrement the outstanding request count. 3751 */ 3752 req->r_flags &= ~R_CWND; 3753 lck_mtx_lock(&nmp->nm_lock); 3754 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd); 3755 nmp->nm_sent -= NFS_CWNDSCALE; 3756 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) { 3757 /* congestion window is open, poke the cwnd queue */ 3758 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq); 3759 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain); 3760 req2->r_cchain.tqe_next = NFSREQNOLIST; 3761 wakeup(req2); 3762 } 3763 lck_mtx_unlock(&nmp->nm_lock); 3764 } 3765 3766 if (nfs_request_using_gss(req)) { 3767 /* 3768 * If the request used an RPCSEC_GSS credential 3769 * then reset its sequence number bit in the 3770 * request window. 3771 */ 3772 nfs_gss_clnt_rpcdone(req); 3773 3774 /* 3775 * If we need to re-send, go back and re-build the 3776 * request based on a new sequence number. 3777 * Note that we're using the original XID. 3778 */ 3779 if (error == EAGAIN) { 3780 req->r_error = 0; 3781 if (mrep) 3782 mbuf_freem(mrep); 3783 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs 3784 req->r_nmrep.nmc_mhead = NULL; 3785 req->r_flags |= R_RESTART; 3786 if (error == ENEEDAUTH) { 3787 req->r_xid = 0; // get a new XID 3788 error = 0; 3789 } 3790 goto nfsmout; 3791 } 3792 } 3793 3794 /* 3795 * If there was a successful reply, make sure to mark the mount as up. 3796 * If a tprintf message was given (or if this is a timed-out soft mount) 3797 * then post a tprintf message indicating the server is alive again. 
3798 */ 3799 if (!error) { 3800 if ((req->r_flags & R_TPRINTFMSG) || 3801 (nmp && NMFLAG(nmp, SOFT) && 3802 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO))) 3803 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again"); 3804 else 3805 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL); 3806 } 3807 if (!error && !nmp) 3808 error = ENXIO; 3809 nfsmout_if(error); 3810 3811 /* 3812 * break down the RPC header and check if ok 3813 */ 3814 nmrep = req->r_nmrep; 3815 nfsm_chain_get_32(error, &nmrep, reply_status); 3816 nfsmout_if(error); 3817 if (reply_status == RPC_MSGDENIED) { 3818 nfsm_chain_get_32(error, &nmrep, rejected_status); 3819 nfsmout_if(error); 3820 if (rejected_status == RPC_MISMATCH) { 3821 error = ENOTSUP; 3822 goto nfsmout; 3823 } 3824 nfsm_chain_get_32(error, &nmrep, auth_status); 3825 nfsmout_if(error); 3826 switch (auth_status) { 3827 case RPCSEC_GSS_CREDPROBLEM: 3828 case RPCSEC_GSS_CTXPROBLEM: 3829 /* 3830 * An RPCSEC_GSS cred or context problem. 3831 * We can't use it anymore. 3832 * Restore the args, renew the context 3833 * and set up for a resend. 
3834 */ 3835 error = nfs_gss_clnt_args_restore(req); 3836 if (error && error != ENEEDAUTH) 3837 break; 3838 3839 if (!error) { 3840 error = nfs_gss_clnt_ctx_renew(req); 3841 if (error) 3842 break; 3843 } 3844 mbuf_freem(mrep); 3845 req->r_nmrep.nmc_mhead = NULL; 3846 req->r_xid = 0; // get a new XID 3847 req->r_flags |= R_RESTART; 3848 goto nfsmout; 3849 default: 3850 error = EACCES; 3851 break; 3852 } 3853 goto nfsmout; 3854 } 3855 3856 /* Now check the verifier */ 3857 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor 3858 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length 3859 nfsmout_if(error); 3860 3861 switch (req->r_auth) { 3862 case RPCAUTH_NONE: 3863 case RPCAUTH_SYS: 3864 /* Any AUTH_SYS verifier is ignored */ 3865 if (verf_len > 0) 3866 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len)); 3867 nfsm_chain_get_32(error, &nmrep, accepted_status); 3868 break; 3869 case RPCAUTH_KRB5: 3870 case RPCAUTH_KRB5I: 3871 case RPCAUTH_KRB5P: 3872 error = nfs_gss_clnt_verf_get(req, &nmrep, 3873 verf_type, verf_len, &accepted_status); 3874 break; 3875 } 3876 nfsmout_if(error); 3877 3878 switch (accepted_status) { 3879 case RPC_SUCCESS: 3880 if (req->r_procnum == NFSPROC_NULL) { 3881 /* 3882 * The NFS null procedure is unique, 3883 * in not returning an NFS status. 3884 */ 3885 *status = NFS_OK; 3886 } else { 3887 nfsm_chain_get_32(error, &nmrep, *status); 3888 nfsmout_if(error); 3889 } 3890 3891 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) { 3892 /* 3893 * It's a JUKEBOX error - delay and try again 3894 */ 3895 int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? 
PCATCH : 0; 3896 3897 mbuf_freem(mrep); 3898 req->r_nmrep.nmc_mhead = NULL; 3899 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) { 3900 /* we're not yet completely mounted and */ 3901 /* we can't complete an RPC, so we fail */ 3902 OSAddAtomic64(1, &nfsstats.rpctimeouts); 3903 nfs_softterm(req); 3904 error = req->r_error; 3905 goto nfsmout; 3906 } 3907 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2); 3908 if (req->r_delay > 30) 3909 req->r_delay = 30; 3910 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) { 3911 if (!(req->r_flags & R_JBTPRINTFMSG)) { 3912 req->r_flags |= R_JBTPRINTFMSG; 3913 lck_mtx_lock(&nmp->nm_lock); 3914 nmp->nm_jbreqs++; 3915 lck_mtx_unlock(&nmp->nm_lock); 3916 } 3917 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO, 3918 "resource temporarily unavailable (jukebox)"); 3919 } 3920 if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) { 3921 /* for soft mounts, just give up after a short while */ 3922 OSAddAtomic64(1, &nfsstats.rpctimeouts); 3923 nfs_softterm(req); 3924 error = req->r_error; 3925 goto nfsmout; 3926 } 3927 delay = req->r_delay; 3928 if (req->r_callback.rcb_func) { 3929 struct timeval now; 3930 microuptime(&now); 3931 req->r_resendtime = now.tv_sec + delay; 3932 } else { 3933 do { 3934 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) 3935 goto nfsmout; 3936 tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0); 3937 slpflag = 0; 3938 } while (--delay > 0); 3939 } 3940 req->r_xid = 0; // get a new XID 3941 req->r_flags |= R_RESTART; 3942 req->r_start = 0; 3943 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER); 3944 return (0); 3945 } 3946 3947 if (req->r_flags & R_JBTPRINTFMSG) { 3948 req->r_flags &= ~R_JBTPRINTFMSG; 3949 lck_mtx_lock(&nmp->nm_lock); 3950 nmp->nm_jbreqs--; 3951 clearjbtimeo = (nmp->nm_jbreqs == 0) ? 
NFSSTA_JUKEBOXTIMEO : 0; 3952 lck_mtx_unlock(&nmp->nm_lock); 3953 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again"); 3954 } 3955 3956 if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) { 3957 /* 3958 * Hmmm... we need to try a different security flavor. 3959 * The first time a request hits this, we will allocate an array 3960 * to track flavors to try. We fill the array with the mount's 3961 * preferred flavors or the server's preferred flavors or just the 3962 * flavors we support. 3963 */ 3964 uint32_t srvflavors[NX_MAX_SEC_FLAVORS]; 3965 int srvcount, i, j; 3966 3967 /* Call SECINFO to try to get list of flavors from server. */ 3968 srvcount = NX_MAX_SEC_FLAVORS; 3969 nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount); 3970 3971 if (!req->r_wrongsec) { 3972 /* first time... set up flavor array */ 3973 MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK); 3974 if (!req->r_wrongsec) { 3975 error = EACCES; 3976 goto nfsmout; 3977 } 3978 i=0; 3979 if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */ 3980 for(; i < nmp->nm_sec.count; i++) 3981 req->r_wrongsec[i] = nmp->nm_sec.flavors[i]; 3982 } else if (srvcount) { /* otherwise use the server's list of flavors */ 3983 for(; i < srvcount; i++) 3984 req->r_wrongsec[i] = srvflavors[i]; 3985 } else { /* otherwise, just try the flavors we support. 
*/ 3986 req->r_wrongsec[i++] = RPCAUTH_KRB5P; 3987 req->r_wrongsec[i++] = RPCAUTH_KRB5I; 3988 req->r_wrongsec[i++] = RPCAUTH_KRB5; 3989 req->r_wrongsec[i++] = RPCAUTH_SYS; 3990 req->r_wrongsec[i++] = RPCAUTH_NONE; 3991 } 3992 for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */ 3993 req->r_wrongsec[i] = RPCAUTH_INVALID; 3994 } 3995 3996 /* clear the current flavor from the list */ 3997 for(i=0; i < NX_MAX_SEC_FLAVORS; i++) 3998 if (req->r_wrongsec[i] == req->r_auth) 3999 req->r_wrongsec[i] = RPCAUTH_INVALID; 4000 4001 /* find the next flavor to try */ 4002 for(i=0; i < NX_MAX_SEC_FLAVORS; i++) 4003 if (req->r_wrongsec[i] != RPCAUTH_INVALID) { 4004 if (((req->r_wrongsec[i] == RPCAUTH_KRB5P) || 4005 (req->r_wrongsec[i] == RPCAUTH_KRB5I) || 4006 (req->r_wrongsec[i] == RPCAUTH_KRB5)) && (req->r_gss_ctx && 4007 (req->r_gss_ctx->gss_clnt_service == RPCSEC_GSS_SVC_SYS))) { 4008 /* don't bother trying Kerberos if we've already got a fallback context */ 4009 req->r_wrongsec[i] = RPCAUTH_INVALID; 4010 continue; 4011 } 4012 if (!srvcount) /* no server list, just try it */ 4013 break; 4014 /* check that it's in the server's list */ 4015 for(j=0; j < srvcount; j++) 4016 if (req->r_wrongsec[i] == srvflavors[j]) 4017 break; 4018 if (j < srvcount) /* found */ 4019 break; 4020 /* not found in server list */ 4021 req->r_wrongsec[i] = RPCAUTH_INVALID; 4022 } 4023 if (i == NX_MAX_SEC_FLAVORS) { 4024 /* nothing left to try! */ 4025 error = EACCES; 4026 goto nfsmout; 4027 } 4028 4029 /* retry with the next auth flavor */ 4030 req->r_auth = req->r_wrongsec[i]; 4031 req->r_xid = 0; // get a new XID 4032 req->r_flags |= R_RESTART; 4033 req->r_start = 0; 4034 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC); 4035 return (0); 4036 } 4037 if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) { 4038 /* 4039 * We renegotiated security for this request; so update the 4040 * default security flavor for the associated node. 
*/
			if (req->r_np)
				req->r_np->n_auth = req->r_auth;
		}

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np) {
			cache_purge(NFSTOV(req->r_np));
			/* if monitored, also send delete event */
			if (vnode_ismonitored(NFSTOV(req->r_np)))
				nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE));
		}
		/* NFSv2: the reply is freed here; otherwise the reply chain is handed to the caller */
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		/* the RPC itself was accepted; the NFS-level failure is reported through *status */
		error = 0;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	/*
	 * Common exit.  If this request is still flagged as having reported a
	 * jukebox delay, clear the flag and drop the mount's count of such
	 * requests; NFSSTA_JUKEBOXTIMEO is only passed to nfs_up() once that
	 * count reaches zero.
	 */
	if (req->r_flags & R_JBTPRINTFMSG) {
		req->r_flags &= ~R_JBTPRINTFMSG;
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_jbreqs--;
		clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		lck_mtx_unlock(&nmp->nm_lock);
		if (clearjbtimeo)
			nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	}
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}

/*
 * NFS request using a GSS/Kerberos security flavor?
 *
 * Returns 1 only when the request has a GSS context attached
 * (req->r_gss_ctx is non-NULL) AND its auth flavor (req->r_auth) is one of
 * RPCAUTH_KRB5, RPCAUTH_KRB5I or RPCAUTH_KRB5P.  Returns 0 otherwise.
 */
int
nfs_request_using_gss(struct nfsreq *req)
{
	/* no context attached: not treated as GSS regardless of r_auth */
	if (!req->r_gss_ctx)
		return (0);
	switch (req->r_auth) {
		case RPCAUTH_KRB5:
		case RPCAUTH_KRB5I:
		case RPCAUTH_KRB5P:
			return (1);
	}
	return (0);
}

/*
 * Perform an NFS request synchronously.
4124 */ 4125 4126int 4127nfs_request( 4128 nfsnode_t np, 4129 mount_t mp, /* used only if !np */ 4130 struct nfsm_chain *nmrest, 4131 int procnum, 4132 vfs_context_t ctx, 4133 struct nfsreq_secinfo_args *si, 4134 struct nfsm_chain *nmrepp, 4135 u_int64_t *xidp, 4136 int *status) 4137{ 4138 return nfs_request2(np, mp, nmrest, procnum, 4139 vfs_context_thread(ctx), vfs_context_ucred(ctx), 4140 si, 0, nmrepp, xidp, status); 4141} 4142 4143int 4144nfs_request2( 4145 nfsnode_t np, 4146 mount_t mp, /* used only if !np */ 4147 struct nfsm_chain *nmrest, 4148 int procnum, 4149 thread_t thd, 4150 kauth_cred_t cred, 4151 struct nfsreq_secinfo_args *si, 4152 int flags, 4153 struct nfsm_chain *nmrepp, 4154 u_int64_t *xidp, 4155 int *status) 4156{ 4157 struct nfsreq rq, *req = &rq; 4158 int error; 4159 4160 if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req))) 4161 return (error); 4162 req->r_flags |= (flags & R_OPTMASK); 4163 if (si) 4164 req->r_secinfo = *si; 4165 4166 FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0); 4167 do { 4168 req->r_error = 0; 4169 req->r_flags &= ~R_RESTART; 4170 if ((error = nfs_request_add_header(req))) 4171 break; 4172 if (xidp) 4173 *xidp = req->r_xid; 4174 if ((error = nfs_request_send(req, 1))) 4175 break; 4176 nfs_request_wait(req); 4177 if ((error = nfs_request_finish(req, nmrepp, status))) 4178 break; 4179 } while (req->r_flags & R_RESTART); 4180 4181 FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error); 4182 nfs_request_rele(req); 4183 return (error); 4184} 4185 4186 4187/* 4188 * Set up a new null proc request to exchange GSS context tokens with the 4189 * server. Associate the context that we are setting up with the request that we 4190 * are sending. 
*/

/*
 * nfs_request_gss: synchronous NFSPROC_NULL request carrying GSS context 'cp'.
 *
 * The request is created first; if 'cp' is NULL the request is released
 * again and NFSERR_EAUTH is returned.  Otherwise the context is attached
 * with nfs_gss_clnt_ctx_ref() and the header/send/wait/finish sequence is
 * repeated while the request comes back flagged R_RESTART.
 * Only the R_OPTMASK bits of 'flags' are applied to the request.
 * Returns 0 or the first error reported by any step.
 */
int
nfs_request_gss(
		mount_t mp,
		struct nfsm_chain *nmrest,
		thread_t thd,
		kauth_cred_t cred,
		int flags,
		struct nfs_gss_clnt_ctx *cp,   /* Set to gss context to renew or setup */
		struct nfsm_chain *nmrepp,
		int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	if (cp == NULL) {
		printf("nfs_request_gss request has no context\n");
		nfs_request_rele(req);
		return (NFSERR_EAUTH);
	}
	nfs_gss_clnt_ctx_ref(req, cp);

	FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
	do {
		/* each attempt starts with a clean error and restart state */
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;

		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
	nfs_request_rele(req);
	return (error);
}

/*
 * Create and start an asynchronous NFS request.
 *
 * The request is created into *reqp, flagged R_ASYNC (plus the R_OPTMASK
 * bits of 'flags'), and optionally given security info 'si' and callback
 * info 'cb'.  After the header is added the request is sent.
 *
 * When the request has a callback function and has not been marked R_SENT
 * by the time nfs_request_send() returns, this routine waits (sleeping in
 * 2 second intervals) until it is marked R_SENT or nfs_sigintr() reports
 * an error.
 *
 * Returns 0 or an error from create/add_header/send/nfs_send/nfs_sigintr.
 * NOTE(review): *reqp is read right after nfs_request_create() even when
 * that call failed - presumably callers initialize *reqp; confirm.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= (flags & R_OPTMASK);
	req->r_flags |= R_ASYNC;
	if (si)
		req->r_secinfo = *si;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		/* requests with a callback get an additional reference before sending */
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((req->r_flags & R_RESENDQ) && ((nmp = req->r_nmp))) {
					lck_mtx_lock(&nmp->nm_lock);
					if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
						/*
						 * It's not going to get off the resend queue if we're in recovery.
						 * So, just take it off ourselves.  We could be holding mount state
						 * busy and thus holding up the start of recovery.
						 */
						TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
						req->r_rchain.tqe_next = NFSREQNOLIST;
						if (req->r_flags & R_RESENDQ)
							req->r_flags &= ~R_RESENDQ;
						lck_mtx_unlock(&nmp->nm_lock);
						req->r_flags |= R_SENDING;
						/* r_mtx is dropped around the send and retaken afterwards */
						lck_mtx_unlock(&req->r_mtx);
						error = nfs_send(req, 1);
						lck_mtx_lock(&req->r_mtx);
						if (error)
							break;
						continue;
					}
					lck_mtx_unlock(&nmp->nm_lock);
				}
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				/* PCATCH (if any) applies to the first sleep only */
				slpflag = 0;
			}
		}
		/* sample R_SENT while r_mtx is still held */
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		/* NOTE(review): presumably drops the reference taken above for the callback - confirm */
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}

/*
 * Wait for and finish an asynchronous NFS request.
 *
 * First waits for the request to leave the resend queue (removing it
 * directly when the mount is in recovery), then waits for and finishes the
 * request.  While the request comes back flagged R_RESTART it is resent:
 *  - for callback-driven requests with a resend time set, the request is
 *    handed to nfs_asyncio_resend() and EINPROGRESS is returned;
 *  - for other callback-driven requests, it is resent and EINPROGRESS is
 *    returned;
 *  - otherwise the resend is waited for and finished here.
 * On the non-EINPROGRESS paths *xidp (if given) receives the XID and the
 * request is released before returning the error (0 on success).
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
	struct nfsmount *nmp;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };
		if ((nmp = req->r_nmp)) {
			lck_mtx_lock(&nmp->nm_lock);
			if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
				/*
				 * It's not going to get off the resend queue if we're in recovery.
				 * So, just take it off ourselves.  We could be holding mount state
				 * busy and thus holding up the start of recovery.
				 */
				TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
				req->r_rchain.tqe_next = NFSREQNOLIST;
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			/* returns without releasing the request or setting *xidp */
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		/* second argument is 1 only for requests without a callback */
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}

/*
 * Cancel a pending asynchronous NFS request.
 *
 * Dequeues the request via nfs_reqdequeue() and then releases it.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}

/*
 * Flag a request as being terminated.
*
 * Sets R_SOFTTERM and records ETIMEDOUT in r_error.  If the request was
 * counted against the congestion window (R_CWND) and still has a mount,
 * its share (NFS_CWNDSCALE) is returned to the window and, if the window
 * is now open, the first request waiting on the cwnd queue is woken.
 */
void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}

/*
 * Ensure req isn't in use by the timer, then dequeue it.
 *
 * Takes nfs_request_mutex, sleeps while the request is marked RL_BUSY
 * (setting RL_WAITING so that nfs_reqnext() issues a wakeup), and then
 * removes the request from nfs_reqq if it is marked RL_QUEUED.
 */
void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 *
 * Panics if the request is already marked RL_BUSY.
 * NOTE(review): no lock is taken here; the timer calls this with
 * nfs_request_mutex held - presumably that is required of all callers.
 */
void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}

/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 *
 * Returns NULL when passed NULL or when there is no next request.
 */
struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}

/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 *
 * The whole scan runs with nfs_request_mutex held; each request is marked
 * busy (nfs_reqbusy/nfs_reqnext) while it is being examined.  Mounts whose
 * sockets should be poked are collected on a local queue and poked after
 * the mutex is dropped.  If the request queue is empty the timer is turned
 * off (nfs_request_timer_on = 0); otherwise it is re-armed at the end.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		/* skip requests that already have an error or a reply */
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		/*
		 * Report "not responding" when messages are enabled for the
		 * mount, the request has been retransmitted more than twice (or
		 * hit a resend error), and the last message is older than
		 * nm_tprintf_delay.
		 */
		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic64(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!NMFLAG(nmp, SOFT) && !nfs_can_squish(nmp))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
			 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			/* r_rtt counts timer ticks; a negative value skips this check */
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (NMFLAG(nmp, DUMBTIMER))
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime));
			/* nm_timeouts is capped at 8 */
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			nfs_mount_check_dead_timeout(nmp);
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
			    (nmp->nm_sockflags & NMSOCK_READY)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
		if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic64(1, &nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}
			if (req->r_flags & R_NOINTR) {
				/* don't terminate nointr requests on timeout */
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start));
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		/* only R_ASYNC requests that nobody is waiting on or sending are queued for resend here */
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}

/*
 * check a thread's proc for the "noremotehang" flag.
 *
 * Returns non-zero only when 'thd' is non-NULL, has an associated proc,
 * and proc_noremotehang() is true for that proc.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}

/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptible, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 *
 * Also returns ENXIO when 'nmp' is NULL or the mount is marked dead, and
 * EIO when a forced unmount is in progress or when the mount is timing out
 * and the thread's process asked not to hang on remote filesystems.
 *
 * 'nmplocked' non-zero means the caller already holds nmp->nm_lock; the
 * lock is only taken and dropped here when it is zero.
 */
extern int sigprop[NSIG+1];
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	proc_t p;
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT); /* request has been terminated. */
	if (req && (req->r_flags & R_NOINTR))
		thd = NULL; /* don't check for signal on R_NOINTR */

	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_state & NFSSTA_FORCE) {
		/* If a force unmount is in progress then fail. */
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/* Check if the mount is marked dead. */
	if (!error && (nmp->nm_state & NFSSTA_DEAD))
		error = ENXIO;

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL)
		return (0);

	/*
	 * Check if the process is aborted, but don't interrupt if we
	 * were killed by a signal and this is the exiting thread which
	 * is attempting to dump core.
	 */
	if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
	     (p->p_sigacts == NULL) ||
	     (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
	     !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
	    proc_pendingsignals(p, NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
4786 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 4787 * and also to avoid race conditions between the processes with nfs requests 4788 * in progress when a reconnect is necessary. 4789 */ 4790int 4791nfs_sndlock(struct nfsreq *req) 4792{ 4793 struct nfsmount *nmp = req->r_nmp; 4794 int *statep; 4795 int error = 0, slpflag = 0; 4796 struct timespec ts = { 0, 0 }; 4797 4798 if (nmp == NULL) 4799 return (ENXIO); 4800 4801 lck_mtx_lock(&nmp->nm_lock); 4802 statep = &nmp->nm_state; 4803 4804 if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) 4805 slpflag = PCATCH; 4806 while (*statep & NFSSTA_SNDLOCK) { 4807 if ((error = nfs_sigintr(nmp, req, req->r_thread, 1))) 4808 break; 4809 *statep |= NFSSTA_WANTSND; 4810 if (nfs_noremotehang(req->r_thread)) 4811 ts.tv_sec = 1; 4812 msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts); 4813 if (slpflag == PCATCH) { 4814 slpflag = 0; 4815 ts.tv_sec = 2; 4816 } 4817 } 4818 if (!error) 4819 *statep |= NFSSTA_SNDLOCK; 4820 lck_mtx_unlock(&nmp->nm_lock); 4821 return (error); 4822} 4823 4824/* 4825 * Unlock the stream socket for others. 
*
 * Clears NFSSTA_SNDLOCK and NFSSTA_SENDING in the mount state and, if a
 * waiter set NFSSTA_WANTSND, clears that too and wakes the waiters after
 * nm_lock has been dropped.  Panics if the send lock was not held.
 * Does nothing when the request has no mount.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING);
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	if (wake)
		wakeup(statep);
}

/*
 * nfs_aux_request: send one pre-built RPC request and parse the RPC reply
 * header, outside of the normal nfsreq machinery.
 *
 * nmp		mount passed to nfs_sigintr() on every try.
 * thd		thread passed to nfs_sigintr() on every try except the first.
 * saddr	destination address; also selects the family of a new socket.
 * so		socket to use, or NULL to create (and later close) a
 *		temporary one.
 * sotype	SOCK_DGRAM or SOCK_STREAM.
 * mreq		request mbuf chain; a copy is sent each time and the original
 *		is freed before returning, on every path.
 * xid		XID the reply must carry.
 * bindresv	when creating a socket: request the low port range and bind
 *		to the wildcard address.
 * timeo	maximum number of tries; the socket send/receive timeouts are
 *		set to one second, so this is roughly a number of seconds.
 * nmrep	reply chain, set up over the received mbuf.
 *
 * For UDP the request is resent at try 2, 4, 8, ... (interval capped at 30);
 * for TCP it is sent once.
 *
 * Returns 0 when the reply was accepted with RPC_SUCCESS, otherwise an errno
 * describing the socket, parse or RPC-level failure.
 */
int
nfs_aux_request(
	struct nfsmount *nmp,
	thread_t thd,
	struct sockaddr *saddr,
	socket_t so,
	int sotype,
	mbuf_t mreq,
	uint32_t xid,
	int bindresv,
	int timeo,
	struct nfsm_chain *nmrep)
{
	int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
	socket_t newso = NULL;
	struct sockaddr_storage ss;
	struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 };
	mbuf_t m, mrep = NULL;
	struct msghdr msg;
	uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
	uint32_t verf_type, verf_len, accepted_status;
	size_t readlen, sentlen;
	struct nfs_rpc_record_state nrrs;

	if (!so) {
		/* create socket and set options */
		soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
		if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso)))
			goto nfsmout;

		if (bindresv) {
			int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
			int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
			int portrange = IP_PORTRANGE_LOW;
			error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange));
			nfsmout_if(error);
			/* bind to the wildcard address, port 0, of the destination's family */
			ss.ss_len = saddr->sa_len;
			ss.ss_family = saddr->sa_family;
			if (ss.ss_family == AF_INET) {
				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
				((struct sockaddr_in*)&ss)->sin_port = htons(0);
			} else if (ss.ss_family == AF_INET6) {
				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
			} else {
				error = EINVAL;
			}
			if (!error)
				error = sock_bind(newso, (struct sockaddr *)&ss);
			nfsmout_if(error);
		}

		if (sotype == SOCK_STREAM) {
			on = 4; /* don't wait too long for the socket to connect */
			sock_setsockopt(newso, IPPROTO_TCP, TCP_CONNECTIONTIMEOUT, &on, sizeof(on));
			error = sock_connect(newso, saddr, 0);
			nfsmout_if(error);
		}
		if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
		    ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
		    ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
			goto nfsmout;
		so = newso;
	} else {
		/* make sure socket is using a one second timeout in this function */
		optlen = sizeof(orig_rcvto);
		error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen);
		if (!error) {
			optlen = sizeof(orig_sndto);
			error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen);
		}
		/* timeouts are only overridden (and later restored) if both originals were read */
		if (!error) {
			sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
			sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
			restoreto = 1;
		}
	}

	if (sotype == SOCK_STREAM) {
		sendat = 0; /* we only resend the request for UDP */
		nfs_rpc_record_state_init(&nrrs);
	}

	for (try=0; try < timeo; try++) {
		/* no thread is passed on the first try, so signals are not checked then */
		if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0)))
			break;
		if (!try || (try == sendat)) {
			/* send the request (resending periodically for UDP) */
			if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
				goto nfsmout;
			bzero(&msg, sizeof(msg));
			if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) {
				msg.msg_name = saddr;
				msg.msg_namelen = saddr->sa_len;
			}
			if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen)))
				goto nfsmout;
			/* next resend at double the try count, capped at 30 */
			sendat *= 2;
			if (sendat > 30)
				sendat = 30;
		}
		/* wait for the response */
		if (sotype == SOCK_STREAM) {
			/* try to read (more of) record */
			error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep);
			/* if we don't have the whole record yet, we'll keep trying */
		} else {
			readlen = 1<<18;
			bzero(&msg, sizeof(msg));
			error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
		}
		if (error == EWOULDBLOCK)
			continue;
		nfsmout_if(error);
		/* parse the response */
		nfsm_chain_dissect_init(error, nmrep, mrep);
		nfsm_chain_get_32(error, nmrep, rxid);
		nfsm_chain_get_32(error, nmrep, reply);
		nfsmout_if(error);
		/* a reply with the wrong XID or message type ends the exchange with EBADRPC */
		if ((rxid != xid) || (reply != RPC_REPLY))
			error = EBADRPC;
		nfsm_chain_get_32(error, nmrep, reply_status);
		nfsmout_if(error);
		if (reply_status == RPC_MSGDENIED) {
			nfsm_chain_get_32(error, nmrep, rejected_status);
			nfsmout_if(error);
			error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
		nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */
		nfsmout_if(error);
		/* the verifier body is skipped, not checked */
		if (verf_len)
			nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, nmrep, accepted_status);
		nfsmout_if(error);
		switch (accepted_status) {
		case RPC_SUCCESS:
			error = 0;
			break;
		case RPC_PROGUNAVAIL:
			error = EPROGUNAVAIL;
			break;
		case RPC_PROGMISMATCH:
			error = EPROGMISMATCH;
			break;
		case RPC_PROCUNAVAIL:
			error = EPROCUNAVAIL;
			break;
		case RPC_GARBAGE:
			error = EBADRPC;
			break;
		case RPC_SYSTEM_ERR:
		default:
			error = EIO;
			break;
		}
		break;
	}
nfsmout:
	/* put back the caller's socket timeouts if we replaced them */
	if (restoreto) {
		sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv));
		sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv));
	}
	/* only a socket created here is shut down and closed */
	if (newso) {
		sock_shutdown(newso, SHUT_RDWR);
		sock_close(newso);
	}
	mbuf_freem(mreq);
	return (error);
}

int
nfs_portmap_lookup(
	struct nfsmount *nmp,
	vfs_context_t ctx,
	struct sockaddr *sa,
	socket_t so,
	uint32_t protocol,
	uint32_t vers,
	uint32_t ipproto,
	int timeo)
{
	thread_t thd = vfs_context_thread(ctx);
	kauth_cred_t cred = vfs_context_ucred(ctx);
	struct sockaddr_storage ss;
	struct sockaddr *saddr = (struct sockaddr*)&ss;
	struct nfsm_chain nmreq, nmrep;
	mbuf_t mreq;
	int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0;
	uint32_t port;
	uint64_t xid = 0;
	char uaddr[MAX_IPv6_STR_LEN+16];

	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
	if (saddr->sa_family == AF_INET) {
		ip = 4;
		pmprog = PMAPPROG;
		pmvers = PMAPVERS;
		pmproc = PMAPPROC_GETPORT;
	} else if (saddr->sa_family == AF_INET6) {
		ip = 6;
		pmprog =
RPCBPROG; 5052 pmvers = RPCBVERS4; 5053 pmproc = RPCBPROC_GETVERSADDR; 5054 } else { 5055 return (EINVAL); 5056 } 5057 nfsm_chain_null(&nmreq); 5058 nfsm_chain_null(&nmrep); 5059 5060tryagain: 5061 /* send portmapper request to get port/uaddr */ 5062 if (ip == 4) 5063 ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT); 5064 else 5065 ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT); 5066 nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED); 5067 nfsm_chain_add_32(error, &nmreq, protocol); 5068 nfsm_chain_add_32(error, &nmreq, vers); 5069 if (ip == 4) { 5070 nfsm_chain_add_32(error, &nmreq, ipproto); 5071 nfsm_chain_add_32(error, &nmreq, 0); 5072 } else { 5073 if (ipproto == IPPROTO_TCP) 5074 nfsm_chain_add_string(error, &nmreq, "tcp6", 4); 5075 else 5076 nfsm_chain_add_string(error, &nmreq, "udp6", 4); 5077 nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */ 5078 nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */ 5079 } 5080 nfsm_chain_build_done(error, &nmreq); 5081 nfsmout_if(error); 5082 error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM, 5083 pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, 5084 &xid, &mreq); 5085 nfsmout_if(error); 5086 nmreq.nmc_mhead = NULL; 5087 error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? 
SOCK_DGRAM : SOCK_STREAM, 5088 mreq, R_XID32(xid), 0, timeo, &nmrep); 5089 5090 /* grab port from portmap response */ 5091 if (ip == 4) { 5092 nfsm_chain_get_32(error, &nmrep, port); 5093 if (!error) 5094 ((struct sockaddr_in*)sa)->sin_port = htons(port); 5095 } else { 5096 /* get uaddr string and convert to sockaddr */ 5097 nfsm_chain_get_32(error, &nmrep, ualen); 5098 if (!error) { 5099 if (ualen > ((int)sizeof(uaddr)-1)) 5100 error = EIO; 5101 if (ualen < 1) { 5102 /* program is not available, just return a zero port */ 5103 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5104 ((struct sockaddr_in6*)saddr)->sin6_port = htons(0); 5105 } else { 5106 nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr); 5107 if (!error) { 5108 uaddr[ualen] = '\0'; 5109 if (!nfs_uaddr2sockaddr(uaddr, saddr)) 5110 error = EIO; 5111 } 5112 } 5113 } 5114 if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) { 5115 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */ 5116 if (pmvers == RPCBVERS4) { 5117 /* fall back to v3 and GETADDR */ 5118 pmvers = RPCBVERS3; 5119 pmproc = RPCBPROC_GETADDR; 5120 nfsm_chain_cleanup(&nmreq); 5121 nfsm_chain_cleanup(&nmrep); 5122 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5123 xid = 0; 5124 error = 0; 5125 goto tryagain; 5126 } 5127 } 5128 if (!error) 5129 bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len)); 5130 } 5131nfsmout: 5132 nfsm_chain_cleanup(&nmreq); 5133 nfsm_chain_cleanup(&nmrep); 5134 return (error); 5135} 5136 5137int 5138nfs_msg(thread_t thd, 5139 const char *server, 5140 const char *msg, 5141 int error) 5142{ 5143 proc_t p = thd ? 
get_bsdthreadtask_info(thd) : NULL; 5144 tpr_t tpr; 5145 5146 if (p) 5147 tpr = tprintf_open(p); 5148 else 5149 tpr = NULL; 5150 if (error) 5151 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error); 5152 else 5153 tprintf(tpr, "nfs server %s: %s\n", server, msg); 5154 tprintf_close(tpr); 5155 return (0); 5156} 5157 5158#define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */ 5159#define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if the are automounted */ 5160#define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */ 5161#define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */ 5162#define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */ 5163 5164uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK; 5165int32_t nfs_is_mobile; 5166 5167#define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead time out for squishy mounts */ 5168#define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead time out when nfs_squish_flags NFS_SQUISH_QUICK bit is set*/ 5169 5170/* 5171 * Could this mount be squished? 5172 */ 5173int 5174nfs_can_squish(struct nfsmount *nmp) 5175{ 5176 uint64_t flags = vfs_flags(nmp->nm_mountp); 5177 int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) & NMFLAG(nmp, SOFT)); 5178 5179 if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0) 5180 return (0); 5181 5182 if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0) 5183 return (0); 5184 5185 return (1); 5186} 5187 5188/* 5189 * NFS mounts default to "rw,hard" - but frequently on mobile clients 5190 * the mount may become "not responding". It's desirable to be able 5191 * to unmount these dead mounts, but only if there is no risk of 5192 * losing data or crashing applications. A "squishy" NFS mount is one 5193 * that can be force unmounted with little risk of harm. 
5194 * 5195 * nfs_is_squishy checks if a mount is in a squishy state. A mount is 5196 * in a squishy state iff it is allowed to be squishy and there are no 5197 * dirty pages and there are no mmapped files and there are no files 5198 * open for write. Mounts are allowed to be squishy is controlled by 5199 * the settings of the nfs_squishy_flags and its mobility state. These 5200 * flags can be set by sysctls. 5201 * 5202 * If nfs_is_squishy determines that we are in a squishy state we will 5203 * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT 5204 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see 5205 * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just 5206 * update the current dead timeout with the mount's nm_deadtimeout 5207 * value set at mount time. 5208 * 5209 * Assumes that nm_lock is held. 5210 * 5211 * Note this routine is racey, but its effects on setting the 5212 * dead timeout only have effects when we're in trouble and are likely 5213 * to stay that way. Since by default its only for automounted 5214 * volumes on mobile machines; this is a reasonable trade off between 5215 * data integrity and user experience. It can be disabled or set via 5216 * nfs.conf file. 5217 */ 5218 5219int 5220nfs_is_squishy(struct nfsmount *nmp) 5221{ 5222 mount_t mp = nmp->nm_mountp; 5223 int squishy = 0; 5224 int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT; 5225 5226 NFS_SOCK_DBG(("nfs_is_squishy: %s: nm_curdeadtiemout = %d, nfs_is_mobile = %d\n", 5227 vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile)); 5228 5229 if (!nfs_can_squish(nmp)) 5230 goto out; 5231 5232 timeo = (nmp->nm_deadtimeout > timeo) ? 
max(nmp->nm_deadtimeout/8, timeo) : timeo; 5233 NFS_SOCK_DBG(("nfs_is_squishy: nm_writers = %d nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo)); 5234 5235 if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) { 5236 uint64_t flags = mp ? vfs_flags(mp) : 0; 5237 squishy = 1; 5238 5239 /* 5240 * Walk the nfs nodes and check for dirty buffers it we're not 5241 * RDONLY and we've not already been declared as squishy since 5242 * this can be a bit expensive. 5243 */ 5244 if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY)) 5245 squishy = !nfs_mount_is_dirty(mp); 5246 } 5247 5248out: 5249 if (squishy) 5250 nmp->nm_state |= NFSSTA_SQUISHY; 5251 else 5252 nmp->nm_state &= ~NFSSTA_SQUISHY; 5253 5254 nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout; 5255 5256 NFS_SOCK_DBG(("nfs_is_squishy: nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout)); 5257 5258 return (squishy); 5259} 5260 5261/* 5262 * On a send operation, if we can't reach the server and we've got only one server to talk to 5263 * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead 5264 * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise. 5265 */ 5266static int 5267nfs_is_dead_lock(int error, struct nfsmount *nmp) 5268{ 5269 if (nmp->nm_state & NFSSTA_DEAD) 5270 return (1); 5271 5272 if ((error != ENETUNREACH && error != EHOSTUNREACH) || 5273 !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) 5274 return (0); 5275 if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) { 5276 printf("nfs_is_dead: nfs server %s: unreachable. 
Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); 5277 nmp->nm_state |= NFSSTA_DEAD; 5278 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); 5279 return (1); 5280 } 5281 return (0); 5282} 5283 5284int 5285nfs_is_dead(int error, struct nfsmount *nmp) 5286{ 5287 int is_dead; 5288 5289 lck_mtx_lock(&nmp->nm_lock); 5290 is_dead = nfs_is_dead_lock(error, nmp); 5291 lck_mtx_unlock(&nmp->nm_lock); 5292 5293 return (is_dead); 5294} 5295 5296void 5297nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg) 5298{ 5299 int timeoutmask, wasunresponsive, unresponsive, softnobrowse; 5300 uint32_t do_vfs_signal; 5301 struct timeval now; 5302 5303 if (nmp == NULL) 5304 return; 5305 5306 lck_mtx_lock(&nmp->nm_lock); 5307 5308 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; 5309 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ 5310 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; 5311 wasunresponsive = (nmp->nm_state & timeoutmask); 5312 5313 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ 5314 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); 5315 5316 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) 5317 nmp->nm_state |= NFSSTA_TIMEO; 5318 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) 5319 nmp->nm_state |= NFSSTA_LOCKTIMEO; 5320 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) 5321 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO; 5322 5323 unresponsive = (nmp->nm_state & timeoutmask); 5324 5325 nfs_is_squishy(nmp); 5326 5327 if (unresponsive && (nmp->nm_curdeadtimeout > 0)) { 5328 microuptime(&now); 5329 if (!wasunresponsive) { 5330 nmp->nm_deadto_start = now.tv_sec; 5331 nfs_mount_sock_thread_wake(nmp); 5332 } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout) { 5333 if (!(nmp->nm_state & NFSSTA_DEAD)) 5334 printf("nfs server 
%s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, 5335 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : ""); 5336 nmp->nm_state |= NFSSTA_DEAD; 5337 } 5338 } 5339 lck_mtx_unlock(&nmp->nm_lock); 5340 5341 if (nmp->nm_state & NFSSTA_DEAD) 5342 do_vfs_signal = VQ_DEAD; 5343 else if (softnobrowse || wasunresponsive || !unresponsive) 5344 do_vfs_signal = 0; 5345 else 5346 do_vfs_signal = VQ_NOTRESP; 5347 if (do_vfs_signal) 5348 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0); 5349 5350 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error); 5351} 5352 5353void 5354nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg) 5355{ 5356 int timeoutmask, wasunresponsive, unresponsive, softnobrowse; 5357 int do_vfs_signal; 5358 5359 if (nmp == NULL) 5360 return; 5361 5362 if (msg) 5363 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0); 5364 5365 lck_mtx_lock(&nmp->nm_lock); 5366 5367 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; 5368 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ 5369 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; 5370 wasunresponsive = (nmp->nm_state & timeoutmask); 5371 5372 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ 5373 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); 5374 5375 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) 5376 nmp->nm_state &= ~NFSSTA_TIMEO; 5377 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) 5378 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 5379 if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) 5380 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO; 5381 5382 unresponsive = (nmp->nm_state & timeoutmask); 5383 5384 nmp->nm_deadto_start = 0; 5385 nmp->nm_curdeadtimeout = nmp->nm_deadtimeout; 5386 nmp->nm_state &= ~NFSSTA_SQUISHY; 5387 lck_mtx_unlock(&nmp->nm_lock); 5388 5389 
if (softnobrowse) 5390 do_vfs_signal = 0; 5391 else 5392 do_vfs_signal = (wasunresponsive && !unresponsive); 5393 if (do_vfs_signal) 5394 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1); 5395} 5396 5397 5398#endif /* NFSCLIENT */ 5399 5400#if NFSSERVER 5401 5402/* 5403 * Generate the rpc reply header 5404 * siz arg. is used to decide if adding a cluster is worthwhile 5405 */ 5406int 5407nfsrv_rephead( 5408 struct nfsrv_descript *nd, 5409 __unused struct nfsrv_sock *slp, 5410 struct nfsm_chain *nmrepp, 5411 size_t siz) 5412{ 5413 mbuf_t mrep; 5414 u_int32_t *tl; 5415 struct nfsm_chain nmrep; 5416 int err, error; 5417 5418 err = nd->nd_repstat; 5419 if (err && (nd->nd_vers == NFS_VER2)) 5420 siz = 0; 5421 5422 /* 5423 * If this is a big reply, use a cluster else 5424 * try and leave leading space for the lower level headers. 5425 */ 5426 siz += RPC_REPLYSIZ; 5427 if (siz >= nfs_mbuf_minclsize) { 5428 error = mbuf_getpacket(MBUF_WAITOK, &mrep); 5429 } else { 5430 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep); 5431 } 5432 if (error) { 5433 /* unable to allocate packet */ 5434 /* XXX should we keep statistics for these errors? */ 5435 return (error); 5436 } 5437 if (siz < nfs_mbuf_minclsize) { 5438 /* leave space for lower level headers */ 5439 tl = mbuf_data(mrep); 5440 tl += 80/sizeof(*tl); /* XXX max_hdr? 
XXX */ 5441 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED); 5442 } 5443 nfsm_chain_init(&nmrep, mrep); 5444 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid); 5445 nfsm_chain_add_32(error, &nmrep, RPC_REPLY); 5446 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { 5447 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); 5448 if (err & NFSERR_AUTHERR) { 5449 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); 5450 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR)); 5451 } else { 5452 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); 5453 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 5454 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 5455 } 5456 } else { 5457 /* reply status */ 5458 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED); 5459 if (nd->nd_gss_context != NULL) { 5460 /* RPCSEC_GSS verifier */ 5461 error = nfs_gss_svc_verf_put(nd, &nmrep); 5462 if (error) { 5463 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR); 5464 goto done; 5465 } 5466 } else { 5467 /* RPCAUTH_NULL verifier */ 5468 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL); 5469 nfsm_chain_add_32(error, &nmrep, 0); 5470 } 5471 /* accepted status */ 5472 switch (err) { 5473 case EPROGUNAVAIL: 5474 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL); 5475 break; 5476 case EPROGMISMATCH: 5477 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH); 5478 /* XXX hard coded versions? */ 5479 nfsm_chain_add_32(error, &nmrep, NFS_VER2); 5480 nfsm_chain_add_32(error, &nmrep, NFS_VER3); 5481 break; 5482 case EPROCUNAVAIL: 5483 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL); 5484 break; 5485 case EBADRPC: 5486 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE); 5487 break; 5488 default: 5489 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS); 5490 if (nd->nd_gss_context != NULL) 5491 error = nfs_gss_svc_prepare_reply(nd, &nmrep); 5492 if (err != NFSERR_RETVOID) 5493 nfsm_chain_add_32(error, &nmrep, 5494 (err ? 
nfsrv_errmap(nd, err) : 0)); 5495 break; 5496 } 5497 } 5498 5499done: 5500 nfsm_chain_build_done(error, &nmrep); 5501 if (error) { 5502 /* error composing reply header */ 5503 /* XXX should we keep statistics for these errors? */ 5504 mbuf_freem(mrep); 5505 return (error); 5506 } 5507 5508 *nmrepp = nmrep; 5509 if ((err != 0) && (err != NFSERR_RETVOID)) 5510 OSAddAtomic64(1, &nfsstats.srvrpc_errs); 5511 return (0); 5512} 5513 5514/* 5515 * The nfs server send routine. 5516 * 5517 * - return EINTR or ERESTART if interrupted by a signal 5518 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 5519 * - do any cleanup required by recoverable socket errors (???) 5520 */ 5521int 5522nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top) 5523{ 5524 int error; 5525 socket_t so = slp->ns_so; 5526 struct sockaddr *sendnam; 5527 struct msghdr msg; 5528 5529 bzero(&msg, sizeof(msg)); 5530 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) { 5531 if ((sendnam = mbuf_data(nam))) { 5532 msg.msg_name = (caddr_t)sendnam; 5533 msg.msg_namelen = sendnam->sa_len; 5534 } 5535 } 5536 error = sock_sendmbuf(so, &msg, top, 0, NULL); 5537 if (!error) 5538 return (0); 5539 log(LOG_INFO, "nfsd send error %d\n", error); 5540 5541 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM)) 5542 error = EPIPE; /* zap TCP sockets if they time out on send */ 5543 5544 /* Handle any recoverable (soft) socket errors here. (???) */ 5545 if (error != EINTR && error != ERESTART && error != EIO && 5546 error != EWOULDBLOCK && error != EPIPE) 5547 error = 0; 5548 5549 return (error); 5550} 5551 5552/* 5553 * Socket upcall routine for the nfsd sockets. 5554 * The caddr_t arg is a pointer to the "struct nfsrv_sock". 5555 * Essentially do as much as possible non-blocking, else punt and it will 5556 * be called with MBUF_WAITOK from an nfsd. 
5557 */ 5558void 5559nfsrv_rcv(socket_t so, void *arg, int waitflag) 5560{ 5561 struct nfsrv_sock *slp = arg; 5562 5563 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID)) 5564 return; 5565 5566 lck_rw_lock_exclusive(&slp->ns_rwlock); 5567 nfsrv_rcv_locked(so, slp, waitflag); 5568 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */ 5569} 5570void 5571nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag) 5572{ 5573 mbuf_t m, mp, mhck, m2; 5574 int ns_flag=0, error; 5575 struct msghdr msg; 5576 size_t bytes_read; 5577 5578 if ((slp->ns_flag & SLP_VALID) == 0) { 5579 if (waitflag == MBUF_DONTWAIT) 5580 lck_rw_done(&slp->ns_rwlock); 5581 return; 5582 } 5583 5584#ifdef notdef 5585 /* 5586 * Define this to test for nfsds handling this under heavy load. 5587 */ 5588 if (waitflag == MBUF_DONTWAIT) { 5589 ns_flag = SLP_NEEDQ; 5590 goto dorecs; 5591 } 5592#endif 5593 if (slp->ns_sotype == SOCK_STREAM) { 5594 /* 5595 * If there are already records on the queue, defer soreceive() 5596 * to an(other) nfsd so that there is feedback to the TCP layer that 5597 * the nfs servers are heavily loaded. 5598 */ 5599 if (slp->ns_rec) { 5600 ns_flag = SLP_NEEDQ; 5601 goto dorecs; 5602 } 5603 5604 /* 5605 * Do soreceive(). 5606 */ 5607 bytes_read = 1000000000; 5608 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read); 5609 if (error || mp == NULL) { 5610 if (error == EWOULDBLOCK) 5611 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0; 5612 else 5613 ns_flag = SLP_DISCONN; 5614 goto dorecs; 5615 } 5616 m = mp; 5617 if (slp->ns_rawend) { 5618 if ((error = mbuf_setnext(slp->ns_rawend, m))) 5619 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error); 5620 slp->ns_cc += bytes_read; 5621 } else { 5622 slp->ns_raw = m; 5623 slp->ns_cc = bytes_read; 5624 } 5625 while ((m2 = mbuf_next(m))) 5626 m = m2; 5627 slp->ns_rawend = m; 5628 5629 /* 5630 * Now try and parse record(s) out of the raw stream data. 
5631 */ 5632 error = nfsrv_getstream(slp, waitflag); 5633 if (error) { 5634 if (error == EPERM) 5635 ns_flag = SLP_DISCONN; 5636 else 5637 ns_flag = SLP_NEEDQ; 5638 } 5639 } else { 5640 struct sockaddr_storage nam; 5641 5642 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) { 5643 /* already have max # RPC records queued on this socket */ 5644 ns_flag = SLP_NEEDQ; 5645 goto dorecs; 5646 } 5647 5648 bzero(&msg, sizeof(msg)); 5649 msg.msg_name = (caddr_t)&nam; 5650 msg.msg_namelen = sizeof(nam); 5651 5652 do { 5653 bytes_read = 1000000000; 5654 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read); 5655 if (mp) { 5656 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) { 5657 mbuf_setlen(mhck, nam.ss_len); 5658 bcopy(&nam, mbuf_data(mhck), nam.ss_len); 5659 m = mhck; 5660 if (mbuf_setnext(m, mp)) { 5661 /* trouble... just drop it */ 5662 printf("nfsrv_rcv: mbuf_setnext failed\n"); 5663 mbuf_free(mhck); 5664 m = mp; 5665 } 5666 } else { 5667 m = mp; 5668 } 5669 if (slp->ns_recend) 5670 mbuf_setnextpkt(slp->ns_recend, m); 5671 else { 5672 slp->ns_rec = m; 5673 slp->ns_flag |= SLP_DOREC; 5674 } 5675 slp->ns_recend = m; 5676 mbuf_setnextpkt(m, NULL); 5677 slp->ns_reccnt++; 5678 } 5679 } while (mp); 5680 } 5681 5682 /* 5683 * Now try and process the request records, non-blocking. 5684 */ 5685dorecs: 5686 if (ns_flag) 5687 slp->ns_flag |= ns_flag; 5688 if (waitflag == MBUF_DONTWAIT) { 5689 int wake = (slp->ns_flag & SLP_WORKTODO); 5690 lck_rw_done(&slp->ns_rwlock); 5691 if (wake && nfsd_thread_count) { 5692 lck_mtx_lock(nfsd_mutex); 5693 nfsrv_wakenfsd(slp); 5694 lck_mtx_unlock(nfsd_mutex); 5695 } 5696 } 5697} 5698 5699/* 5700 * Try and extract an RPC request from the mbuf data list received on a 5701 * stream socket. The "waitflag" argument indicates whether or not it 5702 * can sleep. 
5703 */ 5704int 5705nfsrv_getstream(struct nfsrv_sock *slp, int waitflag) 5706{ 5707 mbuf_t m; 5708 char *cp1, *cp2, *mdata; 5709 int len, mlen, error; 5710 mbuf_t om, m2, recm; 5711 u_int32_t recmark; 5712 5713 if (slp->ns_flag & SLP_GETSTREAM) 5714 panic("nfs getstream"); 5715 slp->ns_flag |= SLP_GETSTREAM; 5716 for (;;) { 5717 if (slp->ns_reclen == 0) { 5718 if (slp->ns_cc < NFSX_UNSIGNED) { 5719 slp->ns_flag &= ~SLP_GETSTREAM; 5720 return (0); 5721 } 5722 m = slp->ns_raw; 5723 mdata = mbuf_data(m); 5724 mlen = mbuf_len(m); 5725 if (mlen >= NFSX_UNSIGNED) { 5726 bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED); 5727 mdata += NFSX_UNSIGNED; 5728 mlen -= NFSX_UNSIGNED; 5729 mbuf_setdata(m, mdata, mlen); 5730 } else { 5731 cp1 = (caddr_t)&recmark; 5732 cp2 = mdata; 5733 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { 5734 while (mlen == 0) { 5735 m = mbuf_next(m); 5736 cp2 = mbuf_data(m); 5737 mlen = mbuf_len(m); 5738 } 5739 *cp1++ = *cp2++; 5740 mlen--; 5741 mbuf_setdata(m, cp2, mlen); 5742 } 5743 } 5744 slp->ns_cc -= NFSX_UNSIGNED; 5745 recmark = ntohl(recmark); 5746 slp->ns_reclen = recmark & ~0x80000000; 5747 if (recmark & 0x80000000) 5748 slp->ns_flag |= SLP_LASTFRAG; 5749 else 5750 slp->ns_flag &= ~SLP_LASTFRAG; 5751 if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) { 5752 slp->ns_flag &= ~SLP_GETSTREAM; 5753 return (EPERM); 5754 } 5755 } 5756 5757 /* 5758 * Now get the record part. 5759 * 5760 * Note that slp->ns_reclen may be 0. 
Linux sometimes 5761 * generates 0-length RPCs 5762 */ 5763 recm = NULL; 5764 if (slp->ns_cc == slp->ns_reclen) { 5765 recm = slp->ns_raw; 5766 slp->ns_raw = slp->ns_rawend = NULL; 5767 slp->ns_cc = slp->ns_reclen = 0; 5768 } else if (slp->ns_cc > slp->ns_reclen) { 5769 len = 0; 5770 m = slp->ns_raw; 5771 mlen = mbuf_len(m); 5772 mdata = mbuf_data(m); 5773 om = NULL; 5774 while (len < slp->ns_reclen) { 5775 if ((len + mlen) > slp->ns_reclen) { 5776 if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) { 5777 slp->ns_flag &= ~SLP_GETSTREAM; 5778 return (EWOULDBLOCK); 5779 } 5780 if (om) { 5781 if (mbuf_setnext(om, m2)) { 5782 /* trouble... just drop it */ 5783 printf("nfsrv_getstream: mbuf_setnext failed\n"); 5784 mbuf_freem(m2); 5785 slp->ns_flag &= ~SLP_GETSTREAM; 5786 return (EWOULDBLOCK); 5787 } 5788 recm = slp->ns_raw; 5789 } else { 5790 recm = m2; 5791 } 5792 mdata += slp->ns_reclen - len; 5793 mlen -= slp->ns_reclen - len; 5794 mbuf_setdata(m, mdata, mlen); 5795 len = slp->ns_reclen; 5796 } else if ((len + mlen) == slp->ns_reclen) { 5797 om = m; 5798 len += mlen; 5799 m = mbuf_next(m); 5800 recm = slp->ns_raw; 5801 if (mbuf_setnext(om, NULL)) { 5802 printf("nfsrv_getstream: mbuf_setnext failed 2\n"); 5803 slp->ns_flag &= ~SLP_GETSTREAM; 5804 return (EWOULDBLOCK); 5805 } 5806 mlen = mbuf_len(m); 5807 mdata = mbuf_data(m); 5808 } else { 5809 om = m; 5810 len += mlen; 5811 m = mbuf_next(m); 5812 mlen = mbuf_len(m); 5813 mdata = mbuf_data(m); 5814 } 5815 } 5816 slp->ns_raw = m; 5817 slp->ns_cc -= len; 5818 slp->ns_reclen = 0; 5819 } else { 5820 slp->ns_flag &= ~SLP_GETSTREAM; 5821 return (0); 5822 } 5823 5824 /* 5825 * Accumulate the fragments into a record. 
5826 */ 5827 if (slp->ns_frag == NULL) { 5828 slp->ns_frag = recm; 5829 } else { 5830 m = slp->ns_frag; 5831 while ((m2 = mbuf_next(m))) 5832 m = m2; 5833 if ((error = mbuf_setnext(m, recm))) 5834 panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error); 5835 } 5836 if (slp->ns_flag & SLP_LASTFRAG) { 5837 if (slp->ns_recend) 5838 mbuf_setnextpkt(slp->ns_recend, slp->ns_frag); 5839 else { 5840 slp->ns_rec = slp->ns_frag; 5841 slp->ns_flag |= SLP_DOREC; 5842 } 5843 slp->ns_recend = slp->ns_frag; 5844 slp->ns_frag = NULL; 5845 } 5846 } 5847} 5848 5849/* 5850 * Parse an RPC header. 5851 */ 5852int 5853nfsrv_dorec( 5854 struct nfsrv_sock *slp, 5855 struct nfsd *nfsd, 5856 struct nfsrv_descript **ndp) 5857{ 5858 mbuf_t m; 5859 mbuf_t nam; 5860 struct nfsrv_descript *nd; 5861 int error = 0; 5862 5863 *ndp = NULL; 5864 if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL)) 5865 return (ENOBUFS); 5866 MALLOC_ZONE(nd, struct nfsrv_descript *, 5867 sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK); 5868 if (!nd) 5869 return (ENOMEM); 5870 m = slp->ns_rec; 5871 slp->ns_rec = mbuf_nextpkt(m); 5872 if (slp->ns_rec) 5873 mbuf_setnextpkt(m, NULL); 5874 else { 5875 slp->ns_flag &= ~SLP_DOREC; 5876 slp->ns_recend = NULL; 5877 } 5878 slp->ns_reccnt--; 5879 if (mbuf_type(m) == MBUF_TYPE_SONAME) { 5880 nam = m; 5881 m = mbuf_next(m); 5882 if ((error = mbuf_setnext(nam, NULL))) 5883 panic("nfsrv_dorec: mbuf_setnext failed %d\n", error); 5884 } else 5885 nam = NULL; 5886 nd->nd_nam2 = nam; 5887 nfsm_chain_dissect_init(error, &nd->nd_nmreq, m); 5888 if (!error) 5889 error = nfsrv_getreq(nd); 5890 if (error) { 5891 if (nam) 5892 mbuf_freem(nam); 5893 if (nd->nd_gss_context) 5894 nfs_gss_svc_ctx_deref(nd->nd_gss_context); 5895 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); 5896 return (error); 5897 } 5898 nd->nd_mrep = NULL; 5899 *ndp = nd; 5900 nfsd->nfsd_nd = nd; 5901 return (0); 5902} 5903 5904/* 5905 * Parse an RPC request 5906 * - verify it 5907 * - fill in the cred 
struct. 5908 */ 5909int 5910nfsrv_getreq(struct nfsrv_descript *nd) 5911{ 5912 struct nfsm_chain *nmreq; 5913 int len, i; 5914 u_int32_t nfsvers, auth_type; 5915 int error = 0; 5916 uid_t user_id; 5917 gid_t group_id; 5918 int ngroups; 5919 uint32_t val; 5920 5921 nd->nd_cr = NULL; 5922 nd->nd_gss_context = NULL; 5923 nd->nd_gss_seqnum = 0; 5924 nd->nd_gss_mb = NULL; 5925 5926 user_id = group_id = -2; 5927 val = auth_type = len = 0; 5928 5929 nmreq = &nd->nd_nmreq; 5930 nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID 5931 nfsm_chain_get_32(error, nmreq, val); // RPC Call 5932 if (!error && (val != RPC_CALL)) 5933 error = EBADRPC; 5934 nfsmout_if(error); 5935 nd->nd_repstat = 0; 5936 nfsm_chain_get_32(error, nmreq, val); // RPC Version 5937 nfsmout_if(error); 5938 if (val != RPC_VER2) { 5939 nd->nd_repstat = ERPCMISMATCH; 5940 nd->nd_procnum = NFSPROC_NOOP; 5941 return (0); 5942 } 5943 nfsm_chain_get_32(error, nmreq, val); // RPC Program Number 5944 nfsmout_if(error); 5945 if (val != NFS_PROG) { 5946 nd->nd_repstat = EPROGUNAVAIL; 5947 nd->nd_procnum = NFSPROC_NOOP; 5948 return (0); 5949 } 5950 nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number 5951 nfsmout_if(error); 5952 if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) { 5953 nd->nd_repstat = EPROGMISMATCH; 5954 nd->nd_procnum = NFSPROC_NOOP; 5955 return (0); 5956 } 5957 nd->nd_vers = nfsvers; 5958 nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number 5959 nfsmout_if(error); 5960 if ((nd->nd_procnum >= NFS_NPROCS) || 5961 ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) { 5962 nd->nd_repstat = EPROCUNAVAIL; 5963 nd->nd_procnum = NFSPROC_NOOP; 5964 return (0); 5965 } 5966 if (nfsvers != NFS_VER3) 5967 nd->nd_procnum = nfsv3_procid[nd->nd_procnum]; 5968 nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor 5969 nfsm_chain_get_32(error, nmreq, len); // Auth Length 5970 if (!error && (len < 0 || len > RPCAUTH_MAXSIZ)) 5971 error = EBADRPC; 5972 
nfsmout_if(error); 5973 5974 /* Handle authentication */ 5975 if (auth_type == RPCAUTH_SYS) { 5976 struct posix_cred temp_pcred; 5977 if (nd->nd_procnum == NFSPROC_NULL) 5978 return (0); 5979 nd->nd_sec = RPCAUTH_SYS; 5980 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp 5981 nfsm_chain_get_32(error, nmreq, len); // hostname length 5982 if (len < 0 || len > NFS_MAXNAMLEN) 5983 error = EBADRPC; 5984 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname 5985 nfsmout_if(error); 5986 5987 /* create a temporary credential using the bits from the wire */ 5988 bzero(&temp_pcred, sizeof(temp_pcred)); 5989 nfsm_chain_get_32(error, nmreq, user_id); 5990 nfsm_chain_get_32(error, nmreq, group_id); 5991 temp_pcred.cr_groups[0] = group_id; 5992 nfsm_chain_get_32(error, nmreq, len); // extra GID count 5993 if ((len < 0) || (len > RPCAUTH_UNIXGIDS)) 5994 error = EBADRPC; 5995 nfsmout_if(error); 5996 for (i = 1; i <= len; i++) 5997 if (i < NGROUPS) 5998 nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]); 5999 else 6000 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); 6001 nfsmout_if(error); 6002 ngroups = (len >= NGROUPS) ? 
NGROUPS : (len + 1); 6003 if (ngroups > 1) 6004 nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups); 6005 nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE) 6006 nfsm_chain_get_32(error, nmreq, len); // verifier length 6007 if (len < 0 || len > RPCAUTH_MAXSIZ) 6008 error = EBADRPC; 6009 if (len > 0) 6010 nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); 6011 6012 /* request creation of a real credential */ 6013 temp_pcred.cr_uid = user_id; 6014 temp_pcred.cr_ngroups = ngroups; 6015 nd->nd_cr = posix_cred_create(&temp_pcred); 6016 if (nd->nd_cr == NULL) { 6017 nd->nd_repstat = ENOMEM; 6018 nd->nd_procnum = NFSPROC_NOOP; 6019 return (0); 6020 } 6021 } else if (auth_type == RPCSEC_GSS) { 6022 error = nfs_gss_svc_cred_get(nd, nmreq); 6023 if (error) { 6024 if (error == EINVAL) 6025 goto nfsmout; // drop the request 6026 nd->nd_repstat = error; 6027 nd->nd_procnum = NFSPROC_NOOP; 6028 return (0); 6029 } 6030 } else { 6031 if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE 6032 return (0); 6033 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED); 6034 nd->nd_procnum = NFSPROC_NOOP; 6035 return (0); 6036 } 6037 return (0); 6038nfsmout: 6039 if (IS_VALID_CRED(nd->nd_cr)) 6040 kauth_cred_unref(&nd->nd_cr); 6041 nfsm_chain_cleanup(nmreq); 6042 return (error); 6043} 6044 6045/* 6046 * Search for a sleeping nfsd and wake it up. 6047 * SIDE EFFECT: If none found, make sure the socket is queued up so that one 6048 * of the running nfsds will go look for the work in the nfsrv_sockwait list. 6049 * Note: Must be called with nfsd_mutex held. 
6050 */ 6051void 6052nfsrv_wakenfsd(struct nfsrv_sock *slp) 6053{ 6054 struct nfsd *nd; 6055 6056 if ((slp->ns_flag & SLP_VALID) == 0) 6057 return; 6058 6059 lck_rw_lock_exclusive(&slp->ns_rwlock); 6060 /* if there's work to do on this socket, make sure it's queued up */ 6061 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) { 6062 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq); 6063 slp->ns_flag |= SLP_WAITQ; 6064 } 6065 lck_rw_done(&slp->ns_rwlock); 6066 6067 /* wake up a waiting nfsd, if possible */ 6068 nd = TAILQ_FIRST(&nfsd_queue); 6069 if (!nd) 6070 return; 6071 6072 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue); 6073 nd->nfsd_flag &= ~NFSD_WAITING; 6074 wakeup(nd); 6075} 6076 6077#endif /* NFSSERVER */ 6078