1/* 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1991, 1993, 1995 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Rick Macklem at The University of Guelph. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>
#include <sys/acct.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/krpc.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* Socket-facility debug logging (level 7) */
#define NFS_SOCK_DBG(...) NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__)

/* XXX: prototypes for Mach entry points not exported via a header */
boolean_t	current_thread_aborted(void);
kern_return_t	thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */

int nfsrv_getstream(struct nfsrv_sock *,int);
int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

/*
 * compare two sockaddr structures
 *
 * Returns a strcmp()-style ordering: negative, zero, or positive.
 * Ordering precedence: NULL-ness, then sa_family, then sa_len, then
 * the address bytes themselves (AF_INET/AF_INET6 only).  Any other
 * address family compares as "less than" (-1).  Port numbers are
 * intentionally NOT compared -- only the host address.
 */
int
nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
{
	if (!sa1)
		return (-1);
	if (!sa2)
		return (1);
	if (sa1->sa_family != sa2->sa_family)
		return ((sa1->sa_family < sa2->sa_family) ? -1 : 1);
	if (sa1->sa_len != sa2->sa_len)
		return ((sa1->sa_len < sa2->sa_len) ? -1 : 1);
	if (sa1->sa_family == AF_INET)
		return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
			&((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr)));
	if (sa1->sa_family == AF_INET6)
		return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
			&((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr)));
	return (-1);
}

#if NFSCLIENT

/* forward declarations for the client-side connect/request machinery below */
int nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
int nfs_connect_search_ping(struct nfsmount *, struct nfs_socket *, struct timeval *);
void nfs_connect_search_socket_found(struct nfsmount *, struct nfs_socket_search *, struct nfs_socket *);
void nfs_connect_search_socket_reap(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int nfs_connect_search_check(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int nfs_reconnect(struct nfsmount *);
int nfs_connect_setup(struct nfsmount *);
void nfs_mount_sock_thread(void *, wait_result_t);
void nfs_udp_rcv(socket_t, void*, int);
void nfs_tcp_rcv(socket_t, void*, int);
void nfs_sock_poke(struct nfsmount *);
void nfs_request_match_reply(struct nfsmount *, mbuf_t);
void nfs_reqdequeue(struct nfsreq *);
void nfs_reqbusy(struct nfsreq *);
struct nfsreq *nfs_reqnext(struct nfsreq *);
int nfs_wait_reply(struct nfsreq *);
void nfs_softterm(struct nfsreq *);
int nfs_can_squish(struct nfsmount *);
int nfs_is_squishy(struct nfsmount *);
int nfs_is_dead(int, struct nfsmount *);

/*
 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
/*
 * NFS_RTO(n, t): retransmit timeout for timer class t on mount n.
 * t == 0 uses the mount's static nm_timeo; t < 3 (getattr/lookup) uses
 * srtt/4 + sdrtt (A+2D scaled); otherwise srtt/8 + sdrtt (A+4D scaled).
 * (srtt/sdrtt are kept pre-scaled, hence the shift-and-round arithmetic.)
 */
#define NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
/* smoothed rtt / rtt deviation slots for a request's procedure's timer class */
#define NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define NFS_CWNDSCALE	256
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/* exponential retransmit backoff multipliers, indexed by retry count */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };

/*
 * Increment location index to next address/server/location.
 */
/*
 * Advance *nlip to the next address within nlp, rolling over from
 * address -> server -> location -> (wrap to first location).  Servers with
 * zero addresses are skipped.  If no server anywhere has an address, the
 * index is left unchanged (see the bail-out check below).
 */
void
nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
{
	uint8_t loc = nlip->nli_loc;
	uint8_t serv = nlip->nli_serv;
	uint8_t addr = nlip->nli_addr;

	/* move to next address */
	addr++;
	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
		/* no more addresses on current server, go to first address of next server */
next_server:
		addr = 0;
		serv++;
		if (serv >= nlp->nl_locations[loc]->nl_servcount) {
			/* no more servers on current location, go to first server of next location */
			serv = 0;
			loc++;
			if (loc >= nlp->nl_numlocs)
				loc = 0; /* after last location, wrap back around to first location */
		}
	}
	/*
	 * It's possible for this next server to not have any addresses.
	 * Check for that here and go to the next server.
	 * But bail out if we've managed to come back around to the original
	 * location that was passed in. (That would mean no servers had any
	 * addresses. And we don't want to spin here forever.)
	 */
	if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr))
		return;
	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount)
		goto next_server;

	/* commit the advanced index */
	nlip->nli_loc = loc;
	nlip->nli_serv = serv;
	nlip->nli_addr = addr;
}

/*
 * Compare two location indices.
 *
 * Returns a strcmp()-style ordering over (loc, serv, addr) tuples,
 * most-significant field first.
 */
int
nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
{
	if (nlip1->nli_loc != nlip2->nli_loc)
		return (nlip1->nli_loc - nlip2->nli_loc);
	if (nlip1->nli_serv != nlip2->nli_serv)
		return (nlip1->nli_serv - nlip2->nli_serv);
	return (nlip1->nli_addr - nlip2->nli_addr);
}

/*
 * Get the mntfromname (or path portion only) for a given location.
 */
/*
 * Format "server:/path" (or just the path when pathonly is set) for the
 * location identified by idx into caller-supplied buffer s of length size.
 * A zero-component path (root export) is rendered as "/".  Output is
 * built with snprintf; NOTE(review): cnt from snprintf is the would-be
 * length, so on truncation p/size go out of sync — callers appear to size
 * the buffer generously; confirm before reusing with small buffers.
 */
void
nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly)
{
	struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc];
	char *p;
	int cnt, i;

	p = s;
	if (!pathonly) {
		/* leading "server:" prefix */
		cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name);
		p += cnt;
		size -= cnt;
	}
	if (fsl->nl_path.np_compcount == 0) {
		/* mounting root export on server */
		if (size > 0) {
			*p++ = '/';
			*p++ = '\0';
		}
		return;
	}
	/* append each server path component */
	for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) {
		cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]);
		p += cnt;
		size -= cnt;
	}
}

/*
 * NFS client connect socket upcall.
 * (Used only during socket connect/search.)
 *
 * Receives data from the socket being probed, parses the RPC reply to our
 * NULL-procedure ping, and marks the socket NSO_VERIFIED (usable) or
 * NSO_DEAD (with nso_error set) accordingly.  During version negotiation a
 * PROGMISMATCH reply is used to pick a mutually supported portmap/NFS
 * version.  Wakes the search thread when there is a verdict.
 */
void
nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
{
	struct nfs_socket *nso = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0, recv = 1;

	/* While still connecting, just nudge whoever is waiting on the connect. */
	if (nso->nso_flags & NSO_CONNECTING) {
		NFS_SOCK_DBG("nfs connect - socket %p upcall - connecting\n", nso);
		wakeup(nso->nso_wake);
		return;
	}

	lck_mtx_lock(&nso->nso_lock);
	/* Ignore the upcall unless a ping is outstanding and nobody else is in here. */
	if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
		NFS_SOCK_DBG("nfs connect - socket %p upcall - nevermind\n", nso);
		lck_mtx_unlock(&nso->nso_lock);
		return;
	}
	NFS_SOCK_DBG("nfs connect - socket %p upcall\n", nso);
	nso->nso_flags |= NSO_UPCALL;

	/* loop while we make error-free progress */
	while (!error && recv) {
		/* make sure we're still interested in this socket */
		if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD))
			break;
		/* drop the lock around the (possibly blocking) socket receive */
		lck_mtx_unlock(&nso->nso_lock);
		m = NULL;
		if (nso->nso_sotype == SOCK_STREAM) {
			/* TCP: reassemble a full RPC record from the stream */
			error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
		} else {
			/* UDP: each datagram is a complete reply */
			rcvlen = 1000000;
			error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
			recv = m ? 1 : 0;
		}
		lck_mtx_lock(&nso->nso_lock);
		if (m) {
			/* match response with request */
			struct nfsm_chain nmrep;
			uint32_t reply = 0, rxid = 0, verf_type, verf_len;
			uint32_t reply_status, rejected_status, accepted_status;

			nfsm_chain_dissect_init(error, &nmrep, m);
			nfsm_chain_get_32(error, &nmrep, rxid);
			nfsm_chain_get_32(error, &nmrep, reply);
			/* must be an RPC REPLY matching our ping's XID */
			if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
				error = EBADRPC;
			nfsm_chain_get_32(error, &nmrep, reply_status);
			if (!error && (reply_status == RPC_MSGDENIED)) {
				nfsm_chain_get_32(error, &nmrep, rejected_status);
				if (!error)
					error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
			}
			nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
			nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
			nfsmout_if(error);
			if (verf_len)
				nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
			nfsm_chain_get_32(error, &nmrep, accepted_status);
			nfsmout_if(error);
			/*
			 * If we're still negotiating a version, use the server's
			 * advertised [minvers, maxvers] range from the PROGMISMATCH
			 * reply to pick one.
			 */
			if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
				uint32_t minvers, maxvers;
				nfsm_chain_get_32(error, &nmrep, minvers);
				nfsm_chain_get_32(error, &nmrep, maxvers);
				nfsmout_if(error);
				if (nso->nso_protocol == PMAPPROG) {
					if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
						error = EPROGMISMATCH;
					else if ((nso->nso_saddr->sa_family == AF_INET) &&
						 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
						nso->nso_version = PMAPVERS;
					else if (nso->nso_saddr->sa_family == AF_INET6) {
						if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
							nso->nso_version = RPCBVERS4;
						else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
							nso->nso_version = RPCBVERS3;
					}
				} else if (nso->nso_protocol == NFS_PROG) {
					/* prefer v3, then v2, then v4 */
					if ((minvers > NFS_VER4) || (maxvers < NFS_VER2))
						error = EPROGMISMATCH;
					else if ((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers))
						nso->nso_version = NFS_VER3;
					else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers))
						nso->nso_version = NFS_VER2;
					else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers))
						nso->nso_version = NFS_VER4;
				}
				if (!error && nso->nso_version)
					accepted_status = RPC_SUCCESS;
			}
			if (!error) {
				/* map RPC accept status to an errno */
				switch (accepted_status) {
				case RPC_SUCCESS:
					error = 0;
					break;
				case RPC_PROGUNAVAIL:
					error = EPROGUNAVAIL;
					break;
				case RPC_PROGMISMATCH:
					error = EPROGMISMATCH;
					break;
				case RPC_PROCUNAVAIL:
					error = EPROCUNAVAIL;
					break;
				case RPC_GARBAGE:
					error = EBADRPC;
					break;
				case RPC_SYSTEM_ERR:
				default:
					error = EIO;
					break;
				}
			}
nfsmout:
			/* verdict reached: mark socket verified or dead */
			nso->nso_flags &= ~NSO_PINGING;
			if (error) {
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
			} else {
				nso->nso_flags |= NSO_VERIFIED;
			}
			mbuf_freem(m);
			/* wake up search thread */
			wakeup(nso->nso_wake);
			break;
		}
	}

	nso->nso_flags &= ~NSO_UPCALL;
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... */
		nso->nso_error = error ? error : EPIPE;
		nso->nso_flags |= NSO_DEAD;
		wakeup(nso->nso_wake);
	}
	/* let nfs_socket_destroy() proceed if it's waiting for this upcall */
	if (nso->nso_flags & NSO_DISCONNECTING)
		wakeup(&nso->nso_flags);
	lck_mtx_unlock(&nso->nso_lock);
}

/*
 * Create/initialize an nfs_socket structure.
 */
/*
 * Allocate an nfs_socket, copy in the sockaddr (overriding its port),
 * create the kernel socket and, if resvport is set, constrain it to a
 * reserved port (IP_PORTRANGE_LOW) and bind to verify one is available.
 * On success *nsop is set and 0 returned; on failure the partially built
 * socket is destroyed and an errno returned.
 */
int
nfs_socket_create(
	__unused struct nfsmount *nmp,
	struct sockaddr *sa,
	int sotype,
	in_port_t port,
	uint32_t protocol,
	uint32_t vers,
	int resvport,
	struct nfs_socket **nsop)
{
	struct nfs_socket *nso;
	struct timeval now;
	int error;
#ifdef NFS_SOCKET_DEBUGGING
	/* render the address as a string for debug logging only */
	char naddr[MAX_IPv6_STR_LEN];
	void *sinaddr;

	if (sa->sa_family == AF_INET)
		sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
	else
		sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
	if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr)
		strlcpy(naddr, "<unknown>", sizeof(naddr));
#else
	char naddr[1] = { 0 };
#endif

	*nsop = NULL;

	/* Create the socket. */
	MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO);
	if (nso)
		MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO);
	if (!nso || !nso->nso_saddr) {
		if (nso)
			FREE(nso, M_TEMP);
		return (ENOMEM);
	}
	lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
	nso->nso_sotype = sotype;
	if (nso->nso_sotype == SOCK_STREAM)
		nfs_rpc_record_state_init(&nso->nso_rrs);
	microuptime(&now);
	nso->nso_timestamp = now.tv_sec;
	/* copy the caller's sockaddr, then force the requested port */
	bcopy(sa, nso->nso_saddr, sa->sa_len);
	if (sa->sa_family == AF_INET)
		((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
	else if (sa->sa_family == AF_INET6)
		((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
	nso->nso_protocol = protocol;
	nso->nso_version = vers;

	error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);

	/* Some servers require that the client port be a reserved port number. */
	if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
		struct sockaddr_storage ss;
		int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
		int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
		int portrange = IP_PORTRANGE_LOW;

		error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
		if (!error) {	/* bind now to check for failure */
			ss.ss_len = sa->sa_len;
			ss.ss_family = sa->sa_family;
			if (ss.ss_family == AF_INET) {
				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
				((struct sockaddr_in*)&ss)->sin_port = htons(0);
			} else if (ss.ss_family == AF_INET6) {
				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
			} else {
				error = EINVAL;
			}
			if (!error)
				error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
		}
	}

	if (error) {
		NFS_SOCK_DBG("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
			resvport ? "r" : "", port, protocol, vers);
		nfs_socket_destroy(nso);
	} else {
		NFS_SOCK_DBG("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
			sotype, resvport ? "r" : "", port, protocol, vers);
		*nsop = nso;
	}
	return (error);
}

/*
 * Destroy an nfs_socket structure.
 */
/*
 * Tear down an nfs_socket: wait (up to ~4s) for any in-flight upcall to
 * drain, shut down and close the socket, then free all attached resources.
 * Caller must not hold nso->nso_lock.
 */
void
nfs_socket_destroy(struct nfs_socket *nso)
{
	struct timespec ts = { 4, 0 };

	lck_mtx_lock(&nso->nso_lock);
	nso->nso_flags |= NSO_DISCONNECTING;
	if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */
		msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts);
	lck_mtx_unlock(&nso->nso_lock);
	sock_shutdown(nso->nso_so, SHUT_RDWR);
	sock_close(nso->nso_so);
	if (nso->nso_sotype == SOCK_STREAM)
		nfs_rpc_record_state_cleanup(&nso->nso_rrs);
	lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
	if (nso->nso_saddr)
		FREE(nso->nso_saddr, M_SONAME);
	if (nso->nso_saddr2)
		FREE(nso->nso_saddr2, M_SONAME);
	NFS_SOCK_DBG("nfs connect - socket %p destroyed\n", nso);
	FREE(nso, M_TEMP);
}

/*
 * Set common socket options on an nfs_socket.
 */
void
nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
{
	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because most receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	struct timeval timeo;
	int on = 1, proto;

	timeo.tv_usec = 0;
	/* soft/squishy mounts get a short (5s) send timeout; hard mounts 60s */
	timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 5 : 60;
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (nso->nso_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(nso->nso_so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}
	if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
	}
	/* set SO_NOADDRERR to detect network changes ASAP */
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	/* just playin' it safe with upcalls */
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	/* socket should be interruptible if the mount is */
	if (!NMFLAG(nmp, INTR))
		sock_nointerrupt(nso->nso_so, 1);
}

/*
 * Release resources held in an nfs_socket_search.
 *
 * Destroys every candidate socket still on the search list as well as
 * the chosen socket (nss_sock), if any.
 */
void
nfs_socket_search_cleanup(struct nfs_socket_search *nss)
{
	struct nfs_socket *nso, *nsonext;

	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt--;
		nfs_socket_destroy(nso);
	}
	if (nss->nss_sock) {
		nfs_socket_destroy(nss->nss_sock);
		nss->nss_sock = NULL;
	}
}

/*
 * Prefer returning certain errors over others.
 * This function returns a ranking of the given error.
 */
/*
 * Rank an errno for the socket search: higher rank = more informative.
 * 0: success; 1: timeout-ish; 2: network/connection failures;
 * 3: RPC program/version mismatches; 4: bad RPC; 5: everything else.
 */
int
nfs_connect_error_class(int error)
{
	switch (error) {
	case 0:
		return (0);
	case ETIMEDOUT:
	case EAGAIN:
		return (1);
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case EISCONN:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		return (2);
	case ERPCMISMATCH:
	case EPROCUNAVAIL:
	case EPROGMISMATCH:
	case EPROGUNAVAIL:
		return (3);
	case EBADRPC:
		return (4);
	default:
		return (5);
	}
}

/*
 * Make sure a socket search returns the best error.
 * (Keeps whichever of the old/new errors has the higher ranking.)
 */
void
nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
{
	if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error))
		nss->nss_error = error;
}

/* nfs_connect_search_new_socket:
 *	Given a socket search structure for an nfs mount try to find a new socket from the set of addresses specified
 *	by nss.
 *
 *	nss_last is set to -1 at initialization to indicate the first time. Its set to -2 if address was found but
 *	could not be used or if a socket timed out.
 */
int
nfs_connect_search_new_socket(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
{
	struct nfs_fs_location *fsl;
	struct nfs_fs_server *fss;
	struct sockaddr_storage ss;
	struct nfs_socket *nso;
	char *addrstr;
	int error = 0;


	NFS_SOCK_DBG("nfs connect %s nss_addrcnt = %d\n",
		vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss->nss_addrcnt);

	/*
	 * while there are addresses and:
	 *	we have no sockets or
	 *	the last address failed and did not produce a socket (nss_last < 0) or
	 *	Its been a while (2 seconds) and we have less than the max number of concurrent sockets to search (4)
	 *	then attempt to create a socket with the current address.
	 */
	while (nss->nss_addrcnt > 0 && ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
				((nss->nss_sockcnt < 4) && (now->tv_sec >= (nss->nss_last + 2))))) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			return (EINTR);
		/* Can we convert the address to a sockaddr? */
		fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
		fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
		addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
		if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
			/* unparsable address: skip it and move on */
			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
			nss->nss_addrcnt -= 1;
			nss->nss_last = -2;
			continue;
		}
		/* Check that socket family is acceptable. */
		if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
			nss->nss_addrcnt -= 1;
			nss->nss_last = -2;
			continue;
		}

		/* Create the socket. */
		error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
				nss->nss_port, nss->nss_protocol, nss->nss_version,
				((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
		if (error)
			return (error);

		nso->nso_location = nss->nss_nextloc;
		nso->nso_wake = nss;
		error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
		if (error) {
			/* keep the socket on the list; it will be reaped as dead */
			lck_mtx_lock(&nso->nso_lock);
			nso->nso_error = error;
			nso->nso_flags |= NSO_DEAD;
			lck_mtx_unlock(&nso->nso_lock);
		}

		TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt++;
		nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
		nss->nss_addrcnt -= 1;

		nss->nss_last = now->tv_sec;
	}

	if (nss->nss_addrcnt == 0 && nss->nss_last < 0)
		nss->nss_last = now->tv_sec;

	return (error);
}

/*
 * nfs_connect_search_socket_connect:	Connect an nfs socket nso for nfsmount nmp.
 *					If successful set the socket options for the socket as required from the mount.
 *
 * Assumes:	nso->nso_lock is held on entry and return.
 *
 * Returns 1 when the socket is connected and set up, 0 otherwise (still
 * waiting, or the socket was marked NSO_DEAD with nso_error set).
 * The lock is dropped and retaken around the blocking sock_connect() call.
 */
int
nfs_connect_search_socket_connect(struct nfsmount *nmp, struct nfs_socket *nso, int verbose)
{
	int error;

	if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
		/* no connection needed, just say it's already connected */
		NFS_SOCK_DBG("nfs connect %s UDP socket %p noconnect\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
		nso->nso_flags |= NSO_CONNECTED;
		nfs_socket_options(nmp, nso);
		return (1);	/* Socket is connected and setup */
	} else if (!(nso->nso_flags & NSO_CONNECTING)) {
		/* initiate the connection */
		nso->nso_flags |= NSO_CONNECTING;
		lck_mtx_unlock(&nso->nso_lock);
		NFS_SOCK_DBG("nfs connect %s connecting socket %p\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
		/* non-blocking connect; EINPROGRESS is expected and polled below */
		error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
		lck_mtx_lock(&nso->nso_lock);
		if (error && (error != EINPROGRESS)) {
			nso->nso_error = error;
			nso->nso_flags |= NSO_DEAD;
			return (0);
		}
	}
	if (nso->nso_flags & NSO_CONNECTING) {
		/* check the connection */
		if (sock_isconnected(nso->nso_so)) {
			NFS_SOCK_DBG("nfs connect %s socket %p is connected\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
			nso->nso_flags &= ~NSO_CONNECTING;
			nso->nso_flags |= NSO_CONNECTED;
			nfs_socket_options(nmp, nso);
			return (1);	/* Socket is connected and setup */
		} else {
			/* not connected yet -- see if the connect attempt failed */
			int optlen = sizeof(error);
			error = 0;
			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) { /* we got an error on the socket */
				NFS_SOCK_DBG("nfs connect %s socket %p connection error %d\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
				if (verbose)
					printf("nfs connect socket error %d for %s\n",
						error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
				return (0);
			}
		}
	}

	return (0);	/* Waiting to be connected */
}

/*
 * nfs_connect_search_ping:	Send a null proc on the nso socket.
 *
 * Builds and sends a NULL RPC (RPCAUTH_SYS, kernel credential) on the
 * socket, recording the ping XID so the connect upcall can match the
 * reply.  If no version has been negotiated yet, a default is used for
 * the ping (portmap vs. rpcbind by family, or NFSv3).
 * Assumes nso->nso_lock is held on entry and return; dropped around the
 * RPC-header build and the send.  Returns 1 if the ping was sent, 0 if
 * the socket was marked dead (short send counts as ETIMEDOUT).
 */
int
nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct timeval *now)
{
	/* initiate a NULL RPC request */
	uint64_t xid = nso->nso_pingxid;
	mbuf_t m, mreq = NULL;
	struct msghdr msg;
	size_t reqlen, sentlen;
	uint32_t vers = nso->nso_version;
	int error;

	if (!vers) {
		if (nso->nso_protocol == PMAPPROG)
			vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
		else if (nso->nso_protocol == NFS_PROG)
			vers = NFS_VER3;
	}
	lck_mtx_unlock(&nso->nso_lock);
	error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
			vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
	lck_mtx_lock(&nso->nso_lock);
	if (!error) {
		nso->nso_flags |= NSO_PINGING;
		nso->nso_pingxid = R_XID32(xid);
		nso->nso_reqtimestamp = now->tv_sec;
		bzero(&msg, sizeof(msg));
		/* unconnected UDP needs an explicit destination address */
		if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
			msg.msg_name = nso->nso_saddr;
			msg.msg_namelen = nso->nso_saddr->sa_len;
		}
		for (reqlen=0, m=mreq; m; m = mbuf_next(m))
			reqlen += mbuf_len(m);
		lck_mtx_unlock(&nso->nso_lock);
		error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
		NFS_SOCK_DBG("nfs connect %s verifying socket %p send rv %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
		lck_mtx_lock(&nso->nso_lock);
		if (!error && (sentlen != reqlen))
			error = ETIMEDOUT;
	}
	if (error) {
		nso->nso_error = error;
		nso->nso_flags |= NSO_DEAD;
		return (0);
	}

	return (1);
}

/*
 * nfs_connect_search_socket_found:	Take the found socket of the socket search list and assign it to the searched socket.
 *					Set the nfs socket protocol and version if needed.
 */
void
nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct nfs_socket *nso)
{
	NFS_SOCK_DBG("nfs connect %s socket %p verified\n",
		vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
	if (!nso->nso_version) {
		/* If the version isn't set, the default must have worked. */
		if (nso->nso_protocol == PMAPPROG)
			nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
		if (nso->nso_protocol == NFS_PROG)
			nso->nso_version = NFS_VER3;
	}
	/* move the winner off the candidate list and into nss_sock */
	TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
	nss->nss_sockcnt--;
	nss->nss_sock = nso;
}

/*
 * nfs_connect_search_socket_reap:	For each socket in the search list mark any timed out socket as dead and remove from
 *					the list. Dead socket are then destroyed.
 *
 * The best error seen among the reaped sockets is folded into nss_error.
 */
void
nfs_connect_search_socket_reap(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct timeval *now)
{
	struct nfs_socket *nso, *nsonext;

	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
		lck_mtx_lock(&nso->nso_lock);
		if (now->tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
			/* took too long */
			NFS_SOCK_DBG("nfs connect %s socket %p timed out\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
			nso->nso_error = ETIMEDOUT;
			nso->nso_flags |= NSO_DEAD;
		}
		if (!(nso->nso_flags & NSO_DEAD)) {
			lck_mtx_unlock(&nso->nso_lock);
			continue;
		}
		lck_mtx_unlock(&nso->nso_lock);
		NFS_SOCK_DBG("nfs connect %s reaping socket %p %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error);
		nfs_socket_search_update_error(nss, nso->nso_error);
		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt--;
		nfs_socket_destroy(nso);
		/* If there are more sockets to try, force the starting of another socket */
		if (nss->nss_addrcnt > 0)
			nss->nss_last = -2;
	}
}

/*
 * nfs_connect_search_check:
 * Check on the status of search and wait for replies if needed.
 */
int
nfs_connect_search_check(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
{
	int error;

	/* log a warning if connect is taking a while */
	if (((now->tv_sec - nss->nss_timestamp) >= 8) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
		printf("nfs_connect: socket connect taking a while for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		nss->nss_flags |= NSS_WARNED;
	}
	/* give up early if the mount is being unmounted or the caller was signalled */
	if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
		return (EINTR);
	if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
		return (error);

	/* If we were successful at sending a ping, wait up to a second for a reply */
	if (nss->nss_last >= 0)
		tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);

	return (0);
}


/*
 * Continue the socket search until we have something to report.
 *
 * Each pass: start a new candidate socket if needed, push every socket on
 * the list along (connect -> ping -> verified), then reap dead/timed-out
 * sockets.  Loops until a socket is found, an error occurs, or there is
 * nothing left to try.
 */
int
nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
{
	struct nfs_socket *nso;
	struct timeval now;
	int error;
	int verbose = (nss->nss_flags & NSS_VERBOSE);

loop:
	microuptime(&now);
	NFS_SOCK_DBG("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec);

	/* add a new socket to the socket list if needed and available */
	error = nfs_connect_search_new_socket(nmp, nss, &now);
	if (error) {
		NFS_SOCK_DBG("nfs connect returned %d\n", error);
		return (error);
	}

	/* check each active socket on the list and try to push it along */
	TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
		/* nso_lock is held across all per-socket state transitions below */
		lck_mtx_lock(&nso->nso_lock);

		/* If not connected connect it */
		if (!(nso->nso_flags & NSO_CONNECTED)) {
			if (!nfs_connect_search_socket_connect(nmp, nso, verbose)) {
				lck_mtx_unlock(&nso->nso_lock);
				continue;
			}
		}

		/* If the socket hasn't been verified or in a ping, ping it. We also handle UDP retransmits */
		if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
		    ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
			if (!nfs_connect_search_ping(nmp, nso, &now)) {
				lck_mtx_unlock(&nso->nso_lock);
				continue;
			}
		}

		/* Has the socket been verified by the up call routine? */
		if (nso->nso_flags & NSO_VERIFIED) {
			/* WOOHOO!! This socket looks good! */
			nfs_connect_search_socket_found(nmp, nss, nso);
			lck_mtx_unlock(&nso->nso_lock);
			break;
		}
		lck_mtx_unlock(&nso->nso_lock);
	}

	/* Check for timed out sockets and mark as dead and then remove all dead sockets. */
	nfs_connect_search_socket_reap(nmp, nss, &now);

	/*
	 * Keep looping if we haven't found a socket yet and we have more
	 * sockets to (continue to) try.
	 */
	error = 0;
	if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || nss->nss_addrcnt)) {
		error = nfs_connect_search_check(nmp, nss, &now);
		if (!error)
			goto loop;
	}

	NFS_SOCK_DBG("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
	return (error);
}

/*
 * Initialize a new NFS connection.
 *
 * Search for a location to connect a socket to and initialize the connection.
 *
 * An NFS mount may have multiple locations/servers/addresses available.
 * We attempt to connect to each one asynchronously and will start
 * several sockets in parallel if other locations are slow to answer.
 * We'll use the first NFS socket we can successfully set up.
 *
 * The search may involve contacting the portmapper service first.
 *
 * A mount's initial connection may require negotiating some parameters such
 * as socket type and NFS version.
1048 */ 1049int 1050nfs_connect(struct nfsmount *nmp, int verbose, int timeo) 1051{ 1052 struct nfs_socket_search nss; 1053 struct nfs_socket *nso, *nsonfs; 1054 struct sockaddr_storage ss; 1055 struct sockaddr *saddr, *oldsaddr; 1056 sock_upcall upcall; 1057 struct timeval now, start; 1058 int error, savederror, nfsvers; 1059 uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM; 1060 fhandle_t *fh = NULL; 1061 char *path = NULL; 1062 in_port_t port; 1063 int addrtotal = 0; 1064 1065 /* paranoia... check that we have at least one address in the locations */ 1066 uint32_t loc, serv; 1067 for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) { 1068 for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) { 1069 addrtotal += nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount; 1070 if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount == 0) 1071 NFS_SOCK_DBG("nfs connect %s search, server %s has no addresses\n", 1072 vfs_statfs(nmp->nm_mountp)->f_mntfromname, 1073 nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name); 1074 } 1075 } 1076 1077 if (addrtotal == 0) { 1078 NFS_SOCK_DBG("nfs connect %s search failed, no addresses\n", 1079 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1080 return (EINVAL); 1081 } else 1082 NFS_SOCK_DBG("nfs connect %s has %d addresses\n", 1083 vfs_statfs(nmp->nm_mountp)->f_mntfromname, addrtotal); 1084 1085 lck_mtx_lock(&nmp->nm_lock); 1086 nmp->nm_sockflags |= NMSOCK_CONNECTING; 1087 nmp->nm_nss = &nss; 1088 lck_mtx_unlock(&nmp->nm_lock); 1089 microuptime(&start); 1090 savederror = error = 0; 1091 1092tryagain: 1093 /* initialize socket search state */ 1094 bzero(&nss, sizeof(nss)); 1095 nss.nss_addrcnt = addrtotal; 1096 nss.nss_error = savederror; 1097 TAILQ_INIT(&nss.nss_socklist); 1098 nss.nss_sotype = sotype; 1099 nss.nss_startloc = nmp->nm_locations.nl_current; 1100 nss.nss_timestamp = start.tv_sec; 1101 nss.nss_timeo = timeo; 1102 if (verbose) 1103 
nss.nss_flags |= NSS_VERBOSE; 1104 1105 /* First time connecting, we may need to negotiate some things */ 1106 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1107 if (!nmp->nm_vers) { 1108 /* No NFS version specified... */ 1109 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { 1110 /* ...connect to portmapper first if we (may) need any ports. */ 1111 nss.nss_port = PMAPPORT; 1112 nss.nss_protocol = PMAPPROG; 1113 nss.nss_version = 0; 1114 } else { 1115 /* ...connect to NFS port first. */ 1116 nss.nss_port = nmp->nm_nfsport; 1117 nss.nss_protocol = NFS_PROG; 1118 nss.nss_version = 0; 1119 } 1120 } else if (nmp->nm_vers >= NFS_VER4) { 1121 /* For NFSv4, we use the given (or default) port. */ 1122 nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT; 1123 nss.nss_protocol = NFS_PROG; 1124 nss.nss_version = 4; 1125 } else { 1126 /* For NFSv3/v2... */ 1127 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { 1128 /* ...connect to portmapper first if we need any ports. */ 1129 nss.nss_port = PMAPPORT; 1130 nss.nss_protocol = PMAPPROG; 1131 nss.nss_version = 0; 1132 } else { 1133 /* ...connect to NFS port first. 
*/ 1134 nss.nss_port = nmp->nm_nfsport; 1135 nss.nss_protocol = NFS_PROG; 1136 nss.nss_version = nmp->nm_vers; 1137 } 1138 } 1139 NFS_SOCK_DBG("nfs connect first %s, so type %d port %d prot %d %d\n", 1140 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, 1141 nss.nss_protocol, nss.nss_version); 1142 } else { 1143 /* we've connected before, just connect to NFS port */ 1144 if (!nmp->nm_nfsport) { 1145 /* need to ask portmapper which port that would be */ 1146 nss.nss_port = PMAPPORT; 1147 nss.nss_protocol = PMAPPROG; 1148 nss.nss_version = 0; 1149 } else { 1150 nss.nss_port = nmp->nm_nfsport; 1151 nss.nss_protocol = NFS_PROG; 1152 nss.nss_version = nmp->nm_vers; 1153 } 1154 NFS_SOCK_DBG("nfs connect %s, so type %d port %d prot %d %d\n", 1155 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, 1156 nss.nss_protocol, nss.nss_version); 1157 } 1158 1159 /* Set next location to first valid location. */ 1160 /* If start location is invalid, find next location. 
*/ 1161 nss.nss_nextloc = nss.nss_startloc; 1162 if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) || 1163 (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) { 1164 nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc); 1165 if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) { 1166 NFS_SOCK_DBG("nfs connect %s search failed, couldn't find a valid location index\n", 1167 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1168 return (ENOENT); 1169 } 1170 } 1171 nss.nss_last = -1; 1172 1173keepsearching: 1174 1175 error = nfs_connect_search_loop(nmp, &nss); 1176 if (error || !nss.nss_sock) { 1177 /* search failed */ 1178 nfs_socket_search_cleanup(&nss); 1179 if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) { 1180 /* Try using UDP */ 1181 sotype = SOCK_DGRAM; 1182 savederror = nss.nss_error; 1183 NFS_SOCK_DBG("nfs connect %s TCP failed %d %d, trying UDP\n", 1184 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error); 1185 goto tryagain; 1186 } 1187 if (!error) 1188 error = nss.nss_error ? nss.nss_error : ETIMEDOUT; 1189 lck_mtx_lock(&nmp->nm_lock); 1190 nmp->nm_sockflags &= ~NMSOCK_CONNECTING; 1191 nmp->nm_nss = NULL; 1192 lck_mtx_unlock(&nmp->nm_lock); 1193 if (nss.nss_flags & NSS_WARNED) 1194 log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n", 1195 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1196 if (fh) 1197 FREE(fh, M_TEMP); 1198 if (path) 1199 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1200 NFS_SOCK_DBG("nfs connect %s search failed, returning %d\n", 1201 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); 1202 return (error); 1203 } 1204 1205 /* try to use nss_sock */ 1206 nso = nss.nss_sock; 1207 nss.nss_sock = NULL; 1208 1209 /* We may be speaking to portmap first... to determine port(s). 
*/ 1210 if (nso->nso_saddr->sa_family == AF_INET) 1211 port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port); 1212 else 1213 port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port); 1214 if (port == PMAPPORT) { 1215 /* Use this portmapper port to get the port #s we need. */ 1216 NFS_SOCK_DBG("nfs connect %s got portmapper socket %p\n", 1217 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); 1218 1219 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */ 1220 sock_setupcall(nso->nso_so, NULL, NULL); 1221 1222 /* Set up socket address and port for NFS socket. */ 1223 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1224 1225 /* If NFS version not set, try NFSv3 then NFSv2. */ 1226 nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3; 1227 1228 if (!(port = nmp->nm_nfsport)) { 1229 if (ss.ss_family == AF_INET) 1230 ((struct sockaddr_in*)&ss)->sin_port = htons(0); 1231 else if (ss.ss_family == AF_INET6) 1232 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); 1233 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1234 nso->nso_so, NFS_PROG, nfsvers, 1235 (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo); 1236 if (!error) { 1237 if (ss.ss_family == AF_INET) 1238 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1239 else if (ss.ss_family == AF_INET6) 1240 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1241 if (!port) 1242 error = EPROGUNAVAIL; 1243 } 1244 if (error && !nmp->nm_vers) { 1245 nfsvers = NFS_VER2; 1246 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1247 nso->nso_so, NFS_PROG, nfsvers, 1248 (nso->nso_sotype == SOCK_DGRAM) ? 
IPPROTO_UDP : IPPROTO_TCP, timeo); 1249 if (!error) { 1250 if (ss.ss_family == AF_INET) 1251 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1252 else if (ss.ss_family == AF_INET6) 1253 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1254 if (!port) 1255 error = EPROGUNAVAIL; 1256 } 1257 } 1258 if (error) { 1259 nfs_socket_search_update_error(&nss, error); 1260 nfs_socket_destroy(nso); 1261 goto keepsearching; 1262 } 1263 } 1264 /* Create NFS protocol socket and add it to the list of sockets. */ 1265 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port, 1266 NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs); 1267 if (error) { 1268 nfs_socket_search_update_error(&nss, error); 1269 nfs_socket_destroy(nso); 1270 goto keepsearching; 1271 } 1272 nsonfs->nso_location = nso->nso_location; 1273 nsonfs->nso_wake = &nss; 1274 error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs); 1275 if (error) { 1276 nfs_socket_search_update_error(&nss, error); 1277 nfs_socket_destroy(nsonfs); 1278 nfs_socket_destroy(nso); 1279 goto keepsearching; 1280 } 1281 TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link); 1282 nss.nss_sockcnt++; 1283 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { 1284 /* Set up socket address and port for MOUNT socket. */ 1285 error = 0; 1286 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1287 port = nmp->nm_mountport; 1288 if (ss.ss_family == AF_INET) 1289 ((struct sockaddr_in*)&ss)->sin_port = htons(port); 1290 else if (ss.ss_family == AF_INET6) 1291 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); 1292 if (!port) { 1293 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ 1294 /* If NFS version is unknown, optimistically choose for NFSv3. */ 1295 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3; 1296 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? 
IPPROTO_UDP : IPPROTO_TCP; 1297 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1298 nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo); 1299 } 1300 if (!error) { 1301 if (ss.ss_family == AF_INET) 1302 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1303 else if (ss.ss_family == AF_INET6) 1304 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1305 if (!port) 1306 error = EPROGUNAVAIL; 1307 } 1308 /* create sockaddr for MOUNT */ 1309 if (!error) 1310 MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO); 1311 if (!error && !nsonfs->nso_saddr2) 1312 error = ENOMEM; 1313 if (!error) 1314 bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len); 1315 if (error) { 1316 lck_mtx_lock(&nsonfs->nso_lock); 1317 nsonfs->nso_error = error; 1318 nsonfs->nso_flags |= NSO_DEAD; 1319 lck_mtx_unlock(&nsonfs->nso_lock); 1320 } 1321 } 1322 nfs_socket_destroy(nso); 1323 goto keepsearching; 1324 } 1325 1326 /* nso is an NFS socket */ 1327 NFS_SOCK_DBG("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); 1328 1329 /* If NFS version wasn't specified, it was determined during the connect. */ 1330 nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version; 1331 1332 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */ 1333 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { 1334 error = 0; 1335 saddr = nso->nso_saddr2; 1336 if (!saddr) { 1337 /* Need sockaddr for MOUNT port */ 1338 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1339 port = nmp->nm_mountport; 1340 if (ss.ss_family == AF_INET) 1341 ((struct sockaddr_in*)&ss)->sin_port = htons(port); 1342 else if (ss.ss_family == AF_INET6) 1343 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); 1344 if (!port) { 1345 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ 1346 int mntvers = (nfsvers == NFS_VER2) ? 
RPCMNT_VER1 : RPCMNT_VER3; 1347 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP; 1348 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1349 NULL, RPCPROG_MNT, mntvers, mntproto, timeo); 1350 if (ss.ss_family == AF_INET) 1351 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1352 else if (ss.ss_family == AF_INET6) 1353 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1354 } 1355 if (!error) { 1356 if (port) 1357 saddr = (struct sockaddr*)&ss; 1358 else 1359 error = EPROGUNAVAIL; 1360 } 1361 } 1362 if (saddr) 1363 MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO); 1364 if (saddr && fh) 1365 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 1366 if (!saddr || !fh || !path) { 1367 if (!error) 1368 error = ENOMEM; 1369 if (fh) 1370 FREE(fh, M_TEMP); 1371 if (path) 1372 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1373 fh = NULL; 1374 path = NULL; 1375 nfs_socket_search_update_error(&nss, error); 1376 nfs_socket_destroy(nso); 1377 goto keepsearching; 1378 } 1379 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1); 1380 error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers, 1381 path, vfs_context_current(), timeo, fh, &nmp->nm_servsec); 1382 NFS_SOCK_DBG("nfs connect %s socket %p mount %d\n", 1383 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); 1384 if (!error) { 1385 /* Make sure we can agree on a security flavor. */ 1386 int o, s; /* indices into mount option and server security flavor lists */ 1387 int found = 0; 1388 1389 if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) { 1390 /* Some servers return an empty list to indicate RPCAUTH_SYS? */ 1391 nmp->nm_servsec.count = 1; 1392 nmp->nm_servsec.flavors[0] = RPCAUTH_SYS; 1393 } 1394 if (nmp->nm_sec.count) { 1395 /* Choose the first flavor in our list that the server supports. 
*/ 1396 if (!nmp->nm_servsec.count) { 1397 /* we don't know what the server supports, just use our first choice */ 1398 nmp->nm_auth = nmp->nm_sec.flavors[0]; 1399 found = 1; 1400 } 1401 for (o=0; !found && (o < nmp->nm_sec.count); o++) 1402 for (s=0; !found && (s < nmp->nm_servsec.count); s++) 1403 if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) { 1404 nmp->nm_auth = nmp->nm_sec.flavors[o]; 1405 found = 1; 1406 } 1407 } else { 1408 /* Choose the first one we support from the server's list. */ 1409 if (!nmp->nm_servsec.count) { 1410 nmp->nm_auth = RPCAUTH_SYS; 1411 found = 1; 1412 } 1413 for (s=0; s < nmp->nm_servsec.count; s++) 1414 switch (nmp->nm_servsec.flavors[s]) { 1415 case RPCAUTH_SYS: 1416 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */ 1417 if (found && (nmp->nm_auth == RPCAUTH_NONE)) 1418 found = 0; 1419 case RPCAUTH_NONE: 1420 case RPCAUTH_KRB5: 1421 case RPCAUTH_KRB5I: 1422 case RPCAUTH_KRB5P: 1423 if (!found) { 1424 nmp->nm_auth = nmp->nm_servsec.flavors[s]; 1425 found = 1; 1426 } 1427 break; 1428 } 1429 } 1430 error = !found ? EAUTH : 0; 1431 } 1432 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1433 path = NULL; 1434 if (error) { 1435 nfs_socket_search_update_error(&nss, error); 1436 FREE(fh, M_TEMP); 1437 fh = NULL; 1438 nfs_socket_destroy(nso); 1439 goto keepsearching; 1440 } 1441 if (nmp->nm_fh) 1442 FREE(nmp->nm_fh, M_TEMP); 1443 nmp->nm_fh = fh; 1444 fh = NULL; 1445 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT); 1446 } 1447 1448 /* put the real upcall in place */ 1449 upcall = (nso->nso_sotype == SOCK_STREAM) ? 
nfs_tcp_rcv : nfs_udp_rcv; 1450 error = sock_setupcall(nso->nso_so, upcall, nmp); 1451 if (error) { 1452 nfs_socket_search_update_error(&nss, error); 1453 nfs_socket_destroy(nso); 1454 goto keepsearching; 1455 } 1456 1457 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1458 /* set mntfromname to this location */ 1459 if (!NM_OMATTR_GIVEN(nmp, MNTFROM)) 1460 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, 1461 vfs_statfs(nmp->nm_mountp)->f_mntfromname, 1462 sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0); 1463 /* some negotiated values need to remain unchanged for the life of the mount */ 1464 if (!nmp->nm_sotype) 1465 nmp->nm_sotype = nso->nso_sotype; 1466 if (!nmp->nm_vers) { 1467 nmp->nm_vers = nfsvers; 1468 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */ 1469 if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) { 1470 if (nso->nso_saddr->sa_family == AF_INET) 1471 port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port); 1472 else if (nso->nso_saddr->sa_family == AF_INET6) 1473 port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); 1474 else 1475 port = 0; 1476 if (port == NFS_PORT) 1477 nmp->nm_nfsport = NFS_PORT; 1478 } 1479 } 1480 /* do some version-specific pre-mount set up */ 1481 if (nmp->nm_vers >= NFS_VER4) { 1482 microtime(&now); 1483 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec; 1484 if (!NMFLAG(nmp, NOCALLBACK)) 1485 nfs4_mount_callback_setup(nmp); 1486 } 1487 } 1488 1489 /* Initialize NFS socket state variables */ 1490 lck_mtx_lock(&nmp->nm_lock); 1491 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = 1492 nmp->nm_srtt[3] = (NFS_TIMEO << 3); 1493 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 1494 nmp->nm_sdrtt[3] = 0; 1495 if (nso->nso_sotype == SOCK_DGRAM) { 1496 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 1497 nmp->nm_sent = 0; 1498 } else if (nso->nso_sotype == SOCK_STREAM) { 1499 
nmp->nm_timeouts = 0; 1500 } 1501 nmp->nm_sockflags &= ~NMSOCK_CONNECTING; 1502 nmp->nm_sockflags |= NMSOCK_SETUP; 1503 /* move the socket to the mount structure */ 1504 nmp->nm_nso = nso; 1505 oldsaddr = nmp->nm_saddr; 1506 nmp->nm_saddr = nso->nso_saddr; 1507 lck_mtx_unlock(&nmp->nm_lock); 1508 error = nfs_connect_setup(nmp); 1509 lck_mtx_lock(&nmp->nm_lock); 1510 nmp->nm_sockflags &= ~NMSOCK_SETUP; 1511 if (!error) { 1512 nmp->nm_sockflags |= NMSOCK_READY; 1513 wakeup(&nmp->nm_sockflags); 1514 } 1515 if (error) { 1516 NFS_SOCK_DBG("nfs connect %s socket %p setup failed %d\n", 1517 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); 1518 nfs_socket_search_update_error(&nss, error); 1519 nmp->nm_saddr = oldsaddr; 1520 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1521 /* undo settings made prior to setup */ 1522 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE)) 1523 nmp->nm_sotype = 0; 1524 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) { 1525 if (nmp->nm_vers >= NFS_VER4) { 1526 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) 1527 nmp->nm_nfsport = 0; 1528 if (nmp->nm_cbid) 1529 nfs4_mount_callback_shutdown(nmp); 1530 if (IS_VALID_CRED(nmp->nm_mcred)) 1531 kauth_cred_unref(&nmp->nm_mcred); 1532 bzero(&nmp->nm_un, sizeof(nmp->nm_un)); 1533 } 1534 nmp->nm_vers = 0; 1535 } 1536 } 1537 lck_mtx_unlock(&nmp->nm_lock); 1538 nmp->nm_nso = NULL; 1539 nfs_socket_destroy(nso); 1540 goto keepsearching; 1541 } 1542 1543 /* update current location */ 1544 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) && 1545 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) { 1546 /* server has changed, we should initiate failover/recovery */ 1547 // XXX 1548 } 1549 nmp->nm_locations.nl_current = nso->nso_location; 1550 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID; 1551 1552 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1553 /* We have now successfully connected... make a note of it. 
*/ 1554 nmp->nm_sockflags |= NMSOCK_HASCONNECTED; 1555 } 1556 1557 lck_mtx_unlock(&nmp->nm_lock); 1558 if (oldsaddr) 1559 FREE(oldsaddr, M_SONAME); 1560 1561 if (nss.nss_flags & NSS_WARNED) 1562 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n", 1563 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1564 1565 nmp->nm_nss = NULL; 1566 nfs_socket_search_cleanup(&nss); 1567 if (fh) 1568 FREE(fh, M_TEMP); 1569 if (path) 1570 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1571 NFS_SOCK_DBG("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1572 return (0); 1573} 1574 1575 1576/* setup & confirm socket connection is functional */ 1577int 1578nfs_connect_setup(struct nfsmount *nmp) 1579{ 1580 int error = 0; 1581 1582 if (nmp->nm_vers >= NFS_VER4) { 1583 if (nmp->nm_state & NFSSTA_CLIENTID) { 1584 /* first, try to renew our current state */ 1585 error = nfs4_renew(nmp, R_SETUP); 1586 if ((error == NFSERR_ADMIN_REVOKED) || 1587 (error == NFSERR_CB_PATH_DOWN) || 1588 (error == NFSERR_EXPIRED) || 1589 (error == NFSERR_LEASE_MOVED) || 1590 (error == NFSERR_STALE_CLIENTID)) { 1591 lck_mtx_lock(&nmp->nm_lock); 1592 nfs_need_recover(nmp, error); 1593 lck_mtx_unlock(&nmp->nm_lock); 1594 } 1595 } 1596 error = nfs4_setclientid(nmp); 1597 } 1598 return (error); 1599} 1600 1601/* 1602 * NFS socket reconnect routine: 1603 * Called when a connection is broken. 1604 * - disconnect the old socket 1605 * - nfs_connect() again 1606 * - set R_MUSTRESEND for all outstanding requests on mount point 1607 * If this fails the mount point is DEAD! 
 */
int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, wentdown = 0, verbose = 1;
	time_t lastmsg;
	int timeo;

	microuptime(&now);
	/* schedule the first "can not connect" message using the tprintf delays */
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);


	lck_mtx_lock(&nmp->nm_lock);
	timeo = nfs_is_squishy(nmp) ? 8 : 30;
	lck_mtx_unlock(&nmp->nm_lock);

	/* keep retrying nfs_connect() until it succeeds or we must give up */
	while ((error = nfs_connect(nmp, verbose, timeo))) {
		verbose = 0;	/* only be verbose on the first attempt */
		nfs_disconnect(nmp);
		if ((error == EINTR) || (error == ERESTART))
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect", 0);
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG("Not mounted returning %d\n", error);
			return (error);
		}

		if (nfs_mount_check_dead_timeout(nmp)) {
			nfs_mount_make_zombie(nmp);
			lck_mtx_unlock(&nmp->nm_lock);
			return (ENXIO);
		}

		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		/* pause between attempts so we don't hammer the server */
		tsleep(nfs_reconnect, PSOCK, "nfs_reconnect_delay", 2*hz);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend. (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				/* async requests not currently sending get requeued for resend */
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	struct nfs_socket *nso;

	lck_mtx_lock(&nmp->nm_lock);
tryagain:
	if (nmp->nm_nso) {
		struct timespec ts = { 1, 0 };
		if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
			nmp->nm_state |= NFSSTA_WANTSND;
			msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts);
			goto tryagain;
		}
		if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
			msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
			goto tryagain;
		}
		nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
		nmp->nm_sockflags &= ~NMSOCK_READY;
		nso = nmp->nm_nso;
		nmp->nm_nso = NULL;
		/* nm_saddr may alias the socket's address; don't let it be freed with the socket */
		if (nso->nso_saddr == nmp->nm_saddr)
			nso->nso_saddr = NULL;
		/* drop nm_lock while destroying the socket (may block) */
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_socket_destroy(nso);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
		lck_mtx_unlock(&nmp->nm_lock);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	/* clear READY/SETUP so the socket thread will initiate a reconnect */
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			/* only flag requests that haven't errored, completed, or been flagged already */
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				/* async requests not currently sending get requeued for resend */
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}


/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish;
	nfsnode_t np;
	int do_reconnect_sleep = 0;

	lck_mtx_lock(&nmp->nm_lock);
	/* keep running while there is (or may be) work to do */
	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
	       !TAILQ_EMPTY(&nmp->nm_resendq) ||
	       !LIST_EMPTY(&nmp->nm_monlist) ||
	       nmp->nm_deadto_start ||
	       (nmp->nm_state & NFSSTA_RECOVER) ||
	       ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
	{
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
1797 /* 1798 * XXX We don't want to call reconnect again right away if returned errors 1799 * before that may not have blocked. This has caused spamming null procs 1800 * from machines in the pass. 1801 */ 1802 if (do_reconnect_sleep) 1803 tsleep(nfs_mount_sock_thread, PSOCK, "nfs_reconnect_sock_thread_delay", hz); 1804 error = nfs_reconnect(nmp); 1805 if (error) { 1806 int lvl = 7; 1807 if (error == EIO || error == EINTR) { 1808 lvl = (do_reconnect_sleep++ % 600) ? 7 : 0; 1809 } 1810 nfs_printf(NFS_FAC_SOCK, lvl, "nfs reconnect %s: returned %d\n", 1811 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); 1812 } else { 1813 nmp->nm_reconnect_start = 0; 1814 do_reconnect_sleep = 0; 1815 } 1816 lck_mtx_lock(&nmp->nm_lock); 1817 } 1818 if ((nmp->nm_sockflags & NMSOCK_READY) && 1819 (nmp->nm_state & NFSSTA_RECOVER) && 1820 !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && 1821 !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { 1822 /* perform state recovery */ 1823 lck_mtx_unlock(&nmp->nm_lock); 1824 nfs_recover(nmp); 1825 lck_mtx_lock(&nmp->nm_lock); 1826 } 1827 /* handle NFSv4 delegation returns */ 1828 while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && 1829 (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) && 1830 ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) { 1831 lck_mtx_unlock(&nmp->nm_lock); 1832 nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred); 1833 lck_mtx_lock(&nmp->nm_lock); 1834 } 1835 /* do resends, if necessary/possible */ 1836 while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || 1837 (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) && 1838 ((req = TAILQ_FIRST(&nmp->nm_resendq)))) { 1839 if (req->r_resendtime) 1840 microuptime(&now); 1841 while (req && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && req->r_resendtime && (now.tv_sec < req->r_resendtime)) 1842 req = TAILQ_NEXT(req, r_rchain); 1843 if (!req) 1844 break; 1845 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); 1846 
req->r_rchain.tqe_next = NFSREQNOLIST; 1847 lck_mtx_unlock(&nmp->nm_lock); 1848 lck_mtx_lock(&req->r_mtx); 1849 if (req->r_error || req->r_nmrep.nmc_mhead) { 1850 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1851 req->r_flags &= ~R_RESENDQ; 1852 wakeup(req); 1853 lck_mtx_unlock(&req->r_mtx); 1854 if (dofinish) 1855 nfs_asyncio_finish(req); 1856 lck_mtx_lock(&nmp->nm_lock); 1857 continue; 1858 } 1859 if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) { 1860 req->r_flags &= ~R_RESTART; 1861 req->r_resendtime = 0; 1862 lck_mtx_unlock(&req->r_mtx); 1863 /* async RPCs on GSS mounts need to be rebuilt and resent. */ 1864 nfs_reqdequeue(req); 1865 if (nfs_request_using_gss(req)) { 1866 nfs_gss_clnt_rpcdone(req); 1867 error = nfs_gss_clnt_args_restore(req); 1868 if (error == ENEEDAUTH) 1869 req->r_xid = 0; 1870 } 1871 NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n", 1872 nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid, 1873 req->r_flags, req->r_rtt); 1874 error = nfs_sigintr(nmp, req, req->r_thread, 0); 1875 if (!error) 1876 error = nfs_request_add_header(req); 1877 if (!error) 1878 error = nfs_request_send(req, 0); 1879 lck_mtx_lock(&req->r_mtx); 1880 if (req->r_flags & R_RESENDQ) 1881 req->r_flags &= ~R_RESENDQ; 1882 if (error) 1883 req->r_error = error; 1884 wakeup(req); 1885 dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1886 lck_mtx_unlock(&req->r_mtx); 1887 if (dofinish) 1888 nfs_asyncio_finish(req); 1889 lck_mtx_lock(&nmp->nm_lock); 1890 error = 0; 1891 continue; 1892 } 1893 NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n", 1894 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); 1895 error = nfs_sigintr(nmp, req, req->r_thread, 0); 1896 if (!error) { 1897 req->r_flags |= R_SENDING; 1898 lck_mtx_unlock(&req->r_mtx); 1899 error = nfs_send(req, 0); 1900 lck_mtx_lock(&req->r_mtx); 1901 if (!error) { 1902 if (req->r_flags & R_RESENDQ) 1903 
req->r_flags &= ~R_RESENDQ; 1904 wakeup(req); 1905 lck_mtx_unlock(&req->r_mtx); 1906 lck_mtx_lock(&nmp->nm_lock); 1907 continue; 1908 } 1909 } 1910 req->r_error = error; 1911 if (req->r_flags & R_RESENDQ) 1912 req->r_flags &= ~R_RESENDQ; 1913 wakeup(req); 1914 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1915 lck_mtx_unlock(&req->r_mtx); 1916 if (dofinish) 1917 nfs_asyncio_finish(req); 1918 lck_mtx_lock(&nmp->nm_lock); 1919 } 1920 if (nfs_mount_check_dead_timeout(nmp)) { 1921 nfs_mount_make_zombie(nmp); 1922 break; 1923 } 1924 1925 if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) 1926 break; 1927 /* check monitored nodes, if necessary/possible */ 1928 if (!LIST_EMPTY(&nmp->nm_monlist)) { 1929 nmp->nm_state |= NFSSTA_MONITOR_SCAN; 1930 LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) { 1931 if (!(nmp->nm_sockflags & NMSOCK_READY) || 1932 (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD))) 1933 break; 1934 np->n_mflag |= NMMONSCANINPROG; 1935 lck_mtx_unlock(&nmp->nm_lock); 1936 error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR)); 1937 if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */ 1938 nfs_data_update_size(np, 0); 1939 lck_mtx_lock(&nmp->nm_lock); 1940 np->n_mflag &= ~NMMONSCANINPROG; 1941 if (np->n_mflag & NMMONSCANWANT) { 1942 np->n_mflag &= ~NMMONSCANWANT; 1943 wakeup(&np->n_mflag); 1944 } 1945 if (error || !(nmp->nm_sockflags & NMSOCK_READY) || 1946 (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD))) 1947 break; 1948 } 1949 nmp->nm_state &= ~NFSSTA_MONITOR_SCAN; 1950 if (nmp->nm_state & NFSSTA_UNMOUNTING) 1951 wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */ 1952 } 1953 if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) { 1954 if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) || 1955 (nmp->nm_state & NFSSTA_RECOVER)) 1956 ts.tv_sec = 1; 1957 
else 1958 ts.tv_sec = 5; 1959 msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts); 1960 } 1961 } 1962 1963 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */ 1964 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && 1965 (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) && 1966 (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { 1967 lck_mtx_unlock(&nmp->nm_lock); 1968 nfs3_umount_rpc(nmp, vfs_context_kernel(), 1969 (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2); 1970 lck_mtx_lock(&nmp->nm_lock); 1971 } 1972 1973 if (nmp->nm_sockthd == thd) 1974 nmp->nm_sockthd = NULL; 1975 lck_mtx_unlock(&nmp->nm_lock); 1976 wakeup(&nmp->nm_sockthd); 1977 thread_terminate(thd); 1978} 1979 1980/* start or wake a mount's socket thread */ 1981void 1982nfs_mount_sock_thread_wake(struct nfsmount *nmp) 1983{ 1984 if (nmp->nm_sockthd) 1985 wakeup(&nmp->nm_sockthd); 1986 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS) 1987 thread_deallocate(nmp->nm_sockthd); 1988} 1989 1990/* 1991 * Check if we should mark the mount dead because the 1992 * unresponsive mount has reached the dead timeout. 1993 * (must be called with nmp locked) 1994 */ 1995int 1996nfs_mount_check_dead_timeout(struct nfsmount *nmp) 1997{ 1998 struct timeval now; 1999 2000 if (nmp->nm_state & NFSSTA_DEAD) 2001 return 1; 2002 if (nmp->nm_deadto_start == 0) 2003 return 0; 2004 nfs_is_squishy(nmp); 2005 if (nmp->nm_curdeadtimeout <= 0) 2006 return 0; 2007 microuptime(&now); 2008 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout) 2009 return 0; 2010 return 1; 2011} 2012 2013/* 2014 * Call nfs_mount_zombie to remove most of the 2015 * nfs state for the mount, and then ask to be forcibly unmounted. 2016 * 2017 * Assumes the nfs mount structure lock nm_lock is held. 
 */

void
nfs_mount_make_zombie(struct nfsmount *nmp)
{
	fsid_t fsid;

	if (!nmp)
		return;

	if (nmp->nm_state & NFSSTA_DEAD)
		return;

	printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
		(nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
	/* grab the fsid before dropping nm_lock to signal the dead event */
	fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
	lck_mtx_unlock(&nmp->nm_lock);
	nfs_mount_zombie(nmp, NFSSTA_DEAD);
	vfs_event_signal(&fsid, VQ_DEAD, 0);
	lck_mtx_lock(&nmp->nm_lock);
}


/*
 * NFS callback channel socket state
 */
struct nfs_callback_socket
{
	TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
	socket_t			ncbs_so;	/* the socket */
	struct sockaddr_storage		ncbs_saddr;	/* socket address */
	struct nfs_rpc_record_state	ncbs_rrs;	/* RPC record parsing state */
	time_t				ncbs_stamp;	/* last accessed at */
	uint32_t			ncbs_flags;	/* see below */
};
#define NCBSOCK_UPCALL		0x0001	/* an upcall is in progress on this socket */
#define NCBSOCK_UPCALLWANT	0x0002	/* someone is waiting for the upcall to finish */
#define NCBSOCK_DEAD		0x0004	/* socket should be reaped by the cleanup timer */

/*
 * NFS callback channel state
 *
 * One listening socket for accepting socket connections from servers and
 * a list of connected sockets to handle callback requests on.
 * Mounts registered with the callback channel are assigned IDs and
 * put on a list so that the callback request handling code can match
 * the requests up with mounts.
 */
socket_t nfs4_cb_so = NULL;
socket_t nfs4_cb_so6 = NULL;
in_port_t nfs4_cb_port = 0;
in_port_t nfs4_cb_port6 = 0;
uint32_t nfs4_cb_id = 0;
uint32_t nfs4_cb_so_usecount = 0;
TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;

int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);

/*
 * Set up the callback channel for the NFS mount.
 *
 * Initializes the callback channel socket state and
 * assigns a callback ID to the mount.
 */
void
nfs4_mount_callback_setup(struct nfsmount *nmp)
{
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	socket_t so = NULL;
	socket_t so6 = NULL;
	struct timeval timeo;
	int error, on = 1;
	in_port_t port;

	lck_mtx_lock(nfs_global_mutex);
	/* first-time init of the global callback lists; IDs start at 1 */
	if (nfs4_cb_id == 0) {
		TAILQ_INIT(&nfs4_cb_mounts);
		TAILQ_INIT(&nfs4_cb_socks);
		nfs4_cb_id++;
	}
	nmp->nm_cbid = nfs4_cb_id++;
	if (nmp->nm_cbid == 0)	/* skip 0 if the counter wrapped */
		nmp->nm_cbid = nfs4_cb_id++;
	nfs4_cb_so_usecount++;
	TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);

	/* listening sockets already exist; nothing more to do */
	if (nfs4_cb_so) {
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}

	/* IPv4 */
	error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
		goto fail;
	}
	so = nfs4_cb_so;

	sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
	error = sock_bind(so, (struct sockaddr *)&sin);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
		goto fail;
	}
	error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
		goto fail;
	}
	nfs4_cb_port = ntohs(sin.sin_port);

	error = sock_listen(so, 32);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
		goto fail;
	}

	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
	sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	error = 0;	/* timeout-setting failures above are non-fatal */

	/* IPv6 */
	error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
		goto fail;
	}
	so6 = nfs4_cb_so6;

	sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
	/* try to use specified port or same port as IPv4 */
	port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
ipv6_bind_again:
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_any;
	sin6.sin6_port = htons(port);
	error = sock_bind(so6, (struct sockaddr *)&sin6);
	if (error) {
		if (port != nfs_callback_port) {
			/* if we simply tried to match the IPv4 port, then try any port */
			port = 0;
			goto ipv6_bind_again;
		}
		log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
		goto fail;
	}
	error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
		goto fail;
	}
	nfs4_cb_port6 = ntohs(sin6.sin6_port);

	error = sock_listen(so6, 32);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
		goto fail;
	}

	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
	error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
	sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	error = 0;	/* timeout-setting failures above are non-fatal */

fail:
	if (error) {
		/* tear down both sockets on any fatal setup error */
		nfs4_cb_so = nfs4_cb_so6 = NULL;
		lck_mtx_unlock(nfs_global_mutex);
		if (so) {
			sock_shutdown(so, SHUT_RDWR);
			sock_close(so);
		}
		if (so6) {
			sock_shutdown(so6, SHUT_RDWR);
			sock_close(so6);
		}
	} else {
		lck_mtx_unlock(nfs_global_mutex);
	}
}

/*
 * Shut down the callback channel for the NFS mount.
 *
 * Clears the mount's callback ID and releases the mounts
 * reference on the callback socket. Last reference dropped
 * will also shut down the callback socket(s).
 */
void
nfs4_mount_callback_shutdown(struct nfsmount *nmp)
{
	struct nfs_callback_socket *ncbsp;
	socket_t so, so6;
	struct nfs4_cb_sock_list cb_socks;
	struct timespec ts = {1,0};

	lck_mtx_lock(nfs_global_mutex);
	TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
	/* wait for any callbacks in progress to complete */
	while (nmp->nm_cbrefs)
		msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
	nmp->nm_cbid = 0;
	/* other mounts still using the callback channel; leave sockets alone */
	if (--nfs4_cb_so_usecount) {
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}
	/* last reference: detach all socket state under the lock, close after */
	so = nfs4_cb_so;
	so6 = nfs4_cb_so6;
	nfs4_cb_so = nfs4_cb_so6 = NULL;
	TAILQ_INIT(&cb_socks);
	TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
	lck_mtx_unlock(nfs_global_mutex);
	if (so) {
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	}
	if (so6) {
		sock_shutdown(so6, SHUT_RDWR);
		sock_close(so6);
	}
	while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
		TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
		sock_close(ncbsp->ncbs_so);
		nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
		FREE(ncbsp, M_TEMP);
	}
}

/*
 * Check periodically for stale/unused nfs callback sockets
 */
#define NFS4_CB_TIMER_PERIOD	30
#define NFS4_CB_IDLE_MAX	300
void
nfs4_callback_timer(__unused void *param0, __unused void *param1)
{
	struct nfs_callback_socket *ncbsp, *nextncbsp;
	struct timeval now;

loop:
	lck_mtx_lock(nfs_global_mutex);
	if (TAILQ_EMPTY(&nfs4_cb_socks)) {
		nfs4_callback_timer_on = 0;
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}
	microuptime(&now);
	TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
		/* keep sockets that are neither dead nor idle too long */
		if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
		    (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
			continue;
		TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
		/* drop the lock to close the socket, then rescan from the top */
		lck_mtx_unlock(nfs_global_mutex);
		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
		sock_close(ncbsp->ncbs_so);
		nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
		FREE(ncbsp, M_TEMP);
		goto loop;
	}
	nfs4_callback_timer_on = 1;
	nfs_interval_timer_start(nfs4_callback_timer_call,
		NFS4_CB_TIMER_PERIOD * 1000);
	lck_mtx_unlock(nfs_global_mutex);
}

/*
 * Accept a new callback socket.
 */
void
nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
{
	socket_t newso = NULL;
	struct nfs_callback_socket *ncbsp;
	struct nfsmount *nmp;
	struct timeval timeo, now;
	int error, on = 1, ip;

	/* determine which listening socket (and address family) this is */
	if (so == nfs4_cb_so)
		ip = 4;
	else if (so == nfs4_cb_so6)
		ip = 6;
	else
		return;

	/* allocate/initialize a new nfs_callback_socket */
	MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
	if (!ncbsp) {
		log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
		return;
	}
	bzero(ncbsp, sizeof(*ncbsp));
	ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
	nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);

	/* accept a new socket */
	error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
			ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
			nfs4_cb_rcv, ncbsp, &newso);
	if (error) {
		log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
		FREE(ncbsp, M_TEMP);
		return;
	}

	/* set up the new socket */
	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
	error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
	sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	ncbsp->ncbs_so = newso;
	microuptime(&now);
	ncbsp->ncbs_stamp = now.tv_sec;

	lck_mtx_lock(nfs_global_mutex);

	/* add it to the list */
	TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);

	/* verify it's from a host we have mounted */
	TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
		/* check if socket's source address matches this mount's server address */
		if (!nmp->nm_saddr)
			continue;
		if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
			break;
	}
	if (!nmp) /* we don't want this socket, mark it dead */
		ncbsp->ncbs_flags |= NCBSOCK_DEAD;

	/* make sure the callback socket cleanup timer is running */
	/* (shorten the timer if we've got a socket we don't want) */
	if (!nfs4_callback_timer_on) {
		nfs4_callback_timer_on = 1;
		nfs_interval_timer_start(nfs4_callback_timer_call,
			!nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
	} else if (!nmp && (nfs4_callback_timer_on < 2)) {
		/* timer already armed for the long period; re-arm it short */
		nfs4_callback_timer_on = 2;
		thread_call_cancel(nfs4_callback_timer_call);
		nfs_interval_timer_start(nfs4_callback_timer_call, 500);
	}

	lck_mtx_unlock(nfs_global_mutex);
}

/*
 * Receive mbufs from callback sockets into RPC records and process each record.
 * Detect connection has been closed and shut down.
 */
void
nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfs_callback_socket *ncbsp = arg;
	struct timespec ts = {1,0};
	struct timeval now;
	mbuf_t m;
	int error = 0, recv = 1;

	/* serialize upcalls on this socket */
	lck_mtx_lock(nfs_global_mutex);
	while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
		/* wait if upcall is already in progress */
		ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
		msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
	}
	ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
	lck_mtx_unlock(nfs_global_mutex);

	/* loop while we make error-free progress */
	while (!error && recv) {
		error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
		if (m) /* handle the request */
			error = nfs4_cb_handler(ncbsp, m);
	}

	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/*
		 * Socket is either being closed or should be.
		 * We can't close the socket in the context of the upcall.
		 * So we mark it as dead and leave it for the cleanup timer to reap.
		 */
		ncbsp->ncbs_stamp = 0;
		ncbsp->ncbs_flags |= NCBSOCK_DEAD;
	} else {
		microuptime(&now);
		ncbsp->ncbs_stamp = now.tv_sec;
	}

	lck_mtx_lock(nfs_global_mutex);
	ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
	lck_mtx_unlock(nfs_global_mutex);
	wakeup(ncbsp);
}

/*
 * Handle an NFS callback channel request.
2452 */ 2453int 2454nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq) 2455{ 2456 socket_t so = ncbsp->ncbs_so; 2457 struct nfsm_chain nmreq, nmrep; 2458 mbuf_t mhead = NULL, mrest = NULL, m; 2459 struct msghdr msg; 2460 struct nfsmount *nmp; 2461 fhandle_t fh; 2462 nfsnode_t np; 2463 nfs_stateid stateid; 2464 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes; 2465 uint32_t val, xid, procnum, taglen, cbid, numops, op, status; 2466 uint32_t auth_type, auth_len; 2467 uint32_t numres, *pnumres; 2468 int error = 0, replen, len; 2469 size_t sentlen = 0; 2470 2471 xid = numops = op = status = procnum = taglen = cbid = 0; 2472 2473 nfsm_chain_dissect_init(error, &nmreq, mreq); 2474 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID 2475 nfsm_chain_get_32(error, &nmreq, val); // RPC Call 2476 nfsm_assert(error, (val == RPC_CALL), EBADRPC); 2477 nfsm_chain_get_32(error, &nmreq, val); // RPC Version 2478 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH); 2479 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number 2480 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL); 2481 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number 2482 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH); 2483 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number 2484 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL); 2485 2486 /* Handle authentication */ 2487 /* XXX just ignore auth for now - handling kerberos may be tricky */ 2488 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor 2489 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length 2490 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); 2491 if (!error && (auth_len > 0)) 2492 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); 2493 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE) 2494 nfsm_chain_get_32(error, 
&nmreq, auth_len); // verifier length 2495 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); 2496 if (!error && (auth_len > 0)) 2497 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); 2498 if (error) { 2499 status = error; 2500 error = 0; 2501 goto nfsmout; 2502 } 2503 2504 switch (procnum) { 2505 case NFSPROC4_CB_NULL: 2506 status = NFSERR_RETVOID; 2507 break; 2508 case NFSPROC4_CB_COMPOUND: 2509 /* tag, minorversion, cb ident, numops, op array */ 2510 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */ 2511 nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC); 2512 2513 /* start building the body of the response */ 2514 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED); 2515 nfsm_chain_init(&nmrep, mrest); 2516 2517 /* copy tag from request to response */ 2518 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */ 2519 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) { 2520 nfsm_chain_get_32(error, &nmreq, val); 2521 nfsm_chain_add_32(error, &nmrep, val); 2522 } 2523 2524 /* insert number of results placeholder */ 2525 numres = 0; 2526 nfsm_chain_add_32(error, &nmrep, numres); 2527 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED); 2528 2529 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */ 2530 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH); 2531 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */ 2532 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */ 2533 if (error) { 2534 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH)) 2535 status = error; 2536 else if ((error == ENOBUFS) || (error == ENOMEM)) 2537 status = NFSERR_RESOURCE; 2538 else 2539 status = NFSERR_SERVERFAULT; 2540 error = 0; 2541 nfsm_chain_null(&nmrep); 2542 goto nfsmout; 2543 } 2544 /* match the callback ID to a registered mount */ 2545 lck_mtx_lock(nfs_global_mutex); 2546 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { 2547 if (nmp->nm_cbid != cbid) 2548 continue; 2549 /* 
verify socket's source address matches this mount's server address */ 2550 if (!nmp->nm_saddr) 2551 continue; 2552 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) 2553 break; 2554 } 2555 /* mark the NFS mount as busy */ 2556 if (nmp) 2557 nmp->nm_cbrefs++; 2558 lck_mtx_unlock(nfs_global_mutex); 2559 if (!nmp) { 2560 /* if no mount match, just drop socket. */ 2561 error = EPERM; 2562 nfsm_chain_null(&nmrep); 2563 goto out; 2564 } 2565 2566 /* process ops, adding results to mrest */ 2567 while (numops > 0) { 2568 numops--; 2569 nfsm_chain_get_32(error, &nmreq, op); 2570 if (error) 2571 break; 2572 switch (op) { 2573 case NFS_OP_CB_GETATTR: 2574 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS) 2575 np = NULL; 2576 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); 2577 bmlen = NFS_ATTR_BITMAP_LEN; 2578 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen); 2579 if (error) { 2580 status = error; 2581 error = 0; 2582 numops = 0; /* don't process any more ops */ 2583 } else { 2584 /* find the node for the file handle */ 2585 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); 2586 if (error || !np) { 2587 status = NFSERR_BADHANDLE; 2588 error = 0; 2589 np = NULL; 2590 numops = 0; /* don't process any more ops */ 2591 } 2592 } 2593 nfsm_chain_add_32(error, &nmrep, op); 2594 nfsm_chain_add_32(error, &nmrep, status); 2595 if (!error && (status == EBADRPC)) 2596 error = status; 2597 if (np) { 2598 /* only allow returning size, change, and mtime attrs */ 2599 NFS_CLEAR_ATTRIBUTES(&rbitmap); 2600 attrbytes = 0; 2601 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) { 2602 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE); 2603 attrbytes += 2 * NFSX_UNSIGNED; 2604 } 2605 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) { 2606 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE); 2607 attrbytes += 2 * NFSX_UNSIGNED; 2608 } 2609 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { 2610 NFS_BITMAP_SET(&rbitmap, 
NFS_FATTR_TIME_MODIFY); 2611 attrbytes += 3 * NFSX_UNSIGNED; 2612 } 2613 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN); 2614 nfsm_chain_add_32(error, &nmrep, attrbytes); 2615 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) 2616 nfsm_chain_add_64(error, &nmrep, 2617 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0)); 2618 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) 2619 nfsm_chain_add_64(error, &nmrep, np->n_size); 2620 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { 2621 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]); 2622 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]); 2623 } 2624 nfs_node_unlock(np); 2625 vnode_put(NFSTOV(np)); 2626 np = NULL; 2627 } 2628 /* 2629 * If we hit an error building the reply, we can't easily back up. 2630 * So we'll just update the status and hope the server ignores the 2631 * extra garbage. 2632 */ 2633 break; 2634 case NFS_OP_CB_RECALL: 2635 // (STATEID, TRUNCATE, FH) -> (STATUS) 2636 np = NULL; 2637 nfsm_chain_get_stateid(error, &nmreq, &stateid); 2638 nfsm_chain_get_32(error, &nmreq, truncate); 2639 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); 2640 if (error) { 2641 status = error; 2642 error = 0; 2643 numops = 0; /* don't process any more ops */ 2644 } else { 2645 /* find the node for the file handle */ 2646 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); 2647 if (error || !np) { 2648 status = NFSERR_BADHANDLE; 2649 error = 0; 2650 np = NULL; 2651 numops = 0; /* don't process any more ops */ 2652 } else if (!(np->n_openflags & N_DELEG_MASK) || 2653 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) { 2654 /* delegation stateid state doesn't match */ 2655 status = NFSERR_BAD_STATEID; 2656 numops = 0; /* don't process any more ops */ 2657 } 2658 if (!status) /* add node to recall queue, and wake socket thread */ 2659 nfs4_delegation_return_enqueue(np); 2660 if (np) { 
2661 nfs_node_unlock(np); 2662 vnode_put(NFSTOV(np)); 2663 } 2664 } 2665 nfsm_chain_add_32(error, &nmrep, op); 2666 nfsm_chain_add_32(error, &nmrep, status); 2667 if (!error && (status == EBADRPC)) 2668 error = status; 2669 break; 2670 case NFS_OP_CB_ILLEGAL: 2671 default: 2672 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL); 2673 status = NFSERR_OP_ILLEGAL; 2674 nfsm_chain_add_32(error, &nmrep, status); 2675 numops = 0; /* don't process any more ops */ 2676 break; 2677 } 2678 numres++; 2679 } 2680 2681 if (!status && error) { 2682 if (error == EBADRPC) 2683 status = error; 2684 else if ((error == ENOBUFS) || (error == ENOMEM)) 2685 status = NFSERR_RESOURCE; 2686 else 2687 status = NFSERR_SERVERFAULT; 2688 error = 0; 2689 } 2690 2691 /* Now, set the numres field */ 2692 *pnumres = txdr_unsigned(numres); 2693 nfsm_chain_build_done(error, &nmrep); 2694 nfsm_chain_null(&nmrep); 2695 2696 /* drop the callback reference on the mount */ 2697 lck_mtx_lock(nfs_global_mutex); 2698 nmp->nm_cbrefs--; 2699 if (!nmp->nm_cbid) 2700 wakeup(&nmp->nm_cbrefs); 2701 lck_mtx_unlock(nfs_global_mutex); 2702 break; 2703 } 2704 2705nfsmout: 2706 if (status == EBADRPC) 2707 OSAddAtomic64(1, &nfsstats.rpcinvalid); 2708 2709 /* build reply header */ 2710 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead); 2711 nfsm_chain_init(&nmrep, mhead); 2712 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */ 2713 nfsm_chain_add_32(error, &nmrep, xid); 2714 nfsm_chain_add_32(error, &nmrep, RPC_REPLY); 2715 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) { 2716 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); 2717 if (status & NFSERR_AUTHERR) { 2718 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); 2719 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR)); 2720 } else { 2721 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); 2722 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 2723 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 2724 } 2725 } else { 2726 /* reply 
status */ 2727 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED); 2728 /* XXX RPCAUTH_NULL verifier */ 2729 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL); 2730 nfsm_chain_add_32(error, &nmrep, 0); 2731 /* accepted status */ 2732 switch (status) { 2733 case EPROGUNAVAIL: 2734 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL); 2735 break; 2736 case EPROGMISMATCH: 2737 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH); 2738 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION); 2739 nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION); 2740 break; 2741 case EPROCUNAVAIL: 2742 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL); 2743 break; 2744 case EBADRPC: 2745 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE); 2746 break; 2747 default: 2748 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS); 2749 if (status != NFSERR_RETVOID) 2750 nfsm_chain_add_32(error, &nmrep, status); 2751 break; 2752 } 2753 } 2754 nfsm_chain_build_done(error, &nmrep); 2755 if (error) { 2756 nfsm_chain_null(&nmrep); 2757 goto out; 2758 } 2759 error = mbuf_setnext(nmrep.nmc_mcur, mrest); 2760 if (error) { 2761 printf("nfs cb: mbuf_setnext failed %d\n", error); 2762 goto out; 2763 } 2764 mrest = NULL; 2765 /* Calculate the size of the reply */ 2766 replen = 0; 2767 for (m = nmrep.nmc_mhead; m; m = mbuf_next(m)) 2768 replen += mbuf_len(m); 2769 mbuf_pkthdr_setlen(mhead, replen); 2770 error = mbuf_pkthdr_setrcvif(mhead, NULL); 2771 nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000); 2772 nfsm_chain_null(&nmrep); 2773 2774 /* send the reply */ 2775 bzero(&msg, sizeof(msg)); 2776 error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen); 2777 mhead = NULL; 2778 if (!error && ((int)sentlen != replen)) 2779 error = EWOULDBLOCK; 2780 if (error == EWOULDBLOCK) /* inability to send response is considered fatal */ 2781 error = ETIMEDOUT; 2782out: 2783 if (error) 2784 nfsm_chain_cleanup(&nmrep); 2785 if (mhead) 2786 mbuf_freem(mhead); 2787 if (mrest) 2788 mbuf_freem(mrest); 2789 if 
(mreq)
		mbuf_freem(mreq);
	return (error);
}


/*
 * Initialize an nfs_rpc_record_state structure.
 *
 * Zeroes all parsing state and primes nrrs_markerleft so the next read
 * starts by collecting a full 4-byte TCP record marker.
 */
void
nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
{
	bzero(nrrsp, sizeof(*nrrsp));
	nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
}

/*
 * Clean up an nfs_rpc_record_state structure.
 *
 * Frees any partially-assembled RPC record mbuf chain still held in the
 * parsing state.
 */
void
nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
{
	if (nrrsp->nrrs_m) {
		mbuf_freem(nrrsp->nrrs_m);
		nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
	}
}

/*
 * Read the next (marked) RPC record from the socket.
 *
 * Incrementally parses the TCP record-marking stream: first the 4-byte
 * fragment marker (high bit = last fragment, low 31 bits = fragment
 * length), then the fragment data itself, accumulating fragments into
 * an mbuf chain until the final fragment of the record completes.
 * Parsing state persists in *nrrsp across calls, so a record may be
 * assembled over multiple upcalls.
 *
 * *recvp returns if any data was received.
 * *mp returns the next complete RPC record
 */
int
nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
{
	struct iovec aio;
	struct msghdr msg;
	size_t rcvlen;
	int error = 0;
	mbuf_t m;

	*recvp = 0;
	*mp = NULL;

	/* read the TCP RPC record marker */
	while (!error && nrrsp->nrrs_markerleft) {
		/* read into nrrs_fragleft at the offset of whatever marker bytes remain */
		aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
		    sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
		aio.iov_len = nrrsp->nrrs_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = &aio;
		msg.msg_iovlen = 1;
		error = sock_receive(so, &msg, flags, &rcvlen);
		if (error || !rcvlen)
			break;
		*recvp = 1;
		nrrsp->nrrs_markerleft -= rcvlen;
		if (nrrsp->nrrs_markerleft)
			continue;
		/* record marker complete */
		nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
		/* high bit of the marker flags the final fragment of this record */
		if (nrrsp->nrrs_fragleft & 0x80000000) {
			nrrsp->nrrs_lastfrag = 1;
			nrrsp->nrrs_fragleft &= ~0x80000000;
		}
		nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
		if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
			/* This is SERIOUS! We are out of sync with the sender. */
			log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
		m = NULL;
		rcvlen = nrrsp->nrrs_fragleft;
		error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
		if (error || !rcvlen || !m)
			break;
		*recvp = 1;
		/* append mbufs to list */
		nrrsp->nrrs_fragleft -= rcvlen;
		if (!nrrsp->nrrs_m) {
			nrrsp->nrrs_m = m;
		} else {
			error = mbuf_setnext(nrrsp->nrrs_mlast, m);
			if (error) {
				printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		/* advance nrrs_mlast to the new tail of the chain */
		while (mbuf_next(m))
			m = mbuf_next(m);
		nrrsp->nrrs_mlast = m;
	}

	/* done reading fragment? */
	if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
		/* reset socket fragment parsing state */
		nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
		if (nrrsp->nrrs_lastfrag) {
			/* RPC record complete */
			*mp = nrrsp->nrrs_m;
			/* reset socket record parsing state */
			nrrsp->nrrs_reclen = 0;
			nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
			nrrsp->nrrs_lastfrag = 0;
		}
	}

	return (error);
}



/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - wait for reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct nfs_socket *nso;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error) {
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	error = nfs_sigintr(req->r_nmp, req, NULL, 0);
	if (error) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	/*
	 * If it's a setup RPC but we're not in SETUP... must need reconnect.
	 * If it's a recovery RPC but the socket's not ready... must need reconnect.
	 */
	if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
	    ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
		error = ETIMEDOUT;
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			/* non-blocking caller: just flag for later resend */
			lck_mtx_lock(&req->r_mtx);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG("nfs_send: 0x%llx wait reconnect\n", req->r_xid);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) {
				error = EIO;
				break;
			}
			if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (nmp->nm_reconnect_start > 0)) {
				struct timeval now;
				microuptime(&now);
				if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
					/* soft mount in reconnect for a while... terminate ASAP */
					OSAddAtomic64(1, &nfsstats.rpctimeouts);
					req->r_flags |= R_SOFTTERM;
					req->r_error = error = ETIMEDOUT;
					break;
				}
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error) {
			lck_mtx_lock(&req->r_mtx);
			req->r_error = error;
			req->r_flags &= ~R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			return (error);
		}
		goto again;
	}
	nso = nmp->nm_nso;
	/* note that we're using the mount's socket to do the send */
	nmp->nm_state |= NFSSTA_SENDING;  /* will be cleared by nfs_sndunlock() */
	lck_mtx_unlock(&nmp->nm_lock);
	if (!nso) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				/* remove ourselves from the queue if still on it after waking */
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	/* send a copy so the original request chain remains available for resends */
	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
	    wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	/* unconnected datagram sockets need an explicit destination address */
	if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
		msg.msg_name = (caddr_t)sendnam;
		msg.msg_namelen = sendnam->sa_len;
	}
	error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
	if (error || (sentlen != req->r_mreqlen)) {
		NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n",
		    req->r_xid, (int)sentlen, (int)req->r_mreqlen, error);
	}

	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	/* a partial send on a stream socket leaves the stream out of sync */
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_flags &= ~R_SENDING;
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic64(1, &nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
			/* fetch SO_ERROR purely to clear the pending socket error */
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror);
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
		/* FALLTHROUGH: a TCP send timeout is treated like a connection error */
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error);
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
		    !req->r_nmp ? "<unmounted>" :
		    vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	if (nfs_is_dead(error, nmp))
		error = EIO;

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct nfs_socket *nso = nmp->nm_nso;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
		return;

	/* drain all available datagrams, matching each to its request */
	do {
		/* make sure we're on the current socket */
		if (!nso || (nso->nso_so != so))
			return;

		m = NULL;
		rcvlen = 1000000;	/* large cap; each receive returns one whole datagram */
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG("nfs_udp_rcv: need reconnect %d\n", error);
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct nfs_socket *nso = nmp->nm_nso;
	struct nfs_rpc_record_state nrrs;
	mbuf_t m;
	int error = 0;
	int recv = 1;
	int wup = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
		return;

	/* make sure we're on the current socket */
	lck_mtx_lock(&nmp->nm_lock);
	nso = nmp->nm_nso;
	if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);

	/* make sure this upcall should be trying to do work */
	lck_mtx_lock(&nso->nso_lock);
	if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) {
		lck_mtx_unlock(&nso->nso_lock);
		return;
	}
	nso->nso_flags |= NSO_UPCALL;
	/* work on a local copy of the parsing state so we don't hold nso_lock while reading */
	nrrs = nso->nso_rrs;
	lck_mtx_unlock(&nso->nso_lock);

	/* loop while we make error-free progress */
	while (!error && recv) {
		error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
		if (m) /* match completed response with request */
			nfs_request_match_reply(nmp, m);
	}

	/* Update the sockets's rpc parsing state */
	lck_mtx_lock(&nso->nso_lock);
	nso->nso_rrs = nrrs;
	if (nso->nso_flags & NSO_DISCONNECTING)
		wup = 1;
	nso->nso_flags &= ~NSO_UPCALL;
	lck_mtx_unlock(&nso->nso_lock);
	if (wup)
		wakeup(&nso->nso_flags);	/* let the disconnecting thread proceed */

#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error);
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG("nfs_tcp_rcv: need reconnect %d\n", error);
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 *
 * Sends a zero-length message; the result is fed to nfs_is_dead() so a
 * dead mount can be detected.  Clears NMSOCK_POKE and wakes any waiters
 * whether or not a send was attempted.
 */
void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
	    !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
		/* Nothing to poke */
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	/* zero-length send: provokes pending socket errors without sending data */
	aio.iov_base = &dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG("nfs_sock_poke: error %d\n", error);
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~NMSOCK_POKE;
	wakeup(&nmp->nm_sockflags);
	lck_mtx_unlock(&nmp->nm_lock);
	nfs_is_dead(error, nmp);
}

/*
 * Match an RPC reply with the corresponding request
 *
 * Takes ownership of mrep: it is either handed off to the matched
 * request (via r_nmrep) or freed if no request matches.
 */
void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_int32_t reply = 0, rxid = 0;
	int error = 0, asyncioq, t1;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic64(1, &nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
				     (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so course, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if (nfs_request_using_gss(req))
			nfs_gss_clnt_rpcdone(req);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic64(1, &nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 *
 * Returns EAGAIN for RPCSEC_GSS requests that must be rebuilt (the GSS
 * credential sequence number must be bumped before a resend).
 */
int
nfs_wait_reply(struct nfsreq *req)
{
	struct timespec ts = { 2, 0 };
	int error = 0, slpflag, first = 1;

	if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
			    req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
			req->r_flags |= R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			if (nfs_request_using_gss(req)) {
				/*
				 * It's an RPCSEC_GSS request.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				lck_mtx_lock(&req->r_mtx);
				req->r_flags &= ~R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
			    req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error);
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		first = slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,			/* node the request is for (may be NULL) */
	mount_t mp,			/* used only if !np */
	struct nfsm_chain *nmrest,	/* request args; mbuf chain is consumed */
	int procnum,			/* RPC procedure number */
	thread_t thd,			/* requesting thread; NULL => R_NOINTR */
	kauth_cred_t cred,		/* credential; referenced if valid */
	struct nfsreq **reqp)		/* in: optional preallocated req; out: new req */
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			/* always frees up the args mbuf list, even on error */
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;	/* remember to free it in nfs_request_destroy() */

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (nfs_mount_gone(nmp)) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	/* don't start new requests on a mount that's timed out and forced/dead */
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) &&
	    (nmp->nm_state & NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic64(1, &nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	nmp->nm_ref++;	/* request holds a mount reference; dropped in nfs_request_destroy() */
	req->r_np = np;
	req->r_thread = thd;
	if (!thd)
		req->r_flags |= R_NOINTR;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	/* mark the request as on no async/resend/cwnd queue */
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	/* set auth flavor to use for request */
	if (!req->r_cred)
		req->r_auth = RPCAUTH_NONE;
	else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID))
		req->r_auth = req->r_np->n_auth;
	else
		req->r_auth = nmp->nm_auth;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 *
 * Removes the request from any queues it is still on (async I/O,
 * resend, congestion window), releases its congestion-window slot,
 * frees the request/reply mbuf chains, drops credential, GSS and mount
 * references, and finally frees the struct itself if it was allocated
 * by nfs_request_create().
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	int clearjbtimeo = 0;
	struct timespec ts = { 1, 0 };

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);

	if (req->r_achain.tqe_next != NFSREQNOLIST &&
	    req->r_achain.tqe_next != NFSIODCOMPLETING) {
		/*
		 * Still on an async I/O queue?
		 * %%% But which one, we may be on a local iod.
		 */
		lck_mtx_lock(nfsiod_mutex);
		/* re-check under the lock before unlinking */
		if (nmp && req->r_achain.tqe_next != NFSREQNOLIST &&
		    req->r_achain.tqe_next != NFSIODCOMPLETING) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}

	lck_mtx_lock(&req->r_mtx);
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_flags & R_CWND) {
			/* Decrement the outstanding request count. */
			req->r_flags &= ~R_CWND;
			nmp->nm_sent -= NFS_CWNDSCALE;
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
		}
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		if (req->r_flags & R_JBTPRINTFMSG) {
			req->r_flags &= ~R_JBTPRINTFMSG;
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	/* Wait for the mount_sock_thread to finish with the resend */
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);

	if (clearjbtimeo)
		nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	/* r_mhead (header + args) supersedes r_mrest (args only) — free one, not both */
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (nfs_request_using_gss(req))
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);
	if (req->r_wrongsec)
		FREE(req->r_wrongsec, M_TEMP);
	if (nmp)
		nfs_mount_rele(nmp);
	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
/*
 * Take an additional reference on an NFS request.
 * "locked" indicates whether the caller already holds req->r_mtx.
 */
void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

/*
 * Drop a reference on an NFS request; destroy it when the last
 * reference is released.
 */
void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	/* destroy outside the lock — nfs_request_destroy() frees r_mtx */
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 *
 * Frees any previously-built header mbufs (down to, but not including,
 * the args chain r_mrest), builds a fresh header via nfsm_rpchead()
 * (which also assigns r_xid), and sets the request's retry limit.
 * Returns ENXIO if the mount has gone away.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	/* re-check the mount; it may have gone away while building the header */
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT))
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(&req->r_mtx);
	req->r_flags |= R_SENDING;
	lck_mtx_unlock(&req->r_mtx);

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (nfs_mount_gone(nmp)) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		/* back-date r_lastmsg so the first tprintf fires after the initial delay */
		req->r_lastmsg = now.tv_sec -
		    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic64(1, &nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
		    NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 * Any error is recorded in req->r_error for nfs_request_finish().
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}

/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 *
 * Parses the RPC reply header (reply status, auth verifier, accepted
 * status) and the leading NFS status word.  Handles RPCSEC_GSS resends,
 * NFSERR_TRYLATER (jukebox) back-off/retry, and NFSv4 NFSERR_WRONGSEC
 * security renegotiation by flagging R_RESTART for the caller's retry
 * loop.  On success, hands the remaining reply chain to the caller via
 * *nmrepp and the NFS status via *status.  Returns 0 or an errno.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, clearjbtimeo;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;

	if ((req->r_flags & R_CWND) && nmp) {
		/*
		 * Decrement the outstanding request count.
		 */
		req->r_flags &= ~R_CWND;
		lck_mtx_lock(&nmp->nm_lock);
		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
		nmp->nm_sent -= NFS_CWNDSCALE;
		if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
			/* congestion window is open, poke the cwnd queue */
			struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
			TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
			req2->r_cchain.tqe_next = NFSREQNOLIST;
			wakeup(req2);
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	if (nfs_request_using_gss(req)) {
		/*
		 * If the request used an RPCSEC_GSS credential
		 * then reset its sequence number bit in the
		 * request window.
		 */
		nfs_gss_clnt_rpcdone(req);

		/*
		 * If we need to re-send, go back and re-build the
		 * request based on a new sequence number.
		 * Note that we're using the original XID.
		 */
		if (error == EAGAIN) {
			req->r_error = 0;
			if (mrep)
				mbuf_freem(mrep);
			error = nfs_gss_clnt_args_restore(req);	// remove any trailer mbufs
			req->r_nmrep.nmc_mhead = NULL;
			req->r_flags |= R_RESTART;
			if (error == ENEEDAUTH) {
				req->r_xid = 0;		// get a new XID
				error = 0;
			}
			goto nfsmout;
		}
	}

	/*
	 * If there was a successful reply, make sure to mark the mount as up.
	 * If a tprintf message was given (or if this is a timed-out soft mount)
	 * then post a tprintf message indicating the server is alive again.
	 */
	if (!error) {
		if ((req->r_flags & R_TPRINTFMSG) ||
		    (nmp && (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) &&
		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE|NFSSTA_DEAD)) == NFSSTA_TIMEO)))
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
		else
			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL);
	}
	if (!error && !nmp)
		error = ENXIO;
	nfsmout_if(error);

	/*
	 * break down the RPC header and check if ok
	 */
	nmrep = req->r_nmrep;
	nfsm_chain_get_32(error, &nmrep, reply_status);
	nfsmout_if(error);
	if (reply_status == RPC_MSGDENIED) {
		nfsm_chain_get_32(error, &nmrep, rejected_status);
		nfsmout_if(error);
		if (rejected_status == RPC_MISMATCH) {
			/* server doesn't speak a compatible RPC version */
			error = ENOTSUP;
			goto nfsmout;
		}
		nfsm_chain_get_32(error, &nmrep, auth_status);
		nfsmout_if(error);
		switch (auth_status) {
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/*
			 * An RPCSEC_GSS cred or context problem.
			 * We can't use it anymore.
			 * Restore the args, renew the context
			 * and set up for a resend.
			 */
			error = nfs_gss_clnt_args_restore(req);
			if (error && error != ENEEDAUTH)
				break;

			if (!error) {
				error = nfs_gss_clnt_ctx_renew(req);
				if (error)
					break;
			}
			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			goto nfsmout;
		default:
			/* all other auth rejections are access failures */
			error = EACCES;
			break;
		}
		goto nfsmout;
	}

	/* Now check the verifier */
	nfsm_chain_get_32(error, &nmrep, verf_type);	// verifier flavor
	nfsm_chain_get_32(error, &nmrep, verf_len);	// verifier length
	nfsmout_if(error);

	switch (req->r_auth) {
	case RPCAUTH_NONE:
	case RPCAUTH_SYS:
		/* Any AUTH_SYS verifier is ignored */
		if (verf_len > 0)
			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
		nfsm_chain_get_32(error, &nmrep, accepted_status);
		break;
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		/* GSS verifier must be validated; it also yields accepted_status */
		error = nfs_gss_clnt_verf_get(req, &nmrep,
			verf_type, verf_len, &accepted_status);
		break;
	}
	nfsmout_if(error);

	switch (accepted_status) {
	case RPC_SUCCESS:
		if (req->r_procnum == NFSPROC_NULL) {
			/*
			 * The NFS null procedure is unique,
			 * in not returning an NFS status.
			 */
			*status = NFS_OK;
		} else {
			nfsm_chain_get_32(error, &nmrep, *status);
			nfsmout_if(error);
		}

		if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) {
			/*
			 * It's a JUKEBOX error - delay and try again
			 */
			int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;

			mbuf_freem(mrep);
			req->r_nmrep.nmc_mhead = NULL;
			if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic64(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			/* exponential back-off, capped at 30 seconds */
			req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2);
			if (req->r_delay > 30)
				req->r_delay = 30;
			if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) {
				if (!(req->r_flags & R_JBTPRINTFMSG)) {
					req->r_flags |= R_JBTPRINTFMSG;
					lck_mtx_lock(&nmp->nm_lock);
					nmp->nm_jbreqs++;
					lck_mtx_unlock(&nmp->nm_lock);
				}
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
					"resource temporarily unavailable (jukebox)", 0);
			}
			if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (req->r_delay == 30) &&
				!(req->r_flags & R_NOINTR)) {
				/* for soft mounts, just give up after a short while */
				OSAddAtomic64(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				error = req->r_error;
				goto nfsmout;
			}
			delay = req->r_delay;
			if (req->r_callback.rcb_func) {
				/* async request: schedule the resend rather than sleep here */
				struct timeval now;
				microuptime(&now);
				req->r_resendtime = now.tv_sec + delay;
			} else {
				do {
					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
						goto nfsmout;
					tsleep(nfs_request_finish, PSOCK|slpflag, "nfs_jukebox_trylater", hz);
					slpflag = 0;
				} while (--delay > 0);
			}
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			req->r_start = 0;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER);
			return (0);
		}

		if (req->r_flags & R_JBTPRINTFMSG) {
			/* jukebox condition cleared; undo the nm_jbreqs accounting */
			req->r_flags &= ~R_JBTPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
		}

		if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) {
			/*
			 * Hmmm... we need to try a different security flavor.
			 * The first time a request hits this, we will allocate an array
			 * to track flavors to try.  We fill the array with the mount's
			 * preferred flavors or the server's preferred flavors or just the
			 * flavors we support.
			 */
			uint32_t srvflavors[NX_MAX_SEC_FLAVORS];
			int srvcount, i, j;

			/* Call SECINFO to try to get list of flavors from server. */
			srvcount = NX_MAX_SEC_FLAVORS;
			nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount);

			if (!req->r_wrongsec) {
				/* first time... set up flavor array */
				MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK);
				if (!req->r_wrongsec) {
					error = EACCES;
					goto nfsmout;
				}
				i=0;
				if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */
					for(; i < nmp->nm_sec.count; i++)
						req->r_wrongsec[i] = nmp->nm_sec.flavors[i];
				} else if (srvcount) { /* otherwise use the server's list of flavors */
					for(; i < srvcount; i++)
						req->r_wrongsec[i] = srvflavors[i];
				} else { /* otherwise, just try the flavors we support. */
					req->r_wrongsec[i++] = RPCAUTH_KRB5P;
					req->r_wrongsec[i++] = RPCAUTH_KRB5I;
					req->r_wrongsec[i++] = RPCAUTH_KRB5;
					req->r_wrongsec[i++] = RPCAUTH_SYS;
					req->r_wrongsec[i++] = RPCAUTH_NONE;
				}
				for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */
					req->r_wrongsec[i] = RPCAUTH_INVALID;
			}

			/* clear the current flavor from the list */
			for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
				if (req->r_wrongsec[i] == req->r_auth)
					req->r_wrongsec[i] = RPCAUTH_INVALID;

			/* find the next flavor to try */
			for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
				if (req->r_wrongsec[i] != RPCAUTH_INVALID) {
					if (!srvcount) /* no server list, just try it */
						break;
					/* check that it's in the server's list */
					for(j=0; j < srvcount; j++)
						if (req->r_wrongsec[i] == srvflavors[j])
							break;
					if (j < srvcount) /* found */
						break;
					/* not found in server list */
					req->r_wrongsec[i] = RPCAUTH_INVALID;
				}
			if (i == NX_MAX_SEC_FLAVORS) {
				/* nothing left to try! */
				error = EACCES;
				goto nfsmout;
			}

			/* retry with the next auth flavor */
			req->r_auth = req->r_wrongsec[i];
			req->r_xid = 0;			// get a new XID
			req->r_flags |= R_RESTART;
			req->r_start = 0;
			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC);
			return (0);
		}
		if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) {
			/*
			 * We renegotiated security for this request; so update the
			 * default security flavor for the associated node.
			 */
			if (req->r_np)
				req->r_np->n_auth = req->r_auth;
		}

		if (*status == NFS_OK) {
			/*
			 * Successful NFS request
			 */
			*nmrepp = nmrep;
			req->r_nmrep.nmc_mhead = NULL;
			break;
		}
		/* Got an NFS error of some kind */

		/*
		 * If the File Handle was stale, invalidate the
		 * lookup cache, just in case.
		 */
		if ((*status == ESTALE) && req->r_np) {
			cache_purge(NFSTOV(req->r_np));
			/* if monitored, also send delete event */
			if (vnode_ismonitored(NFSTOV(req->r_np)))
				nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE));
		}
		/* NFSv2 replies carry no data after an error status; v3+ may */
		if (nmp->nm_vers == NFS_VER2)
			mbuf_freem(mrep);
		else
			*nmrepp = nmrep;
		req->r_nmrep.nmc_mhead = NULL;
		error = 0;
		break;
	case RPC_PROGUNAVAIL:
		error = EPROGUNAVAIL;
		break;
	case RPC_PROGMISMATCH:
		error = ERPCMISMATCH;
		break;
	case RPC_PROCUNAVAIL:
		error = EPROCUNAVAIL;
		break;
	case RPC_GARBAGE:
		error = EBADRPC;
		break;
	case RPC_SYSTEM_ERR:
	default:
		error = EIO;
		break;
	}
nfsmout:
	if (req->r_flags & R_JBTPRINTFMSG) {
		/* error path: still need to balance the nm_jbreqs count */
		req->r_flags &= ~R_JBTPRINTFMSG;
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_jbreqs--;
		clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		lck_mtx_unlock(&nmp->nm_lock);
		if (clearjbtimeo)
			nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	}
	FSDBG(273, R_XID32(req->r_xid), nmp, req,
		(!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error);
	return (error);
}

/*
 * NFS request using a GSS/Kerberos security flavor?
 * True only if the request both has a GSS context and uses a KRB5 flavor.
 */
int
nfs_request_using_gss(struct nfsreq *req)
{
	if (!req->r_gss_ctx)
		return (0);
	switch (req->r_auth) {
	case RPCAUTH_KRB5:
	case RPCAUTH_KRB5I:
	case RPCAUTH_KRB5P:
		return (1);
	}
	return (0);
}

/*
 * Perform an NFS request synchronously.
 * Convenience wrapper around nfs_request2() that pulls the thread and
 * credential out of the vfs_context.
 */

int
nfs_request(
	nfsnode_t np,
	mount_t mp,		/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsreq_secinfo_args *si,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		si, 0, nmrepp, xidp, status);
}

/*
 * Perform an NFS request synchronously: create the request, then loop
 * building the header, sending, waiting, and finishing until the request
 * completes without nfs_request_finish() flagging R_RESTART (jukebox
 * retry, GSS resend, or security renegotiation).  The request lives on
 * the stack; nfs_request_rele() drops the creation reference.
 */
int
nfs_request2(
	nfsnode_t np,
	mount_t mp,		/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & (R_OPTMASK | R_SOFT));
	if (si)
		req->r_secinfo = *si;

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}


/*
 * Set up a new null proc request to exchange GSS context tokens with the
 * server.  Associate the context that we are setting up with the request
 * that we are sending.
 */

int
nfs_request_gss(
		mount_t mp,
		struct nfsm_chain *nmrest,
		thread_t thd,
		kauth_cred_t cred,
		int flags,
		struct nfs_gss_clnt_ctx *cp,	/* Set to gss context to renew or setup */
		struct nfsm_chain *nmrepp,
		int *status)
{
	struct nfsreq rq, *req = &rq;
	int error, wait = 1;

	if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	if (cp == NULL) {
		printf("nfs_request_gss request has no context\n");
		nfs_request_rele(req);
		return (NFSERR_EAUTH);
	}
	nfs_gss_clnt_ctx_ref(req, cp);

	/*
	 * Don't wait for a reply to a context destroy advisory
	 * to avoid hanging on a dead server.
	 */
	if (cp->gss_clnt_proc == RPCSEC_GSS_DESTROY)
		wait = 0;

	FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;

		if ((error = nfs_request_send(req, wait)))
			break;
		/* fire-and-forget for context destroy: don't wait for a reply */
		if (!wait)
			break;

		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);

	nfs_gss_clnt_ctx_unref(req);
	nfs_request_rele(req);

	return (error);
}

/*
 * Create and start an asynchronous NFS request.
 *
 * Creates the request, marks it R_ASYNC, sends it, and — when a callback
 * is registered — takes an extra reference and waits here until the
 * request is actually sent (R_SENT) so the caller can safely return.
 * Returns 0 with *reqp set on success; the callback path keeps its own
 * reference which nfs_request_async_finish()/cancel() later releases.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,		/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= (flags & R_OPTMASK);
	req->r_flags |= R_ASYNC;
	if (si)
		req->r_secinfo = *si;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		/* extra reference for the callback path; dropped by finish/cancel */
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				nmp = req->r_nmp;
				if ((req->r_flags & R_RESENDQ) && !nfs_mount_gone(nmp)) {
					lck_mtx_lock(&nmp->nm_lock);
					if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
						/*
						 * It's not going to get off the resend queue if we're in recovery.
						 * So, just take it off ourselves.  We could be holding mount state
						 * busy and thus holding up the start of recovery.
						 */
						TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
						req->r_rchain.tqe_next = NFSREQNOLIST;
						if (req->r_flags & R_RESENDQ)
							req->r_flags &= ~R_RESENDQ;
						lck_mtx_unlock(&nmp->nm_lock);
						req->r_flags |= R_SENDING;
						/* drop r_mtx around the blocking send, then re-take it */
						lck_mtx_unlock(&req->r_mtx);
						error = nfs_send(req, 1);
						lck_mtx_lock(&req->r_mtx);
						if (error)
							break;
						continue;
					}
					lck_mtx_unlock(&nmp->nm_lock);
				}
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		/* send failed before R_SENT: drop the extra callback reference */
		if (error && req->r_callback.rcb_func && !sent) {
			nfs_request_rele(req);
		}
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	/* drop the creation reference unless the caller will wait synchronously */
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);

	return (error);
}

/*
 * Wait for and finish an asynchronous NFS request.
 *
 * Waits for the request to leave the resend queue (pulling it off
 * ourselves if the mount is in recovery), then waits for the reply and
 * runs nfs_request_finish().  If finish flags R_RESTART, either
 * reschedules an async-callback request (returning EINPROGRESS) or
 * retries inline for a waiting caller.  Drops a request reference
 * before returning a final result.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
	struct nfsmount *nmp;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };

		if ((nmp = req->r_nmp)) {
			lck_mtx_lock(&nmp->nm_lock);
			if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
				/*
				 * It's not going to get off the resend queue if we're in recovery.
				 * So, just take it off ourselves.  We could be holding mount state
				 * busy and thus holding up the start of recovery.
				 */
				TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
				req->r_rchain.tqe_next = NFSREQNOLIST;
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}

/*
 * Cancel a pending asynchronous NFS request by dropping its reference.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}

/*
 * Flag a request as being terminated.
 *
 * Marks the request R_SOFTTERM with ETIMEDOUT and, if it was holding
 * congestion-window space, returns that space and wakes the next waiter.
 */
void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || nfs_mount_gone(nmp))
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}

/*
 * Ensure req isn't in use by the timer, then dequeue it.
 * Sleeps while the timer has the request busied (RL_BUSY).
 */
void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.  Caller must hold nfs_request_mutex.
 */
void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}

/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 * Caller must hold nfs_request_mutex.
 */
struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}

/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

restart:
	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	/* walk the queue; nfs_reqnext() unbusies req and busies the next one */
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (nmp == NULL) {
			NFS_SOCK_DBG("Found a request with out a mount!\n");
			continue;
		}
		/* skip requests that have already completed or failed */
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding", 1);
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic64(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!(NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && !nfs_can_squish(nmp))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
			 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (NMFLAG(nmp, DUMBTIMER))
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				NFS_SOCK_DBG("nfs timeout: req time %d and timeo is %d continue\n", req->r_rtt, timeo);
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime);
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			if (nfs_mount_check_dead_timeout(nmp)) {
				/* Unbusy this request */
				req->r_lflags &= ~RL_BUSY;
				if (req->r_lflags & RL_WAITING) {
					req->r_lflags &= ~RL_WAITING;
					wakeup(&req->r_lflags);
				}
				lck_mtx_unlock(&req->r_mtx);

				/* No need to poke this mount */
				if (nmp->nm_sockflags & NMSOCK_POKE) {
					nmp->nm_sockflags &= ~NMSOCK_POKE;
					TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
				}
				/* Release our lock state, so we can become a zombie */
				lck_mtx_unlock(nfs_request_mutex);

				/*
				 * Note nfs_mount_make zombie(nmp) must be
				 * called with nm_lock held.  After doing some
				 * work we release nm_lock in
				 * nfs_make_mount_zombie with out acquiring any
				 * other locks.  (Later, in nfs_mount_zombie we
				 * will acquire nfs_request_mutex, r_mtx,
				 * nm_lock in that order).  So we should not be
				 * introducing deadlock here.  We take a reference
				 * on the mount so that its still there when we
				 * release the lock.
				 */
				nmp->nm_ref++;
				nfs_mount_make_zombie(nmp);
				lck_mtx_unlock(&nmp->nm_lock);
				nfs_mount_rele(nmp);

				/*
				 * All the request for this mount have now been
				 * removed from the request queue.  Restart to
				 * process the remaining mounts
				 */
				goto restart;
			}

			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
			    (nmp->nm_sockflags & NMSOCK_READY)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
		if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER|R_SOFT))) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic64(1, &nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding", 1);
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}
			if (req->r_flags & R_NOINTR) {
				/* don't terminate nointr requests on timeout */
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			NFS_SOCK_DBG("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start);
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		/* async requests with no waiter get queued for the resend thread */
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
	}

	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}

/*
 * check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}

/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
extern int sigprop[NSIG+1];
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	proc_t p;
	int error = 0;

	if (!nmp)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT);	/* request has been terminated. */
	if (req && (req->r_flags & R_NOINTR))
		thd = NULL;		/* don't check for signal on R_NOINTR */

	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_state & NFSSTA_FORCE) {
		/* If a force unmount is in progress then fail. */
		error = EIO;
	} else if (vfs_isforce(nmp->nm_mountp)) {
		/* Someone is unmounting us, go soft and mark it. */
		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/* Check if the mount is marked dead. */
	if (!error && (nmp->nm_state & NFSSTA_DEAD))
		error = ENXIO;

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (current_proc() != kernproc &&
	    !error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL || current_proc() == kernproc)
		return (0);

	/*
	 * Check if the process is aborted, but don't interrupt if we
	 * were killed by a signal and this is the exiting thread which
	 * is attempting to dump core.
	 */
	if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
	     (p->p_sigacts == NULL) ||
	     (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
	     !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
	    proc_pendingsignals(p, NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 *
 * Returns 0 with NFSSTA_SNDLOCK set in nm_state on success, or a non-zero
 * error from nfs_sigintr() (e.g. EINTR/ETIMEDOUT/ENXIO) if the wait was
 * interrupted, or ENXIO if the mount is gone.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };	/* {0,0} = sleep with no timeout */

	if (nfs_mount_gone(nmp))
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	/* let signals interrupt the sleep only for interruptible, non-R_NOINTR requests */
	if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		/* bail if a termination condition (signal, timeout, unmount) is pending */
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		/* "noremotehang" threads wake every second instead of blocking indefinitely */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			/* after one interruptible sleep, switch to polling every 2 seconds */
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}

/*
 * Unlock the stream socket for others.
5057 */ 5058void 5059nfs_sndunlock(struct nfsreq *req) 5060{ 5061 struct nfsmount *nmp = req->r_nmp; 5062 int *statep, wake = 0; 5063 5064 if (!nmp) 5065 return; 5066 lck_mtx_lock(&nmp->nm_lock); 5067 statep = &nmp->nm_state; 5068 if ((*statep & NFSSTA_SNDLOCK) == 0) 5069 panic("nfs sndunlock"); 5070 *statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING); 5071 if (*statep & NFSSTA_WANTSND) { 5072 *statep &= ~NFSSTA_WANTSND; 5073 wake = 1; 5074 } 5075 lck_mtx_unlock(&nmp->nm_lock); 5076 if (wake) 5077 wakeup(statep); 5078} 5079 5080int 5081nfs_aux_request( 5082 struct nfsmount *nmp, 5083 thread_t thd, 5084 struct sockaddr *saddr, 5085 socket_t so, 5086 int sotype, 5087 mbuf_t mreq, 5088 uint32_t xid, 5089 int bindresv, 5090 int timeo, 5091 struct nfsm_chain *nmrep) 5092{ 5093 int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0; 5094 socket_t newso = NULL; 5095 struct sockaddr_storage ss; 5096 struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 }; 5097 mbuf_t m, mrep = NULL; 5098 struct msghdr msg; 5099 uint32_t rxid = 0, reply = 0, reply_status, rejected_status; 5100 uint32_t verf_type, verf_len, accepted_status; 5101 size_t readlen, sentlen; 5102 struct nfs_rpc_record_state nrrs; 5103 5104 if (!so) { 5105 /* create socket and set options */ 5106 soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP; 5107 if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso))) 5108 goto nfsmout; 5109 5110 if (bindresv) { 5111 int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6; 5112 int optname = (saddr->sa_family == AF_INET) ? 
IP_PORTRANGE : IPV6_PORTRANGE; 5113 int portrange = IP_PORTRANGE_LOW; 5114 error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange)); 5115 nfsmout_if(error); 5116 ss.ss_len = saddr->sa_len; 5117 ss.ss_family = saddr->sa_family; 5118 if (ss.ss_family == AF_INET) { 5119 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY; 5120 ((struct sockaddr_in*)&ss)->sin_port = htons(0); 5121 } else if (ss.ss_family == AF_INET6) { 5122 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any; 5123 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); 5124 } else { 5125 error = EINVAL; 5126 } 5127 if (!error) 5128 error = sock_bind(newso, (struct sockaddr *)&ss); 5129 nfsmout_if(error); 5130 } 5131 5132 if (sotype == SOCK_STREAM) { 5133# define NFS_AUX_CONNECTION_TIMEOUT 4 /* 4 second timeout for connections */ 5134 int count = 0; 5135 5136 error = sock_connect(newso, saddr, MSG_DONTWAIT); 5137 if (error == EINPROGRESS) 5138 error = 0; 5139 nfsmout_if(error); 5140 5141 while ((error = sock_connectwait(newso, &tv)) == EINPROGRESS) { 5142 /* After NFS_AUX_CONNECTION_TIMEOUT bail */ 5143 if (++count >= NFS_AUX_CONNECTION_TIMEOUT) { 5144 error = ETIMEDOUT; 5145 break; 5146 } 5147 } 5148 nfsmout_if(error); 5149 } 5150 if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) || 5151 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) || 5152 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on))))) 5153 goto nfsmout; 5154 so = newso; 5155 } else { 5156 /* make sure socket is using a one second timeout in this function */ 5157 optlen = sizeof(orig_rcvto); 5158 error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen); 5159 if (!error) { 5160 optlen = sizeof(orig_sndto); 5161 error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen); 5162 } 5163 if (!error) { 5164 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); 5165 sock_setsockopt(so, SOL_SOCKET, 
SO_SNDTIMEO, &tv, sizeof(tv)); 5166 restoreto = 1; 5167 } 5168 } 5169 5170 if (sotype == SOCK_STREAM) { 5171 sendat = 0; /* we only resend the request for UDP */ 5172 nfs_rpc_record_state_init(&nrrs); 5173 } 5174 5175 for (try=0; try < timeo; try++) { 5176 if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0))) 5177 break; 5178 if (!try || (try == sendat)) { 5179 /* send the request (resending periodically for UDP) */ 5180 if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) 5181 goto nfsmout; 5182 bzero(&msg, sizeof(msg)); 5183 if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) { 5184 msg.msg_name = saddr; 5185 msg.msg_namelen = saddr->sa_len; 5186 } 5187 if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen))) 5188 goto nfsmout; 5189 sendat *= 2; 5190 if (sendat > 30) 5191 sendat = 30; 5192 } 5193 /* wait for the response */ 5194 if (sotype == SOCK_STREAM) { 5195 /* try to read (more of) record */ 5196 error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep); 5197 /* if we don't have the whole record yet, we'll keep trying */ 5198 } else { 5199 readlen = 1<<18; 5200 bzero(&msg, sizeof(msg)); 5201 error = sock_receivembuf(so, &msg, &mrep, 0, &readlen); 5202 } 5203 if (error == EWOULDBLOCK) 5204 continue; 5205 nfsmout_if(error); 5206 /* parse the response */ 5207 nfsm_chain_dissect_init(error, nmrep, mrep); 5208 nfsm_chain_get_32(error, nmrep, rxid); 5209 nfsm_chain_get_32(error, nmrep, reply); 5210 nfsmout_if(error); 5211 if ((rxid != xid) || (reply != RPC_REPLY)) 5212 error = EBADRPC; 5213 nfsm_chain_get_32(error, nmrep, reply_status); 5214 nfsmout_if(error); 5215 if (reply_status == RPC_MSGDENIED) { 5216 nfsm_chain_get_32(error, nmrep, rejected_status); 5217 nfsmout_if(error); 5218 error = (rejected_status == RPC_MISMATCH) ? 
ERPCMISMATCH : EACCES; 5219 goto nfsmout; 5220 } 5221 nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */ 5222 nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */ 5223 nfsmout_if(error); 5224 if (verf_len) 5225 nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len)); 5226 nfsm_chain_get_32(error, nmrep, accepted_status); 5227 nfsmout_if(error); 5228 switch (accepted_status) { 5229 case RPC_SUCCESS: 5230 error = 0; 5231 break; 5232 case RPC_PROGUNAVAIL: 5233 error = EPROGUNAVAIL; 5234 break; 5235 case RPC_PROGMISMATCH: 5236 error = EPROGMISMATCH; 5237 break; 5238 case RPC_PROCUNAVAIL: 5239 error = EPROCUNAVAIL; 5240 break; 5241 case RPC_GARBAGE: 5242 error = EBADRPC; 5243 break; 5244 case RPC_SYSTEM_ERR: 5245 default: 5246 error = EIO; 5247 break; 5248 } 5249 break; 5250 } 5251nfsmout: 5252 if (restoreto) { 5253 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv)); 5254 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv)); 5255 } 5256 if (newso) { 5257 sock_shutdown(newso, SHUT_RDWR); 5258 sock_close(newso); 5259 } 5260 mbuf_freem(mreq); 5261 return (error); 5262} 5263 5264int 5265nfs_portmap_lookup( 5266 struct nfsmount *nmp, 5267 vfs_context_t ctx, 5268 struct sockaddr *sa, 5269 socket_t so, 5270 uint32_t protocol, 5271 uint32_t vers, 5272 uint32_t ipproto, 5273 int timeo) 5274{ 5275 thread_t thd = vfs_context_thread(ctx); 5276 kauth_cred_t cred = vfs_context_ucred(ctx); 5277 struct sockaddr_storage ss; 5278 struct sockaddr *saddr = (struct sockaddr*)&ss; 5279 struct nfsm_chain nmreq, nmrep; 5280 mbuf_t mreq; 5281 int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0; 5282 uint32_t port; 5283 uint64_t xid = 0; 5284 char uaddr[MAX_IPv6_STR_LEN+16]; 5285 5286 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5287 if (saddr->sa_family == AF_INET) { 5288 ip = 4; 5289 pmprog = PMAPPROG; 5290 pmvers = PMAPVERS; 5291 pmproc = PMAPPROC_GETPORT; 5292 } else if (saddr->sa_family == AF_INET6) { 5293 ip = 6; 5294 pmprog = 
RPCBPROG; 5295 pmvers = RPCBVERS4; 5296 pmproc = RPCBPROC_GETVERSADDR; 5297 } else { 5298 return (EINVAL); 5299 } 5300 nfsm_chain_null(&nmreq); 5301 nfsm_chain_null(&nmrep); 5302 5303tryagain: 5304 /* send portmapper request to get port/uaddr */ 5305 if (ip == 4) 5306 ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT); 5307 else 5308 ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT); 5309 nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED); 5310 nfsm_chain_add_32(error, &nmreq, protocol); 5311 nfsm_chain_add_32(error, &nmreq, vers); 5312 if (ip == 4) { 5313 nfsm_chain_add_32(error, &nmreq, ipproto); 5314 nfsm_chain_add_32(error, &nmreq, 0); 5315 } else { 5316 if (ipproto == IPPROTO_TCP) 5317 nfsm_chain_add_string(error, &nmreq, "tcp6", 4); 5318 else 5319 nfsm_chain_add_string(error, &nmreq, "udp6", 4); 5320 nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */ 5321 nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */ 5322 } 5323 nfsm_chain_build_done(error, &nmreq); 5324 nfsmout_if(error); 5325 error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM, 5326 pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, 5327 &xid, &mreq); 5328 nfsmout_if(error); 5329 nmreq.nmc_mhead = NULL; 5330 error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? 
SOCK_DGRAM : SOCK_STREAM, 5331 mreq, R_XID32(xid), 0, timeo, &nmrep); 5332 5333 /* grab port from portmap response */ 5334 if (ip == 4) { 5335 nfsm_chain_get_32(error, &nmrep, port); 5336 if (!error) 5337 ((struct sockaddr_in*)sa)->sin_port = htons(port); 5338 } else { 5339 /* get uaddr string and convert to sockaddr */ 5340 nfsm_chain_get_32(error, &nmrep, ualen); 5341 if (!error) { 5342 if (ualen > ((int)sizeof(uaddr)-1)) 5343 error = EIO; 5344 if (ualen < 1) { 5345 /* program is not available, just return a zero port */ 5346 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5347 ((struct sockaddr_in6*)saddr)->sin6_port = htons(0); 5348 } else { 5349 nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr); 5350 if (!error) { 5351 uaddr[ualen] = '\0'; 5352 if (!nfs_uaddr2sockaddr(uaddr, saddr)) 5353 error = EIO; 5354 } 5355 } 5356 } 5357 if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) { 5358 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */ 5359 if (pmvers == RPCBVERS4) { 5360 /* fall back to v3 and GETADDR */ 5361 pmvers = RPCBVERS3; 5362 pmproc = RPCBPROC_GETADDR; 5363 nfsm_chain_cleanup(&nmreq); 5364 nfsm_chain_cleanup(&nmrep); 5365 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5366 xid = 0; 5367 error = 0; 5368 goto tryagain; 5369 } 5370 } 5371 if (!error) 5372 bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len)); 5373 } 5374nfsmout: 5375 nfsm_chain_cleanup(&nmreq); 5376 nfsm_chain_cleanup(&nmrep); 5377 return (error); 5378} 5379 5380int 5381nfs_msg(thread_t thd, 5382 const char *server, 5383 const char *msg, 5384 int error) 5385{ 5386 proc_t p = thd ? 
get_bsdthreadtask_info(thd) : NULL; 5387 tpr_t tpr; 5388 5389 if (p) 5390 tpr = tprintf_open(p); 5391 else 5392 tpr = NULL; 5393 if (error) 5394 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error); 5395 else 5396 tprintf(tpr, "nfs server %s: %s\n", server, msg); 5397 tprintf_close(tpr); 5398 return (0); 5399} 5400 5401#define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */ 5402#define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if the are automounted */ 5403#define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */ 5404#define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */ 5405#define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */ 5406 5407uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK; 5408int32_t nfs_is_mobile; 5409 5410#define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead time out for squishy mounts */ 5411#define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead time out when nfs_squish_flags NFS_SQUISH_QUICK bit is set*/ 5412 5413/* 5414 * Could this mount be squished? 5415 */ 5416int 5417nfs_can_squish(struct nfsmount *nmp) 5418{ 5419 uint64_t flags = vfs_flags(nmp->nm_mountp); 5420 int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) & NMFLAG(nmp, SOFT)); 5421 5422 if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0) 5423 return (0); 5424 5425 if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0) 5426 return (0); 5427 5428 return (1); 5429} 5430 5431/* 5432 * NFS mounts default to "rw,hard" - but frequently on mobile clients 5433 * the mount may become "not responding". It's desirable to be able 5434 * to unmount these dead mounts, but only if there is no risk of 5435 * losing data or crashing applications. A "squishy" NFS mount is one 5436 * that can be force unmounted with little risk of harm. 
 *
 * nfs_is_squishy checks if a mount is in a squishy state. A mount is
 * in a squishy state iff it is allowed to be squishy and there are no
 * dirty pages and there are no mmapped files and there are no files
 * open for write. Whether mounts are allowed to be squishy is controlled
 * by the settings of the nfs_squishy_flags and its mobility state. These
 * flags can be set by sysctls.
 *
 * If nfs_is_squishy determines that we are in a squishy state we will
 * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT
 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see
 * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just
 * update the current dead timeout with the mount's nm_deadtimeout
 * value set at mount time.
 *
 * Assumes that nm_lock is held.
 *
 * Note this routine is racy, but its effects on setting the
 * dead timeout only have effects when we're in trouble and are likely
 * to stay that way. Since by default it's only for automounted
 * volumes on mobile machines; this is a reasonable trade off between
 * data integrity and user experience. It can be disabled or set via
 * nfs.conf file.
 */

int
nfs_is_squishy(struct nfsmount *nmp)
{
	mount_t mp = nmp->nm_mountp;
	int squishy = 0;
	/* base timeout: shorter when NFS_SQUISH_QUICK is configured */
	int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT;

	NFS_SOCK_DBG("%s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n",
		vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile);

	if (!nfs_can_squish(nmp))
		goto out;

	/* use the larger of the base timeout and 1/8th of the configured dead timeout */
	timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo;
	NFS_SOCK_DBG("nm_writers = %d nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo);

	if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) {
		uint64_t flags = mp ? vfs_flags(mp) : 0;
		squishy = 1;

		/*
		 * Walk the nfs nodes and check for dirty buffers if we're not
		 * RDONLY and we've not already been declared as squishy since
		 * this can be a bit expensive.
		 */
		if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY))
			squishy = !nfs_mount_is_dirty(mp);
	}

out:
	if (squishy)
		nmp->nm_state |= NFSSTA_SQUISHY;
	else
		nmp->nm_state &= ~NFSSTA_SQUISHY;

	/* squishy mounts get the (shorter) squishy timeout; others the configured one */
	nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout;

	NFS_SOCK_DBG("nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout);

	return (squishy);
}

/*
 * On a send operation, if we can't reach the server and we've got only one server to talk to
 * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead
 * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise.
 */
int
nfs_is_dead(int error, struct nfsmount *nmp)
{
	fsid_t fsid;

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_state & NFSSTA_DEAD) {
		/* already marked dead */
		lck_mtx_unlock(&nmp->nm_lock);
		return (1);
	}

	/* only an unreachability error against a single-server mount can kill it here */
	if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) ||
	    !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) {
		lck_mtx_unlock(&nmp->nm_lock);
		return (0);
	}

	if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) {
		printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		fsid = vfs_statfs(nmp->nm_mountp)->f_fsid;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_mount_zombie(nmp, NFSSTA_DEAD);
		vfs_event_signal(&fsid, VQ_DEAD, 0);
		return (1);
	}
	lck_mtx_unlock(&nmp->nm_lock);
	return (0);
}

/*
 * If we've experienced timeouts and we're not really a
 * classic hard mount, then just return cached data to
 * the caller instead of likely hanging on an RPC.
 *
 * Returns non-zero when the caller should satisfy the request from
 * cache rather than issue an RPC.
 */
int
nfs_use_cache(struct nfsmount *nmp)
{
	/*
	 *%%% We always let mobile users goto the cache,
	 * perhaps we should not even require them to have
	 * a timeout?
	 */
	/* "not a hard hard-mount": mobile, soft, squishable, or has a dead timeout */
	int cache_ok = (nfs_is_mobile || NMFLAG(nmp, SOFT) ||
			nfs_can_squish(nmp) || nmp->nm_deadtimeout);

	int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;

	/*
	 * So if we have a timeout and we're not really a hard hard-mount,
	 * return non-zero so the caller serves the request from the cache.
	 */

	return ((nmp->nm_state & timeoutmask) && cache_ok);
}

/*
 * Log a message that nfs or lockd server is unresponsive. Check if we
 * can be squished and if we can, or that our dead timeout has
 * expired, and we're not holding state, set our mount as dead, remove
 * our mount state and ask to be unmounted. If we are holding state
 * we're being called from the nfs_request_timer and will soon detect
 * that we need to unmount.
 */
void
nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg, int holding_state)
{
	int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
	uint32_t do_vfs_signal = 0;
	struct timeval now;

	if (nfs_mount_gone(nmp))
		return;

	lck_mtx_lock(&nmp->nm_lock);

	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
	if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
		timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
	wasunresponsive = (nmp->nm_state & timeoutmask);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));

	/* set whichever timeout flags the caller passed that aren't already set */
	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
		nmp->nm_state |= NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO))
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state |= NFSSTA_JUKEBOXTIMEO;

	unresponsive = (nmp->nm_state & timeoutmask);

	/* refresh squishy state / nm_curdeadtimeout (nm_lock held, as it requires) */
	nfs_is_squishy(nmp);

	if (unresponsive && (nmp->nm_curdeadtimeout > 0)) {
		microuptime(&now);
		if (!wasunresponsive) {
			/* just became unresponsive: start the dead-timeout clock */
			nmp->nm_deadto_start = now.tv_sec;
			nfs_mount_sock_thread_wake(nmp);
		} else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout && !holding_state) {
			if (!(nmp->nm_state & NFSSTA_DEAD))
				printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
					(nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
			do_vfs_signal = VQ_DEAD;
		}
	}
	lck_mtx_unlock(&nmp->nm_lock);

	/* NOTE(review): nm_state is read here without nm_lock held — racy check; confirm intent */
	if (do_vfs_signal == VQ_DEAD && !(nmp->nm_state & NFSSTA_DEAD))
		nfs_mount_zombie(nmp, NFSSTA_DEAD);
	else if (softnobrowse || wasunresponsive || !unresponsive)
		do_vfs_signal = 0;	/* suppress redundant or hidden NOTRESP notifications */
	else
		do_vfs_signal = VQ_NOTRESP;
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0);

	nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error);
}

/*
 * Inverse of nfs_down(): clear the given timeout flags, reset the
 * dead-timeout bookkeeping, and post VQ_NOTRESP "up" if the mount just
 * transitioned from unresponsive to responsive.
 */
void
nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
{
	int timeoutmask, wasunresponsive, unresponsive, softnobrowse;
	int do_vfs_signal;

	if (nfs_mount_gone(nmp))
		return;

	if (msg)
		nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0);

	lck_mtx_lock(&nmp->nm_lock);

	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
	if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
		timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
	wasunresponsive = (nmp->nm_state & timeoutmask);

	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
	softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));

	/* clear whichever timeout flags the caller passed that are currently set */
	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
		nmp->nm_state &= ~NFSSTA_TIMEO;
	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO))
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO))
		nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO;

	unresponsive = (nmp->nm_state & timeoutmask);

	/* server is back: reset dead-timeout state */
	nmp->nm_deadto_start = 0;
	nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
	nmp->nm_state &= ~NFSSTA_SQUISHY;
	lck_mtx_unlock(&nmp->nm_lock);

	if (softnobrowse)
		do_vfs_signal = 0;
	else
		do_vfs_signal = (wasunresponsive && !unresponsive);
	if (do_vfs_signal)
		vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1);
}


#endif /* NFSCLIENT */

#if NFSSERVER

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfsrv_rephead(
	struct nfsrv_descript *nd,
	__unused struct nfsrv_sock *slp,
	struct nfsm_chain *nmrepp,
	size_t siz)
{
	mbuf_t mrep;
	u_int32_t *tl;
	struct nfsm_chain nmrep;
	int err, error;

	err = nd->nd_repstat;
	if (err && (nd->nd_vers == NFS_VER2))
		siz = 0;	/* NFSv2 error replies carry no body data */

	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= nfs_mbuf_minclsize) {
		error = mbuf_getpacket(MBUF_WAITOK, &mrep);
	} else {
		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep);
	}
	if (error) {
		/* unable to allocate packet */
		/* XXX should we keep statistics for these errors? */
		return (error);
	}
	if (siz < nfs_mbuf_minclsize) {
		/* leave space for lower level headers */
		tl = mbuf_data(mrep);
		tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */
		mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED);
	}
	nfsm_chain_init(&nmrep, mrep);
	nfsm_chain_add_32(error, &nmrep, nd->nd_retxid);
	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		/* the request is being rejected outright (auth failure or RPC version mismatch) */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
		if (err & NFSERR_AUTHERR) {
			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
			nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR));
		} else {
			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
		}
	} else {
		/* reply status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		if (nd->nd_gss_context != NULL) {
			/* RPCSEC_GSS verifier */
			error = nfs_gss_svc_verf_put(nd, &nmrep);
			if (error) {
				nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR);
				goto done;
			}
		} else {
			/* RPCAUTH_NULL verifier */
			nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
			nfsm_chain_add_32(error, &nmrep, 0);
		}
		/* accepted status */
		switch (err) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* XXX hard coded versions? */
			nfsm_chain_add_32(error, &nmrep, NFS_VER2);
			nfsm_chain_add_32(error, &nmrep, NFS_VER3);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			if (nd->nd_gss_context != NULL)
				error = nfs_gss_svc_prepare_reply(nd, &nmrep);
			if (err != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep,
					(err ? nfsrv_errmap(nd, err) : 0));
			break;
		}
	}

done:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* error composing reply header */
		/* XXX should we keep statistics for these errors? */
		mbuf_freem(mrep);
		return (error);
	}

	*nmrepp = nmrep;
	if ((err != 0) && (err != NFSERR_RETVOID))
		OSAddAtomic64(1, &nfsstats.srvrpc_errs);
	return (0);
}

/*
 * The nfs server send routine.
 *
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
{
	int error;
	socket_t so = slp->ns_so;
	struct sockaddr *sendnam;
	struct msghdr msg;

	bzero(&msg, sizeof(msg));
	/* only unconnected datagram sockets need an explicit destination address */
	if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) {
		if ((sendnam = mbuf_data(nam))) {
			msg.msg_name = (caddr_t)sendnam;
			msg.msg_namelen = sendnam->sa_len;
		}
	}
	error = sock_sendmbuf(so, &msg, top, 0, NULL);
	if (!error)
		return (0);
	log(LOG_INFO, "nfsd send error %d\n", error);

	if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM))
		error = EPIPE;  /* zap TCP sockets if they time out on send */

	/* Handle any recoverable (soft) socket errors here. (???) */
	if (error != EINTR && error != ERESTART && error != EIO &&
	    error != EWOULDBLOCK && error != EPIPE)
		error = 0; /* Socket errors are recoverable, no need to fail */

	return (error);
}

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfsrv_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MBUF_WAITOK from an nfsd.
 */
void
nfsrv_rcv(socket_t so, void *arg, int waitflag)
{
	struct nfsrv_sock *slp = arg;

	/* ignore upcalls if there are no nfsds or the socket is no longer valid */
	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
		return;

	lck_rw_lock_exclusive(&slp->ns_rwlock);
	nfsrv_rcv_locked(so, slp, waitflag);
	/* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */
}

/*
 * Receive-side work for an nfsd socket, called with ns_rwlock held
 * exclusive.  Drains available data into the socket's raw/record queues
 * and wakes an nfsd if there is work to do.
 * NOTE: drops ns_rwlock before returning when waitflag == MBUF_DONTWAIT.
 */
void
nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m, mp, mhck, m2;
	int ns_flag=0, error;
	struct msghdr msg;
	size_t bytes_read;

	if ((slp->ns_flag & SLP_VALID) == 0) {
		if (waitflag == MBUF_DONTWAIT)
			lck_rw_done(&slp->ns_rwlock);
		return;
	}

#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == MBUF_DONTWAIT) {
		ns_flag = SLP_NEEDQ;
		goto dorecs;
	}
#endif
	if (slp->ns_sotype == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an(other) nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec) {
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		bytes_read = 1000000000;	/* effectively "read everything available" */
		error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read);
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0;
			else
				ns_flag = SLP_DISCONN;
			goto dorecs;
		}
		/* append the new data to the raw (unparsed) stream chain */
		m = mp;
		if (slp->ns_rawend) {
			if ((error = mbuf_setnext(slp->ns_rawend, m)))
				panic("nfsrv_rcv: mbuf_setnext failed %d\n", error);
			slp->ns_cc += bytes_read;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = bytes_read;
		}
		/* advance ns_rawend to the last mbuf of the chain */
		while ((m2 = mbuf_next(m)))
			m = m2;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				ns_flag = SLP_DISCONN;
			else
				ns_flag = SLP_NEEDQ;
		}
	} else {
		struct sockaddr_storage	nam;

		if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) {
			/* already have max # RPC records queued on this socket */
			ns_flag = SLP_NEEDQ;
			goto dorecs;
		}

		bzero(&msg, sizeof(msg));
		msg.msg_name = (caddr_t)&nam;
		msg.msg_namelen = sizeof(nam);

		do {
			bytes_read = 1000000000;	/* effectively "read everything available" */
			error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
			if (mp) {
				/* prepend the sender's address to the record, if we can get an mbuf */
				if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) {
					mbuf_setlen(mhck, nam.ss_len);
					bcopy(&nam, mbuf_data(mhck), nam.ss_len);
					m = mhck;
					if (mbuf_setnext(m, mp)) {
						/* trouble... just drop it */
						printf("nfsrv_rcv: mbuf_setnext failed\n");
						mbuf_free(mhck);
						m = mp;
					}
				} else {
					m = mp;
				}
				/* queue the datagram as one complete record */
				if (slp->ns_recend)
					mbuf_setnextpkt(slp->ns_recend, m);
				else {
					slp->ns_rec = m;
					slp->ns_flag |= SLP_DOREC;
				}
				slp->ns_recend = m;
				mbuf_setnextpkt(m, NULL);
				slp->ns_reccnt++;
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (ns_flag)
		slp->ns_flag |= ns_flag;
	if (waitflag == MBUF_DONTWAIT) {
		int wake = (slp->ns_flag & SLP_WORKTODO);
		lck_rw_done(&slp->ns_rwlock);
		if (wake && nfsd_thread_count) {
			lck_mtx_lock(nfsd_mutex);
			nfsrv_wakenfsd(slp);
			lck_mtx_unlock(nfsd_mutex);
		}
	}
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket.  The "waitflag" argument indicates whether or not it
 * can sleep.
 */
int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_int32_t recmark;

	/* Only one thread may be parsing this socket's stream at a time. */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			/*
			 * Start of a new fragment: read the 4-byte RPC record
			 * marker (length in the low 31 bits, last-fragment flag
			 * in the high bit).
			 */
			if (slp->ns_cc < NFSX_UNSIGNED) {
				/* not enough buffered data yet; wait for more */
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				/* marker is contiguous in the first mbuf */
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				/* marker straddles mbufs; gather it byte by byte */
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
				/* implausible fragment length: stream is corrupt */
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* buffered data is exactly one fragment: take it all */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/* split the fragment off the front of the raw chain */
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					/* fragment ends mid-mbuf: copy the head ... */
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					/* ... and advance past the copied bytes in place */
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					/* fragment ends exactly at an mbuf boundary */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					/* whole mbuf belongs to this fragment; keep walking */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* fragment not fully received yet; wait for more data */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			/* append this fragment to the partial record */
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* record complete: move it onto the record queue */
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}

/*
 * Dequeue the next RPC record from a server socket and parse its header
 * into a freshly allocated request descriptor (*ndp).
 * Returns ENOBUFS if there is no record to process.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	/* Nothing to do unless the socket is valid and has queued records. */
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	/* Unlink the first record from the queue. */
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		/* queue drained */
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	/* A leading SONAME mbuf carries the sender's address (datagram case). */
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		/* parse failed: release everything acquired above */
		if (nam)
			mbuf_freem(nam);
		if (nd->nd_gss_context)
			nfs_gss_svc_ctx_deref(nd->nd_gss_context);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred
 *   struct.
 */
/*
 * Parse and validate the RPC call header in nd->nd_nmreq, filling in the
 * descriptor's version, procedure number, and credential.
 *
 * Returns 0 both on success and on "soft" failures where the reply should
 * carry the error (nd_repstat set, nd_procnum forced to NFSPROC_NOOP).
 * Returns a non-zero error (via nfsmout) only when the request must be
 * dropped outright.  Errors from the nfsm_chain_get_32()/nfsm_chain_adv()
 * macros accumulate in "error" and are checked with nfsmout_if().
 */
int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	/* default IDs to -2 (conventionally "nobody") until parsed from the wire */
	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
	nfsm_chain_get_32(error, nmreq, val); // RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val); // RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		/* unsupported RPC version: reply with a mismatch error */
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		/* only NFSv2 and NFSv3 are served here */
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
		((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* map v2 procedure numbers onto the v3 numbering used internally */
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
	nfsm_chain_get_32(error, nmreq, len); // Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_SYS) {
		struct posix_cred temp_pcred;
		/* NULL procedure needs no credential */
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_SYS;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_pcred, sizeof(temp_pcred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_pcred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		/* read up to NGROUPS-1 extra GIDs; skip any beyond that */
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		/* total group count includes cr_groups[0], capped at NGROUPS */
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_pcred.cr_uid = user_id;
		temp_pcred.cr_ngroups = ngroups;
		nd->nd_cr = posix_cred_create(&temp_pcred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	/* hard failure: drop the request and release anything acquired */
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
6326 */ 6327void 6328nfsrv_wakenfsd(struct nfsrv_sock *slp) 6329{ 6330 struct nfsd *nd; 6331 6332 if ((slp->ns_flag & SLP_VALID) == 0) 6333 return; 6334 6335 lck_rw_lock_exclusive(&slp->ns_rwlock); 6336 /* if there's work to do on this socket, make sure it's queued up */ 6337 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) { 6338 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq); 6339 slp->ns_flag |= SLP_WAITQ; 6340 } 6341 lck_rw_done(&slp->ns_rwlock); 6342 6343 /* wake up a waiting nfsd, if possible */ 6344 nd = TAILQ_FIRST(&nfsd_queue); 6345 if (!nd) 6346 return; 6347 6348 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue); 6349 nd->nfsd_flag &= ~NFSD_WAITING; 6350 wakeup(nd); 6351} 6352 6353#endif /* NFSSERVER */ 6354