1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1991, 1993, 1995 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Rick Macklem at The University of Guelph. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/kernel.h>
#include <sys/kpi_mbuf.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <libkern/OSAtomic.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <sys/user.h>
#include <sys/acct.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/krpc.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>

/* Socket-layer debug logging (facility NFS_FAC_SOCK, level 7). */
#define NFS_SOCK_DBG(...) NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__)

/* XXX these belong in a proper header */
boolean_t current_thread_aborted(void);
kern_return_t thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */

int nfsrv_getstream(struct nfsrv_sock *, int);
int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */

/*
 * Compare two sockaddr structures.
 *
 * Returns 0 when the two addresses match (same family, same length,
 * same AF_INET/AF_INET6 address bytes) and nonzero otherwise.  The sign
 * of the result provides a stable ordering for family/length mismatches;
 * for same-family comparisons the result is whatever bcmp() returns, so
 * only zero/nonzero is meaningful there.  A NULL sa1 (or an unrecognized
 * family) compares as -1; a NULL sa2 compares as 1.
 */
int
nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
{
	if (!sa1)
		return (-1);
	if (!sa2)
		return (1);
	if (sa1->sa_family != sa2->sa_family)
		return ((sa1->sa_family < sa2->sa_family) ? -1 : 1);
	if (sa1->sa_len != sa2->sa_len)
		return ((sa1->sa_len < sa2->sa_len) ? -1 : 1);
	if (sa1->sa_family == AF_INET)
		return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
			&((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr)));
	if (sa1->sa_family == AF_INET6)
		return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
			&((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr)));
	return (-1);
}

#if NFSCLIENT

int	nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int	nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
int	nfs_connect_search_ping(struct nfsmount *, struct nfs_socket *, struct timeval *);
void	nfs_connect_search_socket_found(struct nfsmount *, struct nfs_socket_search *, struct nfs_socket *);
void	nfs_connect_search_socket_reap(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int	nfs_connect_search_check(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int	nfs_reconnect(struct nfsmount *);
int	nfs_connect_setup(struct nfsmount *);
void	nfs_mount_sock_thread(void *, wait_result_t);
void	nfs_udp_rcv(socket_t, void*, int);
void	nfs_tcp_rcv(socket_t, void*, int);
void	nfs_sock_poke(struct nfsmount *);
void	nfs_request_match_reply(struct nfsmount *, mbuf_t);
void	nfs_reqdequeue(struct nfsreq *);
void	nfs_reqbusy(struct nfsreq *);
struct nfsreq *nfs_reqnext(struct nfsreq *);
int	nfs_wait_reply(struct nfsreq *);
void	nfs_softterm(struct nfsreq *);
int	nfs_can_squish(struct nfsmount *);
int	nfs_is_squishy(struct nfsmount *);
int	nfs_is_dead(int, struct nfsmount *);

/*
 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
/*
 * NFS_RTO(n, t): retransmit timeout for timer class t on mount n.
 * t == 0 uses the mount's static nm_timeo; other classes combine the
 * smoothed rtt (nm_srtt) and mean deviation (nm_sdrtt), with heavier
 * weighting of the deviation for the read/write classes (t >= 3).
 */
#define NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
/* Per-request accessors for the mount's rtt/deviation slot of this procnum. */
#define NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define NFS_CWNDSCALE	256
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/* Backoff multipliers (powers of two) applied on successive retransmits. */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };

/*
 * Increment location index to next address/server/location.
 */
void
nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
{
	uint8_t loc = nlip->nli_loc;
	uint8_t serv = nlip->nli_serv;
	uint8_t addr = nlip->nli_addr;

	/* move to next address */
	addr++;
	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
		/* no more addresses on current server, go to first address of next server */
next_server:
		addr = 0;
		serv++;
		if (serv >= nlp->nl_locations[loc]->nl_servcount) {
			/* no more servers on current location, go to first server of next location */
			serv = 0;
			loc++;
			if (loc >= nlp->nl_numlocs)
				loc = 0; /* after last location, wrap back around to first location */
		}
	}
	/*
	 * It's possible for this next server to not have any addresses.
	 * Check for that here and go to the next server.
	 * But bail out if we've managed to come back around to the original
	 * location that was passed in. (That would mean no servers had any
	 * addresses. And we don't want to spin here forever.)
	 */
	if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr))
		return;
	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount)
		goto next_server;

	/* commit the advanced index back to the caller */
	nlip->nli_loc = loc;
	nlip->nli_serv = serv;
	nlip->nli_addr = addr;
}

/*
 * Compare two location indices.
 *
 * Orders by location, then server, then address; returns the usual
 * negative/zero/positive comparison result.
 */
int
nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
{
	if (nlip1->nli_loc != nlip2->nli_loc)
		return (nlip1->nli_loc - nlip2->nli_loc);
	if (nlip1->nli_serv != nlip2->nli_serv)
		return (nlip1->nli_serv - nlip2->nli_serv);
	return (nlip1->nli_addr - nlip2->nli_addr);
}

/*
 * Get the mntfromname (or path portion only) for a given location.
278 */ 279void 280nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly) 281{ 282 struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc]; 283 char *p; 284 int cnt, i; 285 286 p = s; 287 if (!pathonly) { 288 cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name); 289 p += cnt; 290 size -= cnt; 291 } 292 if (fsl->nl_path.np_compcount == 0) { 293 /* mounting root export on server */ 294 if (size > 0) { 295 *p++ = '/'; 296 *p++ = '\0'; 297 } 298 return; 299 } 300 /* append each server path component */ 301 for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) { 302 cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]); 303 p += cnt; 304 size -= cnt; 305 } 306} 307 308/* 309 * NFS client connect socket upcall. 310 * (Used only during socket connect/search.) 311 */ 312void 313nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag) 314{ 315 struct nfs_socket *nso = arg; 316 size_t rcvlen; 317 mbuf_t m; 318 int error = 0, recv = 1; 319 320 if (nso->nso_flags & NSO_CONNECTING) { 321 NFS_SOCK_DBG("nfs connect - socket %p upcall - connecting\n", nso); 322 wakeup(nso->nso_wake); 323 return; 324 } 325 326 lck_mtx_lock(&nso->nso_lock); 327 if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) { 328 NFS_SOCK_DBG("nfs connect - socket %p upcall - nevermind\n", nso); 329 lck_mtx_unlock(&nso->nso_lock); 330 return; 331 } 332 NFS_SOCK_DBG("nfs connect - socket %p upcall\n", nso); 333 nso->nso_flags |= NSO_UPCALL; 334 335 /* loop while we make error-free progress */ 336 while (!error && recv) { 337 /* make sure we're still interested in this socket */ 338 if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD)) 339 break; 340 lck_mtx_unlock(&nso->nso_lock); 341 m = NULL; 342 if (nso->nso_sotype == SOCK_STREAM) { 343 error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m); 344 } else { 345 rcvlen = 1000000; 346 
			error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
			recv = m ? 1 : 0;
		}
		lck_mtx_lock(&nso->nso_lock);
		if (m) {
			/* match response with request */
			struct nfsm_chain nmrep;
			uint32_t reply = 0, rxid = 0, verf_type, verf_len;
			uint32_t reply_status, rejected_status, accepted_status;

			/* dissect the RPC reply header: xid, msg type, reply status */
			nfsm_chain_dissect_init(error, &nmrep, m);
			nfsm_chain_get_32(error, &nmrep, rxid);
			nfsm_chain_get_32(error, &nmrep, reply);
			if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
				error = EBADRPC;
			nfsm_chain_get_32(error, &nmrep, reply_status);
			if (!error && (reply_status == RPC_MSGDENIED)) {
				nfsm_chain_get_32(error, &nmrep, rejected_status);
				if (!error)
					error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
			}
			nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
			nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
			nfsmout_if(error);
			if (verf_len)
				nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
			nfsm_chain_get_32(error, &nmrep, accepted_status);
			nfsmout_if(error);
			/*
			 * On PROG_MISMATCH with no version pinned yet, the reply
			 * carries the server's supported version range; pick a
			 * version we can use (preferring v3 for NFS, and the
			 * family-appropriate rpcbind/portmap version).
			 */
			if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
				uint32_t minvers, maxvers;
				nfsm_chain_get_32(error, &nmrep, minvers);
				nfsm_chain_get_32(error, &nmrep, maxvers);
				nfsmout_if(error);
				if (nso->nso_protocol == PMAPPROG) {
					if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
						error = EPROGMISMATCH;
					else if ((nso->nso_saddr->sa_family == AF_INET) &&
						(PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
						nso->nso_version = PMAPVERS;
					else if (nso->nso_saddr->sa_family == AF_INET6) {
						if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
							nso->nso_version = RPCBVERS4;
						else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
							nso->nso_version = RPCBVERS3;
					}
				} else if (nso->nso_protocol == NFS_PROG) {
					if ((minvers > NFS_VER4) || (maxvers < NFS_VER2))
						error = EPROGMISMATCH;
					else if
					((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers))
						nso->nso_version = NFS_VER3;
					else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers))
						nso->nso_version = NFS_VER2;
					else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers))
						nso->nso_version = NFS_VER4;
				}
				/* if we settled on a version, treat the mismatch as success */
				if (!error && nso->nso_version)
					accepted_status = RPC_SUCCESS;
			}
			/* map the RPC accepted status to an errno */
			if (!error) {
				switch (accepted_status) {
				case RPC_SUCCESS:
					error = 0;
					break;
				case RPC_PROGUNAVAIL:
					error = EPROGUNAVAIL;
					break;
				case RPC_PROGMISMATCH:
					error = EPROGMISMATCH;
					break;
				case RPC_PROCUNAVAIL:
					error = EPROCUNAVAIL;
					break;
				case RPC_GARBAGE:
					error = EBADRPC;
					break;
				case RPC_SYSTEM_ERR:
				default:
					error = EIO;
					break;
				}
			}
nfsmout:
			/* ping answered (or parsing failed): record the verdict */
			nso->nso_flags &= ~NSO_PINGING;
			if (error) {
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
			} else {
				nso->nso_flags |= NSO_VERIFIED;
			}
			mbuf_freem(m);
			/* wake up search thread */
			wakeup(nso->nso_wake);
			break;
		}
	}

	nso->nso_flags &= ~NSO_UPCALL;
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... */
		nso->nso_error = error ? error : EPIPE;
		nso->nso_flags |= NSO_DEAD;
		wakeup(nso->nso_wake);
	}
	/* nfs_socket_destroy() may be sleeping on nso_flags for us to finish */
	if (nso->nso_flags & NSO_DISCONNECTING)
		wakeup(&nso->nso_flags);
	lck_mtx_unlock(&nso->nso_lock);
}

/*
 * Create/initialize an nfs_socket structure.
 */
/*
 * Allocates and initializes an nfs_socket for the given address/port,
 * socket type, RPC program and version, creating the underlying socket
 * and (optionally) binding it to a reserved port.  On success *nsop is
 * set and 0 returned; on failure everything is torn down and an errno
 * returned.
 */
int
nfs_socket_create(
	__unused struct nfsmount *nmp,
	struct sockaddr *sa,
	int sotype,
	in_port_t port,
	uint32_t protocol,
	uint32_t vers,
	int resvport,
	struct nfs_socket **nsop)
{
	struct nfs_socket *nso;
	struct timeval now;
	int error;
#ifdef NFS_SOCKET_DEBUGGING
	char naddr[MAX_IPv6_STR_LEN];
	void *sinaddr;

	/* pretty-print the address for the debug messages below */
	if (sa->sa_family == AF_INET)
		sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
	else
		sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
	if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr)
		strlcpy(naddr, "<unknown>", sizeof(naddr));
#else
	char naddr[1] = { 0 };
#endif

	*nsop = NULL;

	/* Create the socket. */
	MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO);
	if (nso)
		MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO);
	if (!nso || !nso->nso_saddr) {
		/* free the partial allocation on failure */
		if (nso)
			FREE(nso, M_TEMP);
		return (ENOMEM);
	}
	lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
	nso->nso_sotype = sotype;
	if (nso->nso_sotype == SOCK_STREAM)
		nfs_rpc_record_state_init(&nso->nso_rrs);
	microuptime(&now);
	nso->nso_timestamp = now.tv_sec;
	/* copy the address and stuff in the requested port */
	bcopy(sa, nso->nso_saddr, sa->sa_len);
	if (sa->sa_family == AF_INET)
		((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
	else if (sa->sa_family == AF_INET6)
		((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
	nso->nso_protocol = protocol;
	nso->nso_version = vers;

	error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);

	/* Some servers require that the client port be a reserved port number. */
	if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
		struct sockaddr_storage ss;
		int level = (sa->sa_family == AF_INET) ?
			IPPROTO_IP : IPPROTO_IPV6;
		int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
		int portrange = IP_PORTRANGE_LOW;

		/* request a low (reserved) port number */
		error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
		if (!error) { /* bind now to check for failure */
			ss.ss_len = sa->sa_len;
			ss.ss_family = sa->sa_family;
			if (ss.ss_family == AF_INET) {
				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
				((struct sockaddr_in*)&ss)->sin_port = htons(0);
			} else if (ss.ss_family == AF_INET6) {
				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
			} else {
				error = EINVAL;
			}
			if (!error)
				error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
		}
	}

	if (error) {
		NFS_SOCK_DBG("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
			resvport ? "r" : "", port, protocol, vers);
		/* tear down the partially-constructed socket */
		nfs_socket_destroy(nso);
	} else {
		NFS_SOCK_DBG("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
			sotype, resvport ? "r" : "", port, protocol, vers);
		*nsop = nso;
	}
	return (error);
}

/*
 * Destroy an nfs_socket structure.
 */
/*
 * Shuts down and closes the underlying socket and frees all resources
 * held by the nfs_socket.  If an upcall is currently running, waits
 * (up to ~4 seconds) for it to notice NSO_DISCONNECTING and finish.
 */
void
nfs_socket_destroy(struct nfs_socket *nso)
{
	struct timespec ts = { 4, 0 };

	lck_mtx_lock(&nso->nso_lock);
	nso->nso_flags |= NSO_DISCONNECTING;
	if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */
		msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts);
	lck_mtx_unlock(&nso->nso_lock);
	sock_shutdown(nso->nso_so, SHUT_RDWR);
	sock_close(nso->nso_so);
	if (nso->nso_sotype == SOCK_STREAM)
		nfs_rpc_record_state_cleanup(&nso->nso_rrs);
	lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
	if (nso->nso_saddr)
		FREE(nso->nso_saddr, M_SONAME);
	if (nso->nso_saddr2)
		FREE(nso->nso_saddr2, M_SONAME);
	NFS_SOCK_DBG("nfs connect - socket %p destroyed\n", nso);
	FREE(nso, M_TEMP);
}

/*
 * Set common socket options on an nfs_socket.
 */
void
nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
{
	/*
	 * Set socket send/receive timeouts
	 * - Receive timeout shouldn't matter because most receives are performed
	 *   in the socket upcall non-blocking.
	 * - Send timeout should allow us to react to a blocked socket.
	 *   Soft mounts will want to abort sooner.
	 */
	struct timeval timeo;
	int on = 1, proto;

	timeo.tv_usec = 0;
	/* soft (or squishable) mounts get the short 5s timeout, others 60s */
	timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ?
		5 : 60;
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (nso->nso_sotype == SOCK_STREAM) {
		/* Assume that SOCK_STREAM always requires a connection */
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		/* set nodelay for TCP */
		sock_gettype(nso->nso_so, NULL, NULL, &proto);
		if (proto == IPPROTO_TCP)
			sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}
	if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
	}
	/* set SO_NOADDRERR to detect network changes ASAP */
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	/* just playin' it safe with upcalls */
	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	/* socket should be interruptible if the mount is */
	if (!NMFLAG(nmp, INTR))
		sock_nointerrupt(nso->nso_so, 1);
}

/*
 * Release resources held in an nfs_socket_search.
 *
 * Destroys every socket still on the search list as well as any
 * already-chosen socket (nss_sock).
 */
void
nfs_socket_search_cleanup(struct nfs_socket_search *nss)
{
	struct nfs_socket *nso, *nsonext;

	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt--;
		nfs_socket_destroy(nso);
	}
	if (nss->nss_sock) {
		nfs_socket_destroy(nss->nss_sock);
		nss->nss_sock = NULL;
	}
}

/*
 * Prefer returning certain errors over others.
 * This function returns a ranking of the given error.
 */
/* Higher rank == more interesting/specific error (0 = no error at all). */
int
nfs_connect_error_class(int error)
{
	switch (error) {
	case 0:
		return (0);
	case ETIMEDOUT:
	case EAGAIN:
		return (1);
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case EISCONN:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		return (2);
	case ERPCMISMATCH:
	case EPROCUNAVAIL:
	case EPROGMISMATCH:
	case EPROGUNAVAIL:
		return (3);
	case EBADRPC:
		return (4);
	default:
		return (5);
	}
}

/*
 * Make sure a socket search returns the best error.
 * (Keeps whichever of the old/new errors ranks highest.)
 */
void
nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
{
	if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error))
		nss->nss_error = error;
}

/* nfs_connect_search_new_socket:
 *	Given a socket search structure for an nfs mount try to find a new socket from the set of addresses specified
 *	by nss.
 *
 *	nss_last is set to -1 at initialization to indicate the first time. It's set to -2 if an address was found but
 *	could not be used or if a socket timed out.
 */
int
nfs_connect_search_new_socket(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
{
	struct nfs_fs_location *fsl;
	struct nfs_fs_server *fss;
	struct sockaddr_storage ss;
	struct nfs_socket *nso;
	char *addrstr;
	int error = 0;


	NFS_SOCK_DBG("nfs connect %s nss_addrcnt = %d\n",
		vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss->nss_addrcnt);

	/*
	 * while there are addresses and:
	 *	we have no sockets or
	 *	the last address failed and did not produce a socket (nss_last < 0) or
	 *	it's been a while (2 seconds) and we have less than the max number of concurrent sockets to search (4)
	 *	then attempt to create a socket with the current address.
	 */
	while (nss->nss_addrcnt > 0 && ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
			((nss->nss_sockcnt < 4) && (now->tv_sec >= (nss->nss_last + 2))))) {
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			return (EINTR);
		/* Can we convert the address to a sockaddr? */
		fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
		fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
		addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
		if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
			/* unparseable address: skip it and move on immediately */
			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
			nss->nss_addrcnt -= 1;
			nss->nss_last = -2;
			continue;
		}
		/* Check that socket family is acceptable. */
		if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
			nss->nss_addrcnt -= 1;
			nss->nss_last = -2;
			continue;
		}

		/* Create the socket. */
		error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
				nss->nss_port, nss->nss_protocol, nss->nss_version,
				((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
		if (error)
			return (error);

		nso->nso_location = nss->nss_nextloc;
		nso->nso_wake = nss;
		error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
		if (error) {
			/* mark it dead; the reap pass will clean it up off the list */
			lck_mtx_lock(&nso->nso_lock);
			nso->nso_error = error;
			nso->nso_flags |= NSO_DEAD;
			lck_mtx_unlock(&nso->nso_lock);
		}

		TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt++;
		nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
		nss->nss_addrcnt -= 1;

		nss->nss_last = now->tv_sec;
	}

	/* all addresses consumed without a socket started: record the time */
	if (nss->nss_addrcnt == 0 && nss->nss_last < 0)
		nss->nss_last = now->tv_sec;

	return (error);
}

/*
 * nfs_connect_search_socket_connect:	Connect an nfs socket nso for nfsmount nmp.
 *					If successful set the socket options for the socket as required from the mount.
 *
 * Assumes:	nso->nso_lock is held on entry and return.
 *
 * Returns 1 when the socket is connected and its options are set,
 * 0 when it is still connecting (or has been marked NSO_DEAD).
 */
int
nfs_connect_search_socket_connect(struct nfsmount *nmp, struct nfs_socket *nso, int verbose)
{
	int error;

	if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
		/* no connection needed, just say it's already connected */
		NFS_SOCK_DBG("nfs connect %s UDP socket %p noconnect\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
		nso->nso_flags |= NSO_CONNECTED;
		nfs_socket_options(nmp, nso);
		return (1); /* Socket is connected and setup */
	} else if (!(nso->nso_flags & NSO_CONNECTING)) {
		/* initiate the connection */
		nso->nso_flags |= NSO_CONNECTING;
		/* drop the lock around the (non-blocking) connect call */
		lck_mtx_unlock(&nso->nso_lock);
		NFS_SOCK_DBG("nfs connect %s connecting socket %p\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
		error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
		lck_mtx_lock(&nso->nso_lock);
		if (error && (error != EINPROGRESS)) {
			nso->nso_error = error;
			nso->nso_flags |= NSO_DEAD;
			return (0);
		}
	}
	if (nso->nso_flags & NSO_CONNECTING) {
		/* check the connection */
		if (sock_isconnected(nso->nso_so)) {
			NFS_SOCK_DBG("nfs connect %s socket %p is connected\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
			nso->nso_flags &= ~NSO_CONNECTING;
			nso->nso_flags |= NSO_CONNECTED;
			nfs_socket_options(nmp, nso);
			return (1); /* Socket is connected and setup */
		} else {
			/* not connected yet; see if the socket has a pending error */
			int optlen = sizeof(error);
			error = 0;
			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
			if (error) { /* we got an error on the socket */
				NFS_SOCK_DBG("nfs connect %s socket %p connection error %d\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
				if (verbose)
					printf("nfs connect socket error %d for %s\n",
						error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
				return (0);
			}
		}
	}

	return (0); /* Waiting to be connected */
}

/*
 * nfs_connect_search_ping:	Send a null proc on the nso socket.
 *
 * Marks the socket NSO_PINGING and records the xid so the connect
 * upcall can match the reply.  Returns 1 if the ping was sent,
 * 0 on failure (socket is marked NSO_DEAD).
 *
 * Assumes nso->nso_lock is held on entry and return (dropped around
 * the header build and the actual send).
 */
int
nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct timeval *now)
{
	/* initiate a NULL RPC request */
	uint64_t xid = nso->nso_pingxid;
	mbuf_t m, mreq = NULL;
	struct msghdr msg;
	size_t reqlen, sentlen;
	uint32_t vers = nso->nso_version;
	int error;

	if (!vers) {
		/* no version pinned yet: ping with the family-appropriate default */
		if (nso->nso_protocol == PMAPPROG)
			vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
		else if (nso->nso_protocol == NFS_PROG)
			vers = NFS_VER3;
	}
	lck_mtx_unlock(&nso->nso_lock);
	error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
			vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
	lck_mtx_lock(&nso->nso_lock);
	if (!error) {
		nso->nso_flags |= NSO_PINGING;
		nso->nso_pingxid = R_XID32(xid);
		nso->nso_reqtimestamp = now->tv_sec;
		bzero(&msg, sizeof(msg));
		/* unconnected datagram sockets need an explicit destination */
		if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
			msg.msg_name = nso->nso_saddr;
			msg.msg_namelen = nso->nso_saddr->sa_len;
		}
		for (reqlen=0, m=mreq; m; m = mbuf_next(m))
			reqlen += mbuf_len(m);
		lck_mtx_unlock(&nso->nso_lock);
		error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
		NFS_SOCK_DBG("nfs connect %s verifying socket %p send rv %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
		lck_mtx_lock(&nso->nso_lock);
		/* a short send counts as a failure */
		if (!error && (sentlen != reqlen))
			error = ETIMEDOUT;
	}
	if (error) {
		nso->nso_error = error;
		nso->nso_flags |= NSO_DEAD;
		return (0);
	}

	return (1);
}

/*
 * nfs_connect_search_socket_found:	Take the found socket of the socket search list and assign it to the searched socket.
 *					Set the nfs socket protocol and version if needed.
 */
void
nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct nfs_socket *nso)
{
	NFS_SOCK_DBG("nfs connect %s socket %p verified\n",
		vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
	if (!nso->nso_version) {
		/* If the version isn't set, the default must have worked. */
		if (nso->nso_protocol == PMAPPROG)
			nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
		if (nso->nso_protocol == NFS_PROG)
			nso->nso_version = NFS_VER3;
	}
	/* move the socket off the search list and into nss_sock */
	TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
	nss->nss_sockcnt--;
	nss->nss_sock = nso;
}

/*
 * nfs_connect_search_socket_reap:	For each socket in the search list mark any timed out socket as dead and remove from
 *					the list. Dead sockets are then destroyed.
 */
void
nfs_connect_search_socket_reap(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct timeval *now)
{
	struct nfs_socket *nso, *nsonext;

	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
		lck_mtx_lock(&nso->nso_lock);
		if (now->tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
			/* took too long */
			NFS_SOCK_DBG("nfs connect %s socket %p timed out\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
			nso->nso_error = ETIMEDOUT;
			nso->nso_flags |= NSO_DEAD;
		}
		if (!(nso->nso_flags & NSO_DEAD)) {
			lck_mtx_unlock(&nso->nso_lock);
			continue;
		}
		lck_mtx_unlock(&nso->nso_lock);
		NFS_SOCK_DBG("nfs connect %s reaping socket %p %d\n",
			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error);
		/* keep the most interesting error seen so far for the caller */
		nfs_socket_search_update_error(nss, nso->nso_error);
		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
		nss->nss_sockcnt--;
		nfs_socket_destroy(nso);
		/* If there are more sockets to try, force the starting of another socket */
		if (nss->nss_addrcnt > 0)
			nss->nss_last = -2;
	}
}

/*
 * nfs_connect_search_check:
 Check on the status of search and wait for replies if needed.
 *
 * Returns 0 when the search should continue; a nonzero errno (EINTR on
 * unmount, or whatever nfs_sigintr() reports) aborts the search.
 */
int
nfs_connect_search_check(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
{
	int error;

	/* log a warning if connect is taking a while */
	if (((now->tv_sec - nss->nss_timestamp) >= 8) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
		printf("nfs_connect: socket connect taking a while for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		nss->nss_flags |= NSS_WARNED;	/* only warn once per search */
	}
	if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
		return (EINTR);
	if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
		return (error);

	/* If we were successful at sending a ping, wait up to a second for a reply */
	if (nss->nss_last >= 0)
		tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);

	return (0);
}


/*
 * Continue the socket search until we have something to report.
 *
 * Drives each socket on the search list through connect -> ping ->
 * verify; returns 0 with nss->nss_sock set on success, or an errno
 * once the search is abandoned.
 */
int
nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
{
	struct nfs_socket *nso;
	struct timeval now;
	int error;
	int verbose = (nss->nss_flags & NSS_VERBOSE);

loop:
	microuptime(&now);
	NFS_SOCK_DBG("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec);

	/* add a new socket to the socket list if needed and available */
	error = nfs_connect_search_new_socket(nmp, nss, &now);
	if (error) {
		NFS_SOCK_DBG("nfs connect returned %d\n", error);
		return (error);
	}

	/* check each active socket on the list and try to push it along */
	TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
		lck_mtx_lock(&nso->nso_lock);

		/* If not connected connect it */
		if (!(nso->nso_flags & NSO_CONNECTED)) {
			if (!nfs_connect_search_socket_connect(nmp, nso, verbose)) {
				lck_mtx_unlock(&nso->nso_lock);
				continue;
			}
		}

		/* If the socket hasn't been verified or in a ping, ping it. We also handle UDP retransmits */
		if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
		    ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
			if (!nfs_connect_search_ping(nmp, nso, &now)) {
				lck_mtx_unlock(&nso->nso_lock);
				continue;
			}
		}

		/* Has the socket been verified by the up call routine? */
		if (nso->nso_flags & NSO_VERIFIED) {
			/* WOOHOO!! This socket looks good! */
			nfs_connect_search_socket_found(nmp, nss, nso);
			lck_mtx_unlock(&nso->nso_lock);
			break;
		}
		lck_mtx_unlock(&nso->nso_lock);
	}

	/* Check for timed out sockets and mark as dead and then remove all dead sockets. */
	nfs_connect_search_socket_reap(nmp, nss, &now);

	/*
	 * Keep looping if we haven't found a socket yet and we have more
	 * sockets to (continue to) try.
	 */
	error = 0;
	if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || nss->nss_addrcnt)) {
		error = nfs_connect_search_check(nmp, nss, &now);
		if (!error)
			goto loop;
	}

	NFS_SOCK_DBG("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
	return (error);
}

/*
 * Initialize a new NFS connection.
 *
 * Search for a location to connect a socket to and initialize the connection.
 *
 * An NFS mount may have multiple locations/servers/addresses available.
 * We attempt to connect to each one asynchronously and will start
 * several sockets in parallel if other locations are slow to answer.
 * We'll use the first NFS socket we can successfully set up.
 *
 * The search may involve contacting the portmapper service first.
 *
 * A mount's initial connection may require negotiating some parameters such
 * as socket type and NFS version.
1048 */ 1049int 1050nfs_connect(struct nfsmount *nmp, int verbose, int timeo) 1051{ 1052 struct nfs_socket_search nss; 1053 struct nfs_socket *nso, *nsonfs; 1054 struct sockaddr_storage ss; 1055 struct sockaddr *saddr, *oldsaddr; 1056 sock_upcall upcall; 1057 struct timeval now, start; 1058 int error, savederror, nfsvers; 1059 uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM; 1060 fhandle_t *fh = NULL; 1061 char *path = NULL; 1062 in_port_t port; 1063 int addrtotal = 0; 1064 1065 /* paranoia... check that we have at least one address in the locations */ 1066 uint32_t loc, serv; 1067 for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) { 1068 for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) { 1069 addrtotal += nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount; 1070 if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount == 0) 1071 NFS_SOCK_DBG("nfs connect %s search, server %s has no addresses\n", 1072 vfs_statfs(nmp->nm_mountp)->f_mntfromname, 1073 nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name); 1074 } 1075 } 1076 1077 if (addrtotal == 0) { 1078 NFS_SOCK_DBG("nfs connect %s search failed, no addresses\n", 1079 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1080 return (EINVAL); 1081 } else 1082 NFS_SOCK_DBG("nfs connect %s has %d addresses\n", 1083 vfs_statfs(nmp->nm_mountp)->f_mntfromname, addrtotal); 1084 1085 lck_mtx_lock(&nmp->nm_lock); 1086 nmp->nm_sockflags |= NMSOCK_CONNECTING; 1087 nmp->nm_nss = &nss; 1088 lck_mtx_unlock(&nmp->nm_lock); 1089 microuptime(&start); 1090 savederror = error = 0; 1091 1092tryagain: 1093 /* initialize socket search state */ 1094 bzero(&nss, sizeof(nss)); 1095 nss.nss_addrcnt = addrtotal; 1096 nss.nss_error = savederror; 1097 TAILQ_INIT(&nss.nss_socklist); 1098 nss.nss_sotype = sotype; 1099 nss.nss_startloc = nmp->nm_locations.nl_current; 1100 nss.nss_timestamp = start.tv_sec; 1101 nss.nss_timeo = timeo; 1102 if (verbose) 1103 
nss.nss_flags |= NSS_VERBOSE; 1104 1105 /* First time connecting, we may need to negotiate some things */ 1106 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1107 if (!nmp->nm_vers) { 1108 /* No NFS version specified... */ 1109 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { 1110 /* ...connect to portmapper first if we (may) need any ports. */ 1111 nss.nss_port = PMAPPORT; 1112 nss.nss_protocol = PMAPPROG; 1113 nss.nss_version = 0; 1114 } else { 1115 /* ...connect to NFS port first. */ 1116 nss.nss_port = nmp->nm_nfsport; 1117 nss.nss_protocol = NFS_PROG; 1118 nss.nss_version = 0; 1119 } 1120 } else if (nmp->nm_vers >= NFS_VER4) { 1121 /* For NFSv4, we use the given (or default) port. */ 1122 nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT; 1123 nss.nss_protocol = NFS_PROG; 1124 nss.nss_version = 4; 1125 } else { 1126 /* For NFSv3/v2... */ 1127 if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { 1128 /* ...connect to portmapper first if we need any ports. */ 1129 nss.nss_port = PMAPPORT; 1130 nss.nss_protocol = PMAPPROG; 1131 nss.nss_version = 0; 1132 } else { 1133 /* ...connect to NFS port first. 
*/ 1134 nss.nss_port = nmp->nm_nfsport; 1135 nss.nss_protocol = NFS_PROG; 1136 nss.nss_version = nmp->nm_vers; 1137 } 1138 } 1139 NFS_SOCK_DBG("nfs connect first %s, so type %d port %d prot %d %d\n", 1140 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, 1141 nss.nss_protocol, nss.nss_version); 1142 } else { 1143 /* we've connected before, just connect to NFS port */ 1144 if (!nmp->nm_nfsport) { 1145 /* need to ask portmapper which port that would be */ 1146 nss.nss_port = PMAPPORT; 1147 nss.nss_protocol = PMAPPROG; 1148 nss.nss_version = 0; 1149 } else { 1150 nss.nss_port = nmp->nm_nfsport; 1151 nss.nss_protocol = NFS_PROG; 1152 nss.nss_version = nmp->nm_vers; 1153 } 1154 NFS_SOCK_DBG("nfs connect %s, so type %d port %d prot %d %d\n", 1155 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, 1156 nss.nss_protocol, nss.nss_version); 1157 } 1158 1159 /* Set next location to first valid location. */ 1160 /* If start location is invalid, find next location. 
*/ 1161 nss.nss_nextloc = nss.nss_startloc; 1162 if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) || 1163 (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) { 1164 nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc); 1165 if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) { 1166 NFS_SOCK_DBG("nfs connect %s search failed, couldn't find a valid location index\n", 1167 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1168 return (ENOENT); 1169 } 1170 } 1171 nss.nss_last = -1; 1172 1173keepsearching: 1174 1175 error = nfs_connect_search_loop(nmp, &nss); 1176 if (error || !nss.nss_sock) { 1177 /* search failed */ 1178 nfs_socket_search_cleanup(&nss); 1179 if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) { 1180 /* Try using UDP */ 1181 sotype = SOCK_DGRAM; 1182 savederror = nss.nss_error; 1183 NFS_SOCK_DBG("nfs connect %s TCP failed %d %d, trying UDP\n", 1184 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error); 1185 goto tryagain; 1186 } 1187 if (!error) 1188 error = nss.nss_error ? nss.nss_error : ETIMEDOUT; 1189 lck_mtx_lock(&nmp->nm_lock); 1190 nmp->nm_sockflags &= ~NMSOCK_CONNECTING; 1191 nmp->nm_nss = NULL; 1192 lck_mtx_unlock(&nmp->nm_lock); 1193 if (nss.nss_flags & NSS_WARNED) 1194 log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n", 1195 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1196 if (fh) 1197 FREE(fh, M_TEMP); 1198 if (path) 1199 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1200 NFS_SOCK_DBG("nfs connect %s search failed, returning %d\n", 1201 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); 1202 return (error); 1203 } 1204 1205 /* try to use nss_sock */ 1206 nso = nss.nss_sock; 1207 nss.nss_sock = NULL; 1208 1209 /* We may be speaking to portmap first... to determine port(s). 
*/ 1210 if (nso->nso_saddr->sa_family == AF_INET) 1211 port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port); 1212 else 1213 port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port); 1214 if (port == PMAPPORT) { 1215 /* Use this portmapper port to get the port #s we need. */ 1216 NFS_SOCK_DBG("nfs connect %s got portmapper socket %p\n", 1217 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); 1218 1219 /* remove the connect upcall so nfs_portmap_lookup() can use this socket */ 1220 sock_setupcall(nso->nso_so, NULL, NULL); 1221 1222 /* Set up socket address and port for NFS socket. */ 1223 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1224 1225 /* If NFS version not set, try NFSv3 then NFSv2. */ 1226 nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3; 1227 1228 if (!(port = nmp->nm_nfsport)) { 1229 if (ss.ss_family == AF_INET) 1230 ((struct sockaddr_in*)&ss)->sin_port = htons(0); 1231 else if (ss.ss_family == AF_INET6) 1232 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); 1233 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1234 nso->nso_so, NFS_PROG, nfsvers, 1235 (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo); 1236 if (!error) { 1237 if (ss.ss_family == AF_INET) 1238 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1239 else if (ss.ss_family == AF_INET6) 1240 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1241 if (!port) 1242 error = EPROGUNAVAIL; 1243 } 1244 if (error && !nmp->nm_vers) { 1245 nfsvers = NFS_VER2; 1246 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1247 nso->nso_so, NFS_PROG, nfsvers, 1248 (nso->nso_sotype == SOCK_DGRAM) ? 
IPPROTO_UDP : IPPROTO_TCP, timeo); 1249 if (!error) { 1250 if (ss.ss_family == AF_INET) 1251 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1252 else if (ss.ss_family == AF_INET6) 1253 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1254 if (!port) 1255 error = EPROGUNAVAIL; 1256 } 1257 } 1258 if (error) { 1259 nfs_socket_search_update_error(&nss, error); 1260 nfs_socket_destroy(nso); 1261 goto keepsearching; 1262 } 1263 } 1264 /* Create NFS protocol socket and add it to the list of sockets. */ 1265 error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port, 1266 NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs); 1267 if (error) { 1268 nfs_socket_search_update_error(&nss, error); 1269 nfs_socket_destroy(nso); 1270 goto keepsearching; 1271 } 1272 nsonfs->nso_location = nso->nso_location; 1273 nsonfs->nso_wake = &nss; 1274 error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs); 1275 if (error) { 1276 nfs_socket_search_update_error(&nss, error); 1277 nfs_socket_destroy(nsonfs); 1278 nfs_socket_destroy(nso); 1279 goto keepsearching; 1280 } 1281 TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link); 1282 nss.nss_sockcnt++; 1283 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { 1284 /* Set up socket address and port for MOUNT socket. */ 1285 error = 0; 1286 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1287 port = nmp->nm_mountport; 1288 if (ss.ss_family == AF_INET) 1289 ((struct sockaddr_in*)&ss)->sin_port = htons(port); 1290 else if (ss.ss_family == AF_INET6) 1291 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); 1292 if (!port) { 1293 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ 1294 /* If NFS version is unknown, optimistically choose for NFSv3. */ 1295 int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3; 1296 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? 
IPPROTO_UDP : IPPROTO_TCP; 1297 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1298 nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo); 1299 } 1300 if (!error) { 1301 if (ss.ss_family == AF_INET) 1302 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1303 else if (ss.ss_family == AF_INET6) 1304 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1305 if (!port) 1306 error = EPROGUNAVAIL; 1307 } 1308 /* create sockaddr for MOUNT */ 1309 if (!error) 1310 MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO); 1311 if (!error && !nsonfs->nso_saddr2) 1312 error = ENOMEM; 1313 if (!error) 1314 bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len); 1315 if (error) { 1316 lck_mtx_lock(&nsonfs->nso_lock); 1317 nsonfs->nso_error = error; 1318 nsonfs->nso_flags |= NSO_DEAD; 1319 lck_mtx_unlock(&nsonfs->nso_lock); 1320 } 1321 } 1322 nfs_socket_destroy(nso); 1323 goto keepsearching; 1324 } 1325 1326 /* nso is an NFS socket */ 1327 NFS_SOCK_DBG("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); 1328 1329 /* If NFS version wasn't specified, it was determined during the connect. */ 1330 nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version; 1331 1332 /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */ 1333 if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { 1334 error = 0; 1335 saddr = nso->nso_saddr2; 1336 if (!saddr) { 1337 /* Need sockaddr for MOUNT port */ 1338 bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); 1339 port = nmp->nm_mountport; 1340 if (ss.ss_family == AF_INET) 1341 ((struct sockaddr_in*)&ss)->sin_port = htons(port); 1342 else if (ss.ss_family == AF_INET6) 1343 ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); 1344 if (!port) { 1345 /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ 1346 int mntvers = (nfsvers == NFS_VER2) ? 
RPCMNT_VER1 : RPCMNT_VER3; 1347 int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP; 1348 error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, 1349 NULL, RPCPROG_MNT, mntvers, mntproto, timeo); 1350 if (ss.ss_family == AF_INET) 1351 port = ntohs(((struct sockaddr_in*)&ss)->sin_port); 1352 else if (ss.ss_family == AF_INET6) 1353 port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); 1354 } 1355 if (!error) { 1356 if (port) 1357 saddr = (struct sockaddr*)&ss; 1358 else 1359 error = EPROGUNAVAIL; 1360 } 1361 } 1362 if (saddr) 1363 MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO); 1364 if (saddr && fh) 1365 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 1366 if (!saddr || !fh || !path) { 1367 if (!error) 1368 error = ENOMEM; 1369 if (fh) 1370 FREE(fh, M_TEMP); 1371 if (path) 1372 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1373 fh = NULL; 1374 path = NULL; 1375 nfs_socket_search_update_error(&nss, error); 1376 nfs_socket_destroy(nso); 1377 goto keepsearching; 1378 } 1379 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1); 1380 error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers, 1381 path, vfs_context_current(), timeo, fh, &nmp->nm_servsec); 1382 NFS_SOCK_DBG("nfs connect %s socket %p mount %d\n", 1383 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); 1384 if (!error) { 1385 /* Make sure we can agree on a security flavor. */ 1386 int o, s; /* indices into mount option and server security flavor lists */ 1387 int found = 0; 1388 1389 if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) { 1390 /* Some servers return an empty list to indicate RPCAUTH_SYS? */ 1391 nmp->nm_servsec.count = 1; 1392 nmp->nm_servsec.flavors[0] = RPCAUTH_SYS; 1393 } 1394 if (nmp->nm_sec.count) { 1395 /* Choose the first flavor in our list that the server supports. 
*/ 1396 if (!nmp->nm_servsec.count) { 1397 /* we don't know what the server supports, just use our first choice */ 1398 nmp->nm_auth = nmp->nm_sec.flavors[0]; 1399 found = 1; 1400 } 1401 for (o=0; !found && (o < nmp->nm_sec.count); o++) 1402 for (s=0; !found && (s < nmp->nm_servsec.count); s++) 1403 if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) { 1404 nmp->nm_auth = nmp->nm_sec.flavors[o]; 1405 found = 1; 1406 } 1407 } else { 1408 /* Choose the first one we support from the server's list. */ 1409 if (!nmp->nm_servsec.count) { 1410 nmp->nm_auth = RPCAUTH_SYS; 1411 found = 1; 1412 } 1413 for (s=0; s < nmp->nm_servsec.count; s++) 1414 switch (nmp->nm_servsec.flavors[s]) { 1415 case RPCAUTH_SYS: 1416 /* prefer RPCAUTH_SYS to RPCAUTH_NONE */ 1417 if (found && (nmp->nm_auth == RPCAUTH_NONE)) 1418 found = 0; 1419 case RPCAUTH_NONE: 1420 case RPCAUTH_KRB5: 1421 case RPCAUTH_KRB5I: 1422 case RPCAUTH_KRB5P: 1423 if (!found) { 1424 nmp->nm_auth = nmp->nm_servsec.flavors[s]; 1425 found = 1; 1426 } 1427 break; 1428 } 1429 } 1430 error = !found ? EAUTH : 0; 1431 } 1432 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1433 path = NULL; 1434 if (error) { 1435 nfs_socket_search_update_error(&nss, error); 1436 FREE(fh, M_TEMP); 1437 fh = NULL; 1438 nfs_socket_destroy(nso); 1439 goto keepsearching; 1440 } 1441 if (nmp->nm_fh) 1442 FREE(nmp->nm_fh, M_TEMP); 1443 nmp->nm_fh = fh; 1444 fh = NULL; 1445 NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT); 1446 } 1447 1448 /* put the real upcall in place */ 1449 upcall = (nso->nso_sotype == SOCK_STREAM) ? 
nfs_tcp_rcv : nfs_udp_rcv; 1450 error = sock_setupcall(nso->nso_so, upcall, nmp); 1451 if (error) { 1452 nfs_socket_search_update_error(&nss, error); 1453 nfs_socket_destroy(nso); 1454 goto keepsearching; 1455 } 1456 1457 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1458 /* set mntfromname to this location */ 1459 if (!NM_OMATTR_GIVEN(nmp, MNTFROM)) 1460 nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, 1461 vfs_statfs(nmp->nm_mountp)->f_mntfromname, 1462 sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0); 1463 /* some negotiated values need to remain unchanged for the life of the mount */ 1464 if (!nmp->nm_sotype) 1465 nmp->nm_sotype = nso->nso_sotype; 1466 if (!nmp->nm_vers) { 1467 nmp->nm_vers = nfsvers; 1468 /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */ 1469 if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) { 1470 if (nso->nso_saddr->sa_family == AF_INET) 1471 port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port); 1472 else if (nso->nso_saddr->sa_family == AF_INET6) 1473 port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); 1474 else 1475 port = 0; 1476 if (port == NFS_PORT) 1477 nmp->nm_nfsport = NFS_PORT; 1478 } 1479 } 1480 /* do some version-specific pre-mount set up */ 1481 if (nmp->nm_vers >= NFS_VER4) { 1482 microtime(&now); 1483 nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec; 1484 if (!NMFLAG(nmp, NOCALLBACK)) 1485 nfs4_mount_callback_setup(nmp); 1486 } 1487 } 1488 1489 /* Initialize NFS socket state variables */ 1490 lck_mtx_lock(&nmp->nm_lock); 1491 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = 1492 nmp->nm_srtt[3] = (NFS_TIMEO << 3); 1493 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 1494 nmp->nm_sdrtt[3] = 0; 1495 if (nso->nso_sotype == SOCK_DGRAM) { 1496 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 1497 nmp->nm_sent = 0; 1498 } else if (nso->nso_sotype == SOCK_STREAM) { 1499 
nmp->nm_timeouts = 0; 1500 } 1501 nmp->nm_sockflags &= ~NMSOCK_CONNECTING; 1502 nmp->nm_sockflags |= NMSOCK_SETUP; 1503 /* move the socket to the mount structure */ 1504 nmp->nm_nso = nso; 1505 oldsaddr = nmp->nm_saddr; 1506 nmp->nm_saddr = nso->nso_saddr; 1507 lck_mtx_unlock(&nmp->nm_lock); 1508 error = nfs_connect_setup(nmp); 1509 lck_mtx_lock(&nmp->nm_lock); 1510 nmp->nm_sockflags &= ~NMSOCK_SETUP; 1511 if (!error) { 1512 nmp->nm_sockflags |= NMSOCK_READY; 1513 wakeup(&nmp->nm_sockflags); 1514 } 1515 if (error) { 1516 NFS_SOCK_DBG("nfs connect %s socket %p setup failed %d\n", 1517 vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); 1518 nfs_socket_search_update_error(&nss, error); 1519 nmp->nm_saddr = oldsaddr; 1520 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1521 /* undo settings made prior to setup */ 1522 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE)) 1523 nmp->nm_sotype = 0; 1524 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) { 1525 if (nmp->nm_vers >= NFS_VER4) { 1526 if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) 1527 nmp->nm_nfsport = 0; 1528 if (nmp->nm_cbid) 1529 nfs4_mount_callback_shutdown(nmp); 1530 if (IS_VALID_CRED(nmp->nm_mcred)) 1531 kauth_cred_unref(&nmp->nm_mcred); 1532 bzero(&nmp->nm_un, sizeof(nmp->nm_un)); 1533 } 1534 nmp->nm_vers = 0; 1535 } 1536 } 1537 lck_mtx_unlock(&nmp->nm_lock); 1538 nmp->nm_nso = NULL; 1539 nfs_socket_destroy(nso); 1540 goto keepsearching; 1541 } 1542 1543 /* update current location */ 1544 if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) && 1545 (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) { 1546 /* server has changed, we should initiate failover/recovery */ 1547 // XXX 1548 } 1549 nmp->nm_locations.nl_current = nso->nso_location; 1550 nmp->nm_locations.nl_current.nli_flags |= NLI_VALID; 1551 1552 if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { 1553 /* We have now successfully connected... make a note of it. 
*/ 1554 nmp->nm_sockflags |= NMSOCK_HASCONNECTED; 1555 } 1556 1557 lck_mtx_unlock(&nmp->nm_lock); 1558 if (oldsaddr) 1559 FREE(oldsaddr, M_SONAME); 1560 1561 if (nss.nss_flags & NSS_WARNED) 1562 log(LOG_INFO, "nfs_connect: socket connect completed for %s\n", 1563 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1564 1565 nmp->nm_nss = NULL; 1566 nfs_socket_search_cleanup(&nss); 1567 if (fh) 1568 FREE(fh, M_TEMP); 1569 if (path) 1570 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 1571 NFS_SOCK_DBG("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); 1572 return (0); 1573} 1574 1575 1576/* setup & confirm socket connection is functional */ 1577int 1578nfs_connect_setup(struct nfsmount *nmp) 1579{ 1580 int error = 0; 1581 1582 if (nmp->nm_vers >= NFS_VER4) { 1583 if (nmp->nm_state & NFSSTA_CLIENTID) { 1584 /* first, try to renew our current state */ 1585 error = nfs4_renew(nmp, R_SETUP); 1586 if ((error == NFSERR_ADMIN_REVOKED) || 1587 (error == NFSERR_CB_PATH_DOWN) || 1588 (error == NFSERR_EXPIRED) || 1589 (error == NFSERR_LEASE_MOVED) || 1590 (error == NFSERR_STALE_CLIENTID)) { 1591 lck_mtx_lock(&nmp->nm_lock); 1592 nfs_need_recover(nmp, error); 1593 lck_mtx_unlock(&nmp->nm_lock); 1594 } 1595 } 1596 error = nfs4_setclientid(nmp); 1597 } 1598 return (error); 1599} 1600 1601/* 1602 * NFS socket reconnect routine: 1603 * Called when a connection is broken. 1604 * - disconnect the old socket 1605 * - nfs_connect() again 1606 * - set R_MUSTRESEND for all outstanding requests on mount point 1607 * If this fails the mount point is DEAD! 
 */
int
nfs_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;
	struct timeval now;
	thread_t thd = current_thread();
	int error, wentdown = 0, verbose = 1;
	time_t lastmsg;
	int timeo;

	microuptime(&now);
	/* schedule the first "can not connect" message nm_tprintf_initial_delay from now */
	lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);

	nfs_disconnect(nmp);


	lck_mtx_lock(&nmp->nm_lock);
	/* use a shorter connect timeout for "squishy" mounts */
	timeo = nfs_is_squishy(nmp) ? 8 : 30;
	lck_mtx_unlock(&nmp->nm_lock);

	/* keep retrying the connect until it succeeds or we must give up */
	while ((error = nfs_connect(nmp, verbose, timeo))) {
		verbose = 0;	/* only be verbose on the first attempt */
		nfs_disconnect(nmp);
		if ((error == EINTR) || (error == ERESTART))
			return (EINTR);
		if (error == EIO)
			return (EIO);
		microuptime(&now);
		if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) {
			lastmsg = now.tv_sec;
			nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect");
			wentdown = 1;
		}
		lck_mtx_lock(&nmp->nm_lock);
		if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
			/* we're not yet completely mounted and */
			/* we can't reconnect, so we fail */
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG("Not mounted returning %d\n", error);
			return (error);
		}
		nfs_mount_check_dead_timeout(nmp);
		if ((error = nfs_sigintr(nmp, NULL, thd, 1))) {
			lck_mtx_unlock(&nmp->nm_lock);
			return (error);
		}
		lck_mtx_unlock(&nmp->nm_lock);
		/* pause briefly before trying again */
		tsleep(nfs_reconnect, PSOCK, "nfs_reconnect_delay", 2*hz);
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			return (error);
	}

	if (wentdown)
		nfs_up(nmp, thd, NFSSTA_TIMEO, "connected");

	/*
	 * Loop through outstanding request list and mark all requests
	 * as needing a resend.  (Though nfs_need_reconnect() probably
	 * marked them all already.)
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			/* skip requests that already completed or are already marked */
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	struct nfs_socket *nso;

	lck_mtx_lock(&nmp->nm_lock);
tryagain:
	if (nmp->nm_nso) {
		struct timespec ts = { 1, 0 };
		if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
			nmp->nm_state |= NFSSTA_WANTSND;
			msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts);
			goto tryagain;
		}
		if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
			msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
			goto tryagain;
		}
		nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
		nmp->nm_sockflags &= ~NMSOCK_READY;
		nso = nmp->nm_nso;
		nmp->nm_nso = NULL;
		/* nm_saddr keeps ownership of the shared sockaddr; don't let the socket free it */
		if (nso->nso_saddr == nmp->nm_saddr)
			nso->nso_saddr = NULL;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_socket_destroy(nso);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
		lck_mtx_unlock(&nmp->nm_lock);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
}

/*
 * mark an NFS mount as needing a reconnect/resends.
 */
void
nfs_need_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rq;

	/* clear READY/SETUP so the socket thread initiates a reconnect */
	lck_mtx_lock(&nmp->nm_lock);
	nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP);
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * Loop through outstanding request list and
	 * mark all requests as needing a resend.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
		if (rq->r_nmp == nmp) {
			lck_mtx_lock(&rq->r_mtx);
			/* skip requests that already completed or are already marked */
			if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) {
				rq->r_flags |= R_MUSTRESEND;
				rq->r_rtt = -1;
				wakeup(rq);
				if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
					nfs_asyncio_resend(rq);
			}
			lck_mtx_unlock(&rq->r_mtx);
		}
	}
	lck_mtx_unlock(nfs_request_mutex);
}


/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish;
	nfsnode_t np;
	int do_reconnect_sleep = 0;

	lck_mtx_lock(&nmp->nm_lock);

	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
	       !TAILQ_EMPTY(&nmp->nm_resendq) ||
	       !LIST_EMPTY(&nmp->nm_monlist) ||
	       nmp->nm_deadto_start ||
	       (nmp->nm_state & NFSSTA_RECOVER) ||
	       ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
	{
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_FORCE)) {
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/*
1793 * XXX We don't want to call reconnect again right away if returned errors 1794 * before that may not have blocked. This has caused spamming null procs 1795 * from machines in the pass. 1796 */ 1797 if (do_reconnect_sleep) 1798 tsleep(nfs_mount_sock_thread, PSOCK, "nfs_reconnect_sock_thread_delay", hz); 1799 error = nfs_reconnect(nmp); 1800 if (error) { 1801 int lvl = 7; 1802 if (error == EIO || error == EINTR) { 1803 lvl = (do_reconnect_sleep++ % 600) ? 7 : 0; 1804 } 1805 nfs_printf(NFS_FAC_SOCK, lvl, "nfs reconnect %s: returned %d\n", 1806 vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); 1807 } else { 1808 nmp->nm_reconnect_start = 0; 1809 do_reconnect_sleep = 0; 1810 } 1811 lck_mtx_lock(&nmp->nm_lock); 1812 } 1813 if ((nmp->nm_sockflags & NMSOCK_READY) && 1814 (nmp->nm_state & NFSSTA_RECOVER) && 1815 !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && 1816 !(nmp->nm_state & NFSSTA_FORCE)) { 1817 /* perform state recovery */ 1818 lck_mtx_unlock(&nmp->nm_lock); 1819 nfs_recover(nmp); 1820 lck_mtx_lock(&nmp->nm_lock); 1821 } 1822 /* handle NFSv4 delegation returns */ 1823 while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & NFSSTA_FORCE) && 1824 (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) && 1825 ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) { 1826 lck_mtx_unlock(&nmp->nm_lock); 1827 nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred); 1828 lck_mtx_lock(&nmp->nm_lock); 1829 } 1830 /* do resends, if necessary/possible */ 1831 while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || (nmp->nm_state & NFSSTA_FORCE)) && 1832 ((req = TAILQ_FIRST(&nmp->nm_resendq)))) { 1833 if (req->r_resendtime) 1834 microuptime(&now); 1835 while (req && !(nmp->nm_state & NFSSTA_FORCE) && req->r_resendtime && (now.tv_sec < req->r_resendtime)) 1836 req = TAILQ_NEXT(req, r_rchain); 1837 if (!req) 1838 break; 1839 TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); 1840 req->r_rchain.tqe_next = NFSREQNOLIST; 1841 
lck_mtx_unlock(&nmp->nm_lock); 1842 lck_mtx_lock(&req->r_mtx); 1843 if (req->r_error || req->r_nmrep.nmc_mhead) { 1844 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1845 req->r_flags &= ~R_RESENDQ; 1846 wakeup(req); 1847 lck_mtx_unlock(&req->r_mtx); 1848 if (dofinish) 1849 nfs_asyncio_finish(req); 1850 lck_mtx_lock(&nmp->nm_lock); 1851 continue; 1852 } 1853 if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) { 1854 req->r_flags &= ~R_RESTART; 1855 req->r_resendtime = 0; 1856 lck_mtx_unlock(&req->r_mtx); 1857 /* async RPCs on GSS mounts need to be rebuilt and resent. */ 1858 nfs_reqdequeue(req); 1859 if (nfs_request_using_gss(req)) { 1860 nfs_gss_clnt_rpcdone(req); 1861 error = nfs_gss_clnt_args_restore(req); 1862 if (error == ENEEDAUTH) 1863 req->r_xid = 0; 1864 } 1865 NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n", 1866 nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid, 1867 req->r_flags, req->r_rtt); 1868 error = !req->r_nmp ? ENXIO : 0; /* unmounted? */ 1869 if (!error) 1870 error = nfs_sigintr(nmp, req, req->r_thread, 0); 1871 if (!error) 1872 error = nfs_request_add_header(req); 1873 if (!error) 1874 error = nfs_request_send(req, 0); 1875 lck_mtx_lock(&req->r_mtx); 1876 if (req->r_flags & R_RESENDQ) 1877 req->r_flags &= ~R_RESENDQ; 1878 if (error) 1879 req->r_error = error; 1880 wakeup(req); 1881 dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1882 lck_mtx_unlock(&req->r_mtx); 1883 if (dofinish) 1884 nfs_asyncio_finish(req); 1885 lck_mtx_lock(&nmp->nm_lock); 1886 error = 0; 1887 continue; 1888 } 1889 NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n", 1890 req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); 1891 error = !req->r_nmp ? ENXIO : 0; /* unmounted? 
*/ 1892 if (!error) 1893 error = nfs_sigintr(nmp, req, req->r_thread, 0); 1894 if (!error) { 1895 req->r_flags |= R_SENDING; 1896 lck_mtx_unlock(&req->r_mtx); 1897 error = nfs_send(req, 0); 1898 lck_mtx_lock(&req->r_mtx); 1899 if (!error) { 1900 if (req->r_flags & R_RESENDQ) 1901 req->r_flags &= ~R_RESENDQ; 1902 wakeup(req); 1903 lck_mtx_unlock(&req->r_mtx); 1904 lck_mtx_lock(&nmp->nm_lock); 1905 continue; 1906 } 1907 } 1908 req->r_error = error; 1909 if (req->r_flags & R_RESENDQ) 1910 req->r_flags &= ~R_RESENDQ; 1911 wakeup(req); 1912 dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); 1913 lck_mtx_unlock(&req->r_mtx); 1914 if (dofinish) 1915 nfs_asyncio_finish(req); 1916 lck_mtx_lock(&nmp->nm_lock); 1917 } 1918 if (nmp->nm_deadto_start) 1919 nfs_mount_check_dead_timeout(nmp); 1920 if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) 1921 break; 1922 /* check monitored nodes, if necessary/possible */ 1923 if (!LIST_EMPTY(&nmp->nm_monlist)) { 1924 nmp->nm_state |= NFSSTA_MONITOR_SCAN; 1925 LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) { 1926 if (!(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE))) 1927 break; 1928 np->n_mflag |= NMMONSCANINPROG; 1929 lck_mtx_unlock(&nmp->nm_lock); 1930 error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR)); 1931 if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */ 1932 nfs_data_update_size(np, 0); 1933 lck_mtx_lock(&nmp->nm_lock); 1934 np->n_mflag &= ~NMMONSCANINPROG; 1935 if (np->n_mflag & NMMONSCANWANT) { 1936 np->n_mflag &= ~NMMONSCANWANT; 1937 wakeup(&np->n_mflag); 1938 } 1939 if (error || !(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE))) 1940 break; 1941 } 1942 nmp->nm_state &= ~NFSSTA_MONITOR_SCAN; 1943 if (nmp->nm_state & NFSSTA_UNMOUNTING) 1944 wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */ 1945 } 1946 if 
((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) { 1947 if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) || 1948 (nmp->nm_state & NFSSTA_RECOVER)) 1949 ts.tv_sec = 1; 1950 else 1951 ts.tv_sec = 5; 1952 msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts); 1953 } 1954 } 1955 1956 /* If we're unmounting, send the unmount RPC, if requested/appropriate. */ 1957 if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && 1958 (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) && 1959 (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { 1960 lck_mtx_unlock(&nmp->nm_lock); 1961 nfs3_umount_rpc(nmp, vfs_context_kernel(), 1962 (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2); 1963 lck_mtx_lock(&nmp->nm_lock); 1964 } 1965 1966 if (nmp->nm_sockthd == thd) 1967 nmp->nm_sockthd = NULL; 1968 lck_mtx_unlock(&nmp->nm_lock); 1969 wakeup(&nmp->nm_sockthd); 1970 thread_terminate(thd); 1971} 1972 1973/* start or wake a mount's socket thread */ 1974void 1975nfs_mount_sock_thread_wake(struct nfsmount *nmp) 1976{ 1977 if (nmp->nm_sockthd) 1978 wakeup(&nmp->nm_sockthd); 1979 else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS) 1980 thread_deallocate(nmp->nm_sockthd); 1981} 1982 1983/* 1984 * Check if we should mark the mount dead because the 1985 * unresponsive mount has reached the dead timeout. 1986 * (must be called with nmp locked) 1987 */ 1988void 1989nfs_mount_check_dead_timeout(struct nfsmount *nmp) 1990{ 1991 struct timeval now; 1992 1993 if (nmp->nm_deadto_start == 0) 1994 return; 1995 if (nmp->nm_state & NFSSTA_DEAD) 1996 return; 1997 nfs_is_squishy(nmp); 1998 if (nmp->nm_curdeadtimeout <= 0) 1999 return; 2000 microuptime(&now); 2001 if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout) 2002 return; 2003 printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, 2004 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? 
"squished " : ""); 2005 nmp->nm_state |= NFSSTA_DEAD; 2006 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); 2007} 2008 2009/* 2010 * NFS callback channel socket state 2011 */ 2012struct nfs_callback_socket 2013{ 2014 TAILQ_ENTRY(nfs_callback_socket) ncbs_link; 2015 socket_t ncbs_so; /* the socket */ 2016 struct sockaddr_storage ncbs_saddr; /* socket address */ 2017 struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */ 2018 time_t ncbs_stamp; /* last accessed at */ 2019 uint32_t ncbs_flags; /* see below */ 2020}; 2021#define NCBSOCK_UPCALL 0x0001 2022#define NCBSOCK_UPCALLWANT 0x0002 2023#define NCBSOCK_DEAD 0x0004 2024 2025/* 2026 * NFS callback channel state 2027 * 2028 * One listening socket for accepting socket connections from servers and 2029 * a list of connected sockets to handle callback requests on. 2030 * Mounts registered with the callback channel are assigned IDs and 2031 * put on a list so that the callback request handling code can match 2032 * the requests up with mounts. 2033 */ 2034socket_t nfs4_cb_so = NULL; 2035socket_t nfs4_cb_so6 = NULL; 2036in_port_t nfs4_cb_port = 0; 2037in_port_t nfs4_cb_port6 = 0; 2038uint32_t nfs4_cb_id = 0; 2039uint32_t nfs4_cb_so_usecount = 0; 2040TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks; 2041TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts; 2042 2043int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t); 2044 2045/* 2046 * Set up the callback channel for the NFS mount. 2047 * 2048 * Initializes the callback channel socket state and 2049 * assigns a callback ID to the mount. 
 */
void
nfs4_mount_callback_setup(struct nfsmount *nmp)
{
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	socket_t so = NULL;
	socket_t so6 = NULL;
	struct timeval timeo;
	int error, on = 1;
	in_port_t port;

	lck_mtx_lock(nfs_global_mutex);
	if (nfs4_cb_id == 0) {
		/* first time: initialize the global callback lists */
		TAILQ_INIT(&nfs4_cb_mounts);
		TAILQ_INIT(&nfs4_cb_socks);
		nfs4_cb_id++;
	}
	/* assign a nonzero callback ID to this mount */
	nmp->nm_cbid = nfs4_cb_id++;
	if (nmp->nm_cbid == 0)
		nmp->nm_cbid = nfs4_cb_id++;
	nfs4_cb_so_usecount++;
	TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);

	/* listening sockets already set up by an earlier mount? */
	if (nfs4_cb_so) {
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}

	/* IPv4 */
	error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
		goto fail;
	}
	so = nfs4_cb_so;

	sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
	error = sock_bind(so, (struct sockaddr *)&sin);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
		goto fail;
	}
	error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
		goto fail;
	}
	nfs4_cb_port = ntohs(sin.sin_port);

	error = sock_listen(so, 32);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
		goto fail;
	}

	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
	sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	error = 0;

	/* IPv6 */
	error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
		goto fail;
	}
	so6 = nfs4_cb_so6;

	sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
	/* try to use specified port or same port as IPv4 */
	port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
ipv6_bind_again:
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_any;
	sin6.sin6_port = htons(port);
	error = sock_bind(so6, (struct sockaddr *)&sin6);
	if (error) {
		if (port != nfs_callback_port) {
			/* if we simply tried to match the IPv4 port, then try any port */
			port = 0;
			goto ipv6_bind_again;
		}
		log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
		goto fail;
	}
	error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
		goto fail;
	}
	nfs4_cb_port6 = ntohs(sin6.sin6_port);

	error = sock_listen(so6, 32);
	if (error) {
		log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
		goto fail;
	}

	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
	error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
	sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
	error = 0;

fail:
	if (error) {
		/* NOTE(review): failure on the IPv6 side tears down the IPv4 listener too */
		nfs4_cb_so = nfs4_cb_so6 = NULL;
		lck_mtx_unlock(nfs_global_mutex);
		if (so) {
			sock_shutdown(so, SHUT_RDWR);
			sock_close(so);
		}
		if (so6) {
			sock_shutdown(so6, SHUT_RDWR);
			sock_close(so6);
		}
	} else {
		lck_mtx_unlock(nfs_global_mutex);
	}
}

/*
 * Shut down the callback channel for the NFS mount.
 *
 * Clears the mount's callback ID and releases the mount's
 * reference on the callback socket. Last reference dropped
 * will also shut down the callback socket(s).
 */
void
nfs4_mount_callback_shutdown(struct nfsmount *nmp)
{
	struct nfs_callback_socket *ncbsp;
	socket_t so, so6;
	struct nfs4_cb_sock_list cb_socks;
	struct timespec ts = {1,0};

	lck_mtx_lock(nfs_global_mutex);
	TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
	/* wait for any callbacks in progress to complete */
	while (nmp->nm_cbrefs)
		msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
	nmp->nm_cbid = 0;
	if (--nfs4_cb_so_usecount) {
		/* other mounts still using the callback channel */
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}
	/* last user: grab the sockets and list under the lock, close them after */
	so = nfs4_cb_so;
	so6 = nfs4_cb_so6;
	nfs4_cb_so = nfs4_cb_so6 = NULL;
	TAILQ_INIT(&cb_socks);
	TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
	lck_mtx_unlock(nfs_global_mutex);
	if (so) {
		sock_shutdown(so, SHUT_RDWR);
		sock_close(so);
	}
	if (so6) {
		sock_shutdown(so6, SHUT_RDWR);
		sock_close(so6);
	}
	while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
		TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
		sock_close(ncbsp->ncbs_so);
		nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
		FREE(ncbsp, M_TEMP);
	}
}

/*
 * Check periodically for stale/unused nfs callback sockets
 */
#define NFS4_CB_TIMER_PERIOD	30
#define NFS4_CB_IDLE_MAX	300
void
nfs4_callback_timer(__unused void *param0, __unused void *param1)
{
	struct nfs_callback_socket *ncbsp, *nextncbsp;
	struct timeval now;

loop:
	lck_mtx_lock(nfs_global_mutex);
	if (TAILQ_EMPTY(&nfs4_cb_socks)) {
		nfs4_callback_timer_on = 0;
		lck_mtx_unlock(nfs_global_mutex);
		return;
	}
	microuptime(&now);
	TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) {
		/* reap sockets that are marked dead or idle past NFS4_CB_IDLE_MAX */
		if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) &&
		    (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX)))
			continue;
		TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
		/* drop the lock to close the socket, then rescan from the top */
		lck_mtx_unlock(nfs_global_mutex);
		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
		sock_close(ncbsp->ncbs_so);
		nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
		FREE(ncbsp, M_TEMP);
		goto loop;
	}
	nfs4_callback_timer_on = 1;
	nfs_interval_timer_start(nfs4_callback_timer_call,
		NFS4_CB_TIMER_PERIOD * 1000);
	lck_mtx_unlock(nfs_global_mutex);
}

/*
 * Accept a new callback socket.
 */
void
nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
{
	socket_t newso = NULL;
	struct nfs_callback_socket *ncbsp;
	struct nfsmount *nmp;
	struct timeval timeo, now;
	int error, on = 1, ip;

	/* identify which listening socket (IPv4 or IPv6) fired the upcall */
	if (so == nfs4_cb_so)
		ip = 4;
	else if (so == nfs4_cb_so6)
		ip = 6;
	else
		return;

	/* allocate/initialize a new nfs_callback_socket */
	MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK);
	if (!ncbsp) {
		log(LOG_ERR, "nfs callback accept: no memory for new socket\n");
		return;
	}
	bzero(ncbsp, sizeof(*ncbsp));
	ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
	nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);

	/* accept a new socket */
	error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
			ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
			nfs4_cb_rcv, ncbsp, &newso);
	if (error) {
		log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
		FREE(ncbsp, M_TEMP);
		return;
	}

	/* set up the new socket */
	/* receive timeout shouldn't matter.  If timeout on send, we'll want to drop the socket */
	timeo.tv_usec = 0;
	timeo.tv_sec = 60;
	error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
	error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error)
		log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
	sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
	sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));

	ncbsp->ncbs_so = newso;
	microuptime(&now);
	ncbsp->ncbs_stamp = now.tv_sec;

	lck_mtx_lock(nfs_global_mutex);

	/* add it to the list */
	TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);

	/* verify it's from a host we have mounted */
	TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
		/* check if socket's source address matches this mount's server address */
		if (!nmp->nm_saddr)
			continue;
		if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
			break;
	}
	if (!nmp) /* we don't want this socket, mark it dead */
		ncbsp->ncbs_flags |= NCBSOCK_DEAD;

	/* make sure the callback socket cleanup timer is running */
	/* (shorten the timer if we've got a socket we don't want) */
	if (!nfs4_callback_timer_on) {
		nfs4_callback_timer_on = 1;
		nfs_interval_timer_start(nfs4_callback_timer_call,
			!nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000));
	} else if (!nmp && (nfs4_callback_timer_on < 2)) {
		nfs4_callback_timer_on = 2;
		thread_call_cancel(nfs4_callback_timer_call);
		nfs_interval_timer_start(nfs4_callback_timer_call, 500);
	}

	lck_mtx_unlock(nfs_global_mutex);
}

/*
 * Receive mbufs from callback sockets into RPC records and process each record.
 * Detect connection has been closed and shut down.
 */
void
nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfs_callback_socket *ncbsp = arg;
	struct timespec ts = {1,0};
	struct timeval now;
	mbuf_t m;
	int error = 0, recv = 1;

	/* serialize upcalls on this socket via the NCBSOCK_UPCALL flag */
	lck_mtx_lock(nfs_global_mutex);
	while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
		/* wait if upcall is already in progress */
		ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
		msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
	}
	ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
	lck_mtx_unlock(nfs_global_mutex);

	/* loop while we make error-free progress */
	while (!error && recv) {
		error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
		if (m) /* handle the request */
			error = nfs4_cb_handler(ncbsp, m);
	}

	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/*
		 * Socket is either being closed or should be.
		 * We can't close the socket in the context of the upcall.
		 * So we mark it as dead and leave it for the cleanup timer to reap.
		 */
		ncbsp->ncbs_stamp = 0;
		ncbsp->ncbs_flags |= NCBSOCK_DEAD;
	} else {
		microuptime(&now);
		ncbsp->ncbs_stamp = now.tv_sec;
	}

	lck_mtx_lock(nfs_global_mutex);
	ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
	lck_mtx_unlock(nfs_global_mutex);
	wakeup(ncbsp);
}

/*
 * Handle an NFS callback channel request.
2420 */ 2421int 2422nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq) 2423{ 2424 socket_t so = ncbsp->ncbs_so; 2425 struct nfsm_chain nmreq, nmrep; 2426 mbuf_t mhead = NULL, mrest = NULL, m; 2427 struct msghdr msg; 2428 struct nfsmount *nmp; 2429 fhandle_t fh; 2430 nfsnode_t np; 2431 nfs_stateid stateid; 2432 uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes; 2433 uint32_t val, xid, procnum, taglen, cbid, numops, op, status; 2434 uint32_t auth_type, auth_len; 2435 uint32_t numres, *pnumres; 2436 int error = 0, replen, len; 2437 size_t sentlen = 0; 2438 2439 xid = numops = op = status = procnum = taglen = cbid = 0; 2440 2441 nfsm_chain_dissect_init(error, &nmreq, mreq); 2442 nfsm_chain_get_32(error, &nmreq, xid); // RPC XID 2443 nfsm_chain_get_32(error, &nmreq, val); // RPC Call 2444 nfsm_assert(error, (val == RPC_CALL), EBADRPC); 2445 nfsm_chain_get_32(error, &nmreq, val); // RPC Version 2446 nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH); 2447 nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number 2448 nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL); 2449 nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number 2450 nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH); 2451 nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number 2452 nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL); 2453 2454 /* Handle authentication */ 2455 /* XXX just ignore auth for now - handling kerberos may be tricky */ 2456 nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor 2457 nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length 2458 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); 2459 if (!error && (auth_len > 0)) 2460 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); 2461 nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE) 2462 nfsm_chain_get_32(error, 
&nmreq, auth_len); // verifier length 2463 nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); 2464 if (!error && (auth_len > 0)) 2465 nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); 2466 if (error) { 2467 status = error; 2468 error = 0; 2469 goto nfsmout; 2470 } 2471 2472 switch (procnum) { 2473 case NFSPROC4_CB_NULL: 2474 status = NFSERR_RETVOID; 2475 break; 2476 case NFSPROC4_CB_COMPOUND: 2477 /* tag, minorversion, cb ident, numops, op array */ 2478 nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */ 2479 nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC); 2480 2481 /* start building the body of the response */ 2482 nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED); 2483 nfsm_chain_init(&nmrep, mrest); 2484 2485 /* copy tag from request to response */ 2486 nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */ 2487 for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) { 2488 nfsm_chain_get_32(error, &nmreq, val); 2489 nfsm_chain_add_32(error, &nmrep, val); 2490 } 2491 2492 /* insert number of results placeholder */ 2493 numres = 0; 2494 nfsm_chain_add_32(error, &nmrep, numres); 2495 pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED); 2496 2497 nfsm_chain_get_32(error, &nmreq, val); /* minorversion */ 2498 nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH); 2499 nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */ 2500 nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */ 2501 if (error) { 2502 if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH)) 2503 status = error; 2504 else if ((error == ENOBUFS) || (error == ENOMEM)) 2505 status = NFSERR_RESOURCE; 2506 else 2507 status = NFSERR_SERVERFAULT; 2508 error = 0; 2509 nfsm_chain_null(&nmrep); 2510 goto nfsmout; 2511 } 2512 /* match the callback ID to a registered mount */ 2513 lck_mtx_lock(nfs_global_mutex); 2514 TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { 2515 if (nmp->nm_cbid != cbid) 2516 continue; 2517 /* 
verify socket's source address matches this mount's server address */ 2518 if (!nmp->nm_saddr) 2519 continue; 2520 if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) 2521 break; 2522 } 2523 /* mark the NFS mount as busy */ 2524 if (nmp) 2525 nmp->nm_cbrefs++; 2526 lck_mtx_unlock(nfs_global_mutex); 2527 if (!nmp) { 2528 /* if no mount match, just drop socket. */ 2529 error = EPERM; 2530 nfsm_chain_null(&nmrep); 2531 goto out; 2532 } 2533 2534 /* process ops, adding results to mrest */ 2535 while (numops > 0) { 2536 numops--; 2537 nfsm_chain_get_32(error, &nmreq, op); 2538 if (error) 2539 break; 2540 switch (op) { 2541 case NFS_OP_CB_GETATTR: 2542 // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS) 2543 np = NULL; 2544 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); 2545 bmlen = NFS_ATTR_BITMAP_LEN; 2546 nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen); 2547 if (error) { 2548 status = error; 2549 error = 0; 2550 numops = 0; /* don't process any more ops */ 2551 } else { 2552 /* find the node for the file handle */ 2553 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); 2554 if (error || !np) { 2555 status = NFSERR_BADHANDLE; 2556 error = 0; 2557 np = NULL; 2558 numops = 0; /* don't process any more ops */ 2559 } 2560 } 2561 nfsm_chain_add_32(error, &nmrep, op); 2562 nfsm_chain_add_32(error, &nmrep, status); 2563 if (!error && (status == EBADRPC)) 2564 error = status; 2565 if (np) { 2566 /* only allow returning size, change, and mtime attrs */ 2567 NFS_CLEAR_ATTRIBUTES(&rbitmap); 2568 attrbytes = 0; 2569 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) { 2570 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE); 2571 attrbytes += 2 * NFSX_UNSIGNED; 2572 } 2573 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) { 2574 NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE); 2575 attrbytes += 2 * NFSX_UNSIGNED; 2576 } 2577 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { 2578 NFS_BITMAP_SET(&rbitmap, 
NFS_FATTR_TIME_MODIFY); 2579 attrbytes += 3 * NFSX_UNSIGNED; 2580 } 2581 nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN); 2582 nfsm_chain_add_32(error, &nmrep, attrbytes); 2583 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) 2584 nfsm_chain_add_64(error, &nmrep, 2585 np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0)); 2586 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) 2587 nfsm_chain_add_64(error, &nmrep, np->n_size); 2588 if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { 2589 nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]); 2590 nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]); 2591 } 2592 nfs_node_unlock(np); 2593 vnode_put(NFSTOV(np)); 2594 np = NULL; 2595 } 2596 /* 2597 * If we hit an error building the reply, we can't easily back up. 2598 * So we'll just update the status and hope the server ignores the 2599 * extra garbage. 2600 */ 2601 break; 2602 case NFS_OP_CB_RECALL: 2603 // (STATEID, TRUNCATE, FH) -> (STATUS) 2604 np = NULL; 2605 nfsm_chain_get_stateid(error, &nmreq, &stateid); 2606 nfsm_chain_get_32(error, &nmreq, truncate); 2607 nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); 2608 if (error) { 2609 status = error; 2610 error = 0; 2611 numops = 0; /* don't process any more ops */ 2612 } else { 2613 /* find the node for the file handle */ 2614 error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); 2615 if (error || !np) { 2616 status = NFSERR_BADHANDLE; 2617 error = 0; 2618 np = NULL; 2619 numops = 0; /* don't process any more ops */ 2620 } else if (!(np->n_openflags & N_DELEG_MASK) || 2621 bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) { 2622 /* delegation stateid state doesn't match */ 2623 status = NFSERR_BAD_STATEID; 2624 numops = 0; /* don't process any more ops */ 2625 } 2626 if (!status) /* add node to recall queue, and wake socket thread */ 2627 nfs4_delegation_return_enqueue(np); 2628 if (np) { 
2629 nfs_node_unlock(np); 2630 vnode_put(NFSTOV(np)); 2631 } 2632 } 2633 nfsm_chain_add_32(error, &nmrep, op); 2634 nfsm_chain_add_32(error, &nmrep, status); 2635 if (!error && (status == EBADRPC)) 2636 error = status; 2637 break; 2638 case NFS_OP_CB_ILLEGAL: 2639 default: 2640 nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL); 2641 status = NFSERR_OP_ILLEGAL; 2642 nfsm_chain_add_32(error, &nmrep, status); 2643 numops = 0; /* don't process any more ops */ 2644 break; 2645 } 2646 numres++; 2647 } 2648 2649 if (!status && error) { 2650 if (error == EBADRPC) 2651 status = error; 2652 else if ((error == ENOBUFS) || (error == ENOMEM)) 2653 status = NFSERR_RESOURCE; 2654 else 2655 status = NFSERR_SERVERFAULT; 2656 error = 0; 2657 } 2658 2659 /* Now, set the numres field */ 2660 *pnumres = txdr_unsigned(numres); 2661 nfsm_chain_build_done(error, &nmrep); 2662 nfsm_chain_null(&nmrep); 2663 2664 /* drop the callback reference on the mount */ 2665 lck_mtx_lock(nfs_global_mutex); 2666 nmp->nm_cbrefs--; 2667 if (!nmp->nm_cbid) 2668 wakeup(&nmp->nm_cbrefs); 2669 lck_mtx_unlock(nfs_global_mutex); 2670 break; 2671 } 2672 2673nfsmout: 2674 if (status == EBADRPC) 2675 OSAddAtomic64(1, &nfsstats.rpcinvalid); 2676 2677 /* build reply header */ 2678 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead); 2679 nfsm_chain_init(&nmrep, mhead); 2680 nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */ 2681 nfsm_chain_add_32(error, &nmrep, xid); 2682 nfsm_chain_add_32(error, &nmrep, RPC_REPLY); 2683 if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) { 2684 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); 2685 if (status & NFSERR_AUTHERR) { 2686 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); 2687 nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR)); 2688 } else { 2689 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); 2690 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 2691 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 2692 } 2693 } else { 2694 /* reply 
status */
		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
		/* XXX RPCAUTH_NULL verifier */
		nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
		nfsm_chain_add_32(error, &nmrep, 0);
		/* accepted status */
		switch (status) {
		case EPROGUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
			/* lowest and highest supported versions (both the same here) */
			nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
			nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
			break;
		case EPROCUNAVAIL:
			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
			break;
		default:
			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
			/* NFSERR_RETVOID: success with no status value to encode */
			if (status != NFSERR_RETVOID)
				nfsm_chain_add_32(error, &nmrep, status);
			break;
		}
	}
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		nfsm_chain_null(&nmrep);
		goto out;
	}
	/* append the results (mrest) to the reply header chain */
	error = mbuf_setnext(nmrep.nmc_mcur, mrest);
	if (error) {
		printf("nfs cb: mbuf_setnext failed %d\n", error);
		goto out;
	}
	mrest = NULL;	/* now owned by the nmrep chain; don't free at out: */
	/* Calculate the size of the reply */
	replen = 0;
	for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
		replen += mbuf_len(m);
	mbuf_pkthdr_setlen(mhead, replen);
	error = mbuf_pkthdr_setrcvif(mhead, NULL);
	/* TCP record mark: payload length (minus the mark itself) with the last-fragment bit (0x80000000) set */
	nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
	nfsm_chain_null(&nmrep);

	/* send the reply */
	bzero(&msg, sizeof(msg));
	error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
	mhead = NULL;	/* mhead is consumed by sock_sendmbuf() — presumably even on error; don't free below */
	if (!error && ((int)sentlen != replen))
		error = EWOULDBLOCK;
	if (error == EWOULDBLOCK) /* inability to send response is considered fatal */
		error = ETIMEDOUT;
out:
	if (error)
		nfsm_chain_cleanup(&nmrep);
	if (mhead)
		mbuf_freem(mhead);
	if (mrest)
		mbuf_freem(mrest);
	if (mreq)
		mbuf_freem(mreq);
	return (error);
}


/*
 * Initialize an nfs_rpc_record_state structure.
 *
 * Starts out expecting to read a full 4-byte TCP record marker
 * (nrrs_markerleft counts the marker bytes still to be read).
 */
void
nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
{
	bzero(nrrsp, sizeof(*nrrsp));
	nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
}

/*
 * Clean up an nfs_rpc_record_state structure.
 *
 * Frees any partially-assembled RPC record mbuf chain.
 */
void
nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
{
	if (nrrsp->nrrs_m) {
		mbuf_freem(nrrsp->nrrs_m);
		nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
	}
}

/*
 * Read the next (marked) RPC record from the socket.
 *
 * Parsing state persists in *nrrsp across calls, so a record may be
 * assembled incrementally over multiple invocations (e.g. from a
 * non-blocking socket upcall).
 *
 * *recvp returns if any data was received.
 * *mp returns the next complete RPC record
 */
int
nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
{
	struct iovec aio;
	struct msghdr msg;
	size_t rcvlen;
	int error = 0;
	mbuf_t m;

	*recvp = 0;
	*mp = NULL;

	/* read the TCP RPC record marker */
	while (!error && nrrsp->nrrs_markerleft) {
		/* read the remaining marker bytes directly into nrrs_fragleft */
		aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
			sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
		aio.iov_len = nrrsp->nrrs_markerleft;
		bzero(&msg, sizeof(msg));
		msg.msg_iov = &aio;
		msg.msg_iovlen = 1;
		error = sock_receive(so, &msg, flags, &rcvlen);
		if (error || !rcvlen)
			break;
		*recvp = 1;
		nrrsp->nrrs_markerleft -= rcvlen;
		if (nrrsp->nrrs_markerleft)
			continue;
		/* record marker complete */
		nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
		if (nrrsp->nrrs_fragleft & 0x80000000) {
			/* high bit marks the final fragment of the record */
			nrrsp->nrrs_lastfrag = 1;
			nrrsp->nrrs_fragleft &= ~0x80000000;
		}
		nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
		if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
			/* This is SERIOUS! We are out of sync with the sender. */
			log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
			error = EFBIG;
		}
	}

	/* read the TCP RPC record fragment */
	while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
		m = NULL;
		rcvlen = nrrsp->nrrs_fragleft;
		error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
		if (error || !rcvlen || !m)
			break;
		*recvp = 1;
		/* append mbufs to list */
		nrrsp->nrrs_fragleft -= rcvlen;
		if (!nrrsp->nrrs_m) {
			nrrsp->nrrs_m = m;
		} else {
			error = mbuf_setnext(nrrsp->nrrs_mlast, m);
			if (error) {
				printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
				mbuf_freem(m);
				break;
			}
		}
		/* track the tail so the next fragment can be appended cheaply */
		while (mbuf_next(m))
			m = mbuf_next(m);
		nrrsp->nrrs_mlast = m;
	}

	/* done reading fragment? */
	if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
		/* reset socket fragment parsing state */
		nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
		if (nrrsp->nrrs_lastfrag) {
			/* RPC record complete */
			*mp = nrrsp->nrrs_m;
			/* reset socket record parsing state */
			nrrsp->nrrs_reclen = 0;
			nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
			nrrsp->nrrs_lastfrag = 0;
		}
	}

	return (error);
}



/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - wait for reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct nfs_socket *nso;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

again:
	error = nfs_sndlock(req);
	if (error) {
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* bail out now if the request has been terminated */
	error = nfs_sigintr(req->r_nmp, req, NULL, 0);
	if (error) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	/*
	 * If it's a setup RPC but we're not in SETUP... must need reconnect.
	 * If it's a recovery RPC but the socket's not ready... must need reconnect.
	 */
	if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
	    ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
		error = ETIMEDOUT;
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			/* caller can't block; just mark the request for resend */
			lck_mtx_lock(&req->r_mtx);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG("nfs_send: 0x%llx wait reconnect\n", req->r_xid);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & NFSSTA_FORCE) {
				error = EIO;
				break;
			}
			if (NMFLAG(nmp, SOFT) && (nmp->nm_reconnect_start > 0)) {
				struct timeval now;
				microuptime(&now);
				if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
					/* soft mount in reconnect for a while... terminate ASAP */
					OSAddAtomic64(1, &nfsstats.rpctimeouts);
					req->r_flags |= R_SOFTTERM;
					req->r_error = error = ETIMEDOUT;
					break;
				}
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait", &ts);
			slpflag = 0;	/* only catch signals on the first sleep */
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error) {
			lck_mtx_lock(&req->r_mtx);
			req->r_error = error;
			req->r_flags &= ~R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			return (error);
		}
		/* socket is ready (or state changed); re-evaluate from the top */
		goto again;
	}
	nso = nmp->nm_nso;
	/* note that we're using the mount's socket to do the send */
	nmp->nm_state |= NFSSTA_SENDING;  /* will be cleared by nfs_sndunlock() */
	lck_mtx_unlock(&nmp->nm_lock);
	if (!nso) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	rexmit = (req->r_flags & R_SENT);	/* nonzero => this is a retransmit */

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd", &ts);
				slpflag = 0;
				/* remove ourselves if the wakeup path didn't already */
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	/* send a copy so the original chain is kept for potential resends */
	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
	    wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error);
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	/* unconnected datagram socket: supply the destination address explicitly */
	if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
		msg.msg_name = (caddr_t)sendnam;
		msg.msg_namelen = sendnam->sa_len;
	}
	error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
#ifdef NFS_SOCKET_DEBUGGING
	if (error || (sentlen != req->r_mreqlen))
		NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n",
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error);
#endif
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	/* a partial write on a stream socket leaves the stream corrupt: must reconnect */
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_flags &= ~R_SENDING;
	req->r_rtt = 0;
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic64(1, &nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
			/* read (and discard) the pending socket error to clear it */
			int clearerror = 0, optlen = sizeof(clearerror);
			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG("nfs_send: ignoring UDP socket error %d so %d\n",
					error, clearerror);
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
		/* FALLTHROUGH */
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
		needrecon = 1;
		break;
	}
	if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error);
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n", error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	if (nfs_is_dead(error, nmp))
		error = EIO;

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		error = 0;
	return (error);
}

/*
 * NFS client socket upcalls
 *
 * Pull RPC replies out of an NFS mount's socket and match them
 * up with the pending request.
 *
 * The datagram code is simple because we always get whole
 * messages out of the socket.
 *
 * The stream code is more involved because we have to parse
 * the RPC records out of the stream.
 */

/* NFS client UDP socket upcall */
void
nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct nfs_socket *nso = nmp->nm_nso;
	size_t rcvlen;
	mbuf_t m;
	int error = 0;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
		return;

	/* drain all complete datagrams currently queued on the socket */
	do {
		/* make sure we're on the current socket */
		if (!nso || (nso->nso_so != so))
			return;

		m = NULL;
		rcvlen = 1000000;
		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
		if (m)
			nfs_request_match_reply(nmp, m);
	} while (m && !error);

	if (error && (error != EWOULDBLOCK)) {
		/* problems with the socket...
 mark for reconnection */
		NFS_SOCK_DBG("nfs_udp_rcv: need reconnect %d\n", error);
		nfs_need_reconnect(nmp);
	}
}

/* NFS client TCP socket upcall */
void
nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
{
	struct nfsmount *nmp = arg;
	struct nfs_socket *nso = nmp->nm_nso;
	struct nfs_rpc_record_state nrrs;
	mbuf_t m;
	int error = 0;
	int recv = 1;

	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
		return;

	/* make sure we're on the current socket */
	lck_mtx_lock(&nmp->nm_lock);
	nso = nmp->nm_nso;
	if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);

	/* make sure this upcall should be trying to do work */
	lck_mtx_lock(&nso->nso_lock);
	if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) {
		lck_mtx_unlock(&nso->nso_lock);
		return;
	}
	/* claim the upcall and work on a local copy of the record parsing state */
	nso->nso_flags |= NSO_UPCALL;
	nrrs = nso->nso_rrs;
	lck_mtx_unlock(&nso->nso_lock);

	/* loop while we make error-free progress */
	while (!error && recv) {
		error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
		if (m) /* match completed response with request */
			nfs_request_match_reply(nmp, m);
	}

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_nso == nso) {
		/* still the same socket, so update socket's RPC parsing state */
		lck_mtx_unlock(&nmp->nm_lock);
		lck_mtx_lock(&nso->nso_lock);
		nso->nso_rrs = nrrs;
		nso->nso_flags &= ~NSO_UPCALL;
		lck_mtx_unlock(&nso->nso_lock);
		if (nmp->nm_sockflags & NMSOCK_DISCONNECTING)
			wakeup(&nmp->nm_sockflags);
	} else {
		lck_mtx_unlock(&nmp->nm_lock);
	}
#ifdef NFS_SOCKET_DEBUGGING
	if (!recv && (error != EWOULDBLOCK))
		NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error);
#endif
	/* note: no error and no data indicates server closed its end */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... mark for reconnection */
		NFS_SOCK_DBG("nfs_tcp_rcv: need reconnect %d\n", error);
		nfs_need_reconnect(nmp);
	}
}

/*
 * "poke" a socket to try to provoke any pending errors
 */
void
nfs_sock_poke(struct nfsmount *nmp)
{
	struct iovec aio;
	struct msghdr msg;
	size_t len;
	int error = 0;
	int dummy;

	lck_mtx_lock(&nmp->nm_lock);
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
	    !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
		lck_mtx_unlock(&nmp->nm_lock);
		return;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	/* zero-length non-blocking send: transfers no data but surfaces socket errors */
	aio.iov_base = &dummy;
	aio.iov_len = 0;
	len = 0;
	bzero(&msg, sizeof(msg));
	msg.msg_iov = &aio;
	msg.msg_iovlen = 1;
	error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
	NFS_SOCK_DBG("nfs_sock_poke: error %d\n", error);
	nfs_is_dead(error, nmp);
}

/*
 * Match an RPC reply with the corresponding request
 */
void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_int32_t reply = 0, rxid = 0;
	int error = 0, asyncioq, t1;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic64(1, &nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		/* skip requests that already have a reply or whose xid doesn't match */
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
					((NFS_CWNDSCALE * NFS_CWNDSCALE) +
					 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		asyncioq = (req->r_callback.rcb_func != NULL);
		if (nfs_request_using_gss(req))
			nfs_gss_clnt_rpcdone(req);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		break;
	}

	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic64(1, &nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}

/*
 * Wait for the reply for a given request...
 * ...potentially resending the request if necessary.
 */
int
nfs_wait_reply(struct nfsreq *req)
{
	struct timespec ts = { 2, 0 };
	int error = 0, slpflag, first = 1;

	if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
		slpflag = PCATCH;
	else
		slpflag = 0;

	lck_mtx_lock(&req->r_mtx);
	while (!req->r_nmrep.nmc_mhead) {
		if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0)))
			break;
		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
			break;
		/* check if we need to resend */
		if (req->r_flags & R_MUSTRESEND) {
			NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
			req->r_flags |= R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			if (nfs_request_using_gss(req)) {
				/*
				 * It's an RPCSEC_GSS request.
				 * Can't just resend the original request
				 * without bumping the cred sequence number.
				 * Go back and re-build the request.
				 */
				lck_mtx_lock(&req->r_mtx);
				req->r_flags &= ~R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				return (EAGAIN);
			}
			error = nfs_send(req, 1);
			lck_mtx_lock(&req->r_mtx);
			NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error);
			if (error)
				break;
			if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
				break;
		}
		/* need to poll if we're P_NOREMOTEHANG */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
		first = slpflag = 0;
	}
	lck_mtx_unlock(&req->r_mtx);

	return (error);
}

/*
 * An NFS request goes something like this:
 * (nb: always frees up mreq mbuf list)
 * nfs_request_create()
 *	- allocates a request struct if one is not provided
 *	- initial fill-in of the request struct
 * nfs_request_add_header()
 *	- add the RPC header
 * nfs_request_send()
 *	- link it into list
 *	- call nfs_send() for first transmit
 * nfs_request_wait()
 *	- call nfs_wait_reply() to wait for the reply
 * nfs_request_finish()
 *	- break down rpc header and return with error or nfs reply
 *	  pointed to by nmrep.
 * nfs_request_rele()
 * nfs_request_destroy()
 *	- clean up the request struct
 *	- free the request struct if it was allocated by nfs_request_create()
 */

/*
 * Set up an NFS request struct (allocating if no request passed in).
 */
int
nfs_request_create(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq **reqp)
{
	struct nfsreq *req, *newreq = NULL;
	struct nfsmount *nmp;

	req = *reqp;
	if (!req) {
		/* allocate a new NFS request structure */
		MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK);
		if (!newreq) {
			mbuf_freem(nmrest->nmc_mhead);
			nmrest->nmc_mhead = NULL;
			return (ENOMEM);
		}
		req = newreq;
	}

	bzero(req, sizeof(*req));
	if (req == newreq)
		req->r_flags = R_ALLOCATED;

	nmp = VFSTONFS(np ? NFSTOMP(np) : mp);
	if (!nmp) {
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}
	lck_mtx_lock(&nmp->nm_lock);
	/* forced unmount in progress while timed out: refuse new requests */
	if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) ==
	    (NFSSTA_FORCE|NFSSTA_TIMEO)) {
		lck_mtx_unlock(&nmp->nm_lock);
		mbuf_freem(nmrest->nmc_mhead);
		nmrest->nmc_mhead = NULL;
		if (newreq)
			FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ);
		return (ENXIO);
	}

	if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
		OSAddAtomic64(1, &nfsstats.rpccnt[procnum]);
	if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
		panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);

	lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
	req->r_nmp = nmp;
	req->r_np = np;
	req->r_thread = thd;
	if (!thd)
		req->r_flags |= R_NOINTR;	/* no thread context => not interruptible */
	if (IS_VALID_CRED(cred)) {
		kauth_cred_ref(cred);
		req->r_cred = cred;
	}
	req->r_procnum = procnum;
	if (proct[procnum] > 0)
		req->r_flags |= R_TIMING;
	req->r_nmrep.nmc_mhead = NULL;
	SLIST_INIT(&req->r_gss_seqlist);
	req->r_achain.tqe_next = NFSREQNOLIST;
	req->r_rchain.tqe_next = NFSREQNOLIST;
	req->r_cchain.tqe_next = NFSREQNOLIST;

	/* set auth flavor to use for request */
	if (!req->r_cred)
		req->r_auth = RPCAUTH_NONE;
	else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID))
		req->r_auth = req->r_np->n_auth;
	else
		req->r_auth = nmp->nm_auth;

	lck_mtx_unlock(&nmp->nm_lock);

	/* move the request mbuf chain to the nfsreq */
	req->r_mrest = nmrest->nmc_mhead;
	nmrest->nmc_mhead = NULL;

	req->r_flags |= R_INITTED;
	req->r_refs = 1;
	if (newreq)
		*reqp = req;
	return (0);
}

/*
 * Clean up and free an NFS request structure.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	struct gss_seq *gsp, *ngsp;
	struct timespec ts = { 1, 0 };
	int clearjbtimeo = 0;

	if (!req || !(req->r_flags & R_INITTED))
		return;
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);
	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/* still on an async I/O queue? */
		lck_mtx_lock(nfsiod_mutex);
		if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}
	lck_mtx_lock(&req->r_mtx);
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_flags & R_CWND) {
			/* Decrement the outstanding request count. */
			req->r_flags &= ~R_CWND;
			nmp->nm_sent -= NFS_CWNDSCALE;
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/* remove from the resend queue, if queued */
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
		}
		/* remove from the cwnd queue, if queued */
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		if (req->r_flags & R_JBTPRINTFMSG) {
			req->r_flags &= ~R_JBTPRINTFMSG;
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	/* wait for any in-progress resend to let go of the request */
	while (req->r_flags & R_RESENDQ)
		msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
	lck_mtx_unlock(&req->r_mtx);
	if (clearjbtimeo)
		nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);	/* r_mhead includes r_mrest; free only once */
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (nfs_request_using_gss(req))
		nfs_gss_clnt_rpcdone(req);
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);
	if (req->r_wrongsec)
		FREE(req->r_wrongsec, M_TEMP);

	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}

/*
 * Take an additional reference on a request.
 * Pass locked != 0 if the caller already holds r_mtx.
 */
void
nfs_request_ref(struct nfsreq *req, int locked)
{
	if (!locked)
		lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference error");
	req->r_refs++;
	if (!locked)
		lck_mtx_unlock(&req->r_mtx);
}

/*
 * Release a reference on a request, destroying it when the last one is dropped.
 */
void
nfs_request_rele(struct nfsreq *req)
{
	int destroy;

	lck_mtx_lock(&req->r_mtx);
	if (req->r_refs <= 0)
		panic("nfsreq reference underflow");
	req->r_refs--;
	destroy = (req->r_refs == 0);
	lck_mtx_unlock(&req->r_mtx);
	if (destroy)
		nfs_request_destroy(req);
}


/*
 * Add an (updated) RPC header with authorization to an NFS request.
 */
int
nfs_request_add_header(struct nfsreq *req)
{
	struct nfsmount *nmp;
	int error = 0;
	mbuf_t m;

	/* free up any previous header */
	if ((m = req->r_mhead)) {
		/* free header mbufs only; stop at r_mrest (the request body) */
		while (m && (m != req->r_mrest))
			m = mbuf_free(m);
		req->r_mhead = NULL;
	}

	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);

	error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
	if (error)
		return (error);

	req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead);
	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (NMFLAG(nmp, SOFT))
		req->r_retry = nmp->nm_retry;
	else
		req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
	lck_mtx_unlock(&nmp->nm_lock);

	return (error);
}


/*
 * Queue an NFS request up and send it out.
 */
int
nfs_request_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct timeval now;

	lck_mtx_lock(&req->r_mtx);
	req->r_flags |= R_SENDING;
	lck_mtx_unlock(&req->r_mtx);

	lck_mtx_lock(nfs_request_mutex);

	nmp = req->r_np ?
 NFSTONMP(req->r_np) : req->r_nmp;
	if (!nmp) {
		lck_mtx_unlock(nfs_request_mutex);
		return (ENXIO);
	}

	microuptime(&now);
	if (!req->r_start) {
		req->r_start = now.tv_sec;
		req->r_lastmsg = now.tv_sec -
			((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	}

	OSAddAtomic64(1, &nfsstats.rpcrequests);

	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 * Make sure that the request queue timer is running
	 * to check for possible request timeout.
	 */
	TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain);
	req->r_lflags |= RL_QUEUED;
	if (!nfs_request_timer_on) {
		nfs_request_timer_on = 1;
		nfs_interval_timer_start(nfs_request_timer_call,
			NFS_REQUESTDELAY);
	}
	lck_mtx_unlock(nfs_request_mutex);

	/* Send the request... */
	return (nfs_send(req, wait));
}

/*
 * Call nfs_wait_reply() to wait for the reply.
 */
void
nfs_request_wait(struct nfsreq *req)
{
	req->r_error = nfs_wait_reply(req);
}

/*
 * Finish up an NFS request by dequeueing it and
 * doing the initial NFS request reply processing.
 */
int
nfs_request_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	int *status)
{
	struct nfsmount *nmp;
	mbuf_t mrep;
	int verf_type = 0;
	uint32_t verf_len = 0;
	uint32_t reply_status = 0;
	uint32_t rejected_status = 0;
	uint32_t auth_status = 0;
	uint32_t accepted_status = 0;
	struct nfsm_chain nmrep;
	int error, clearjbtimeo;

	error = req->r_error;

	if (nmrepp)
		nmrepp->nmc_mhead = NULL;

	/* RPC done, unlink the request. */
	nfs_reqdequeue(req);

	mrep = req->r_nmrep.nmc_mhead;

	nmp = req->r_np ?
NFSTONMP(req->r_np) : req->r_nmp; 3879 3880 if ((req->r_flags & R_CWND) && nmp) { 3881 /* 3882 * Decrement the outstanding request count. 3883 */ 3884 req->r_flags &= ~R_CWND; 3885 lck_mtx_lock(&nmp->nm_lock); 3886 FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd); 3887 nmp->nm_sent -= NFS_CWNDSCALE; 3888 if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) { 3889 /* congestion window is open, poke the cwnd queue */ 3890 struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq); 3891 TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain); 3892 req2->r_cchain.tqe_next = NFSREQNOLIST; 3893 wakeup(req2); 3894 } 3895 lck_mtx_unlock(&nmp->nm_lock); 3896 } 3897 3898 if (nfs_request_using_gss(req)) { 3899 /* 3900 * If the request used an RPCSEC_GSS credential 3901 * then reset its sequence number bit in the 3902 * request window. 3903 */ 3904 nfs_gss_clnt_rpcdone(req); 3905 3906 /* 3907 * If we need to re-send, go back and re-build the 3908 * request based on a new sequence number. 3909 * Note that we're using the original XID. 3910 */ 3911 if (error == EAGAIN) { 3912 req->r_error = 0; 3913 if (mrep) 3914 mbuf_freem(mrep); 3915 error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs 3916 req->r_nmrep.nmc_mhead = NULL; 3917 req->r_flags |= R_RESTART; 3918 if (error == ENEEDAUTH) { 3919 req->r_xid = 0; // get a new XID 3920 error = 0; 3921 } 3922 goto nfsmout; 3923 } 3924 } 3925 3926 /* 3927 * If there was a successful reply, make sure to mark the mount as up. 3928 * If a tprintf message was given (or if this is a timed-out soft mount) 3929 * then post a tprintf message indicating the server is alive again. 
3930 */ 3931 if (!error) { 3932 if ((req->r_flags & R_TPRINTFMSG) || 3933 (nmp && NMFLAG(nmp, SOFT) && 3934 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO))) 3935 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again"); 3936 else 3937 nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL); 3938 } 3939 if (!error && !nmp) 3940 error = ENXIO; 3941 nfsmout_if(error); 3942 3943 /* 3944 * break down the RPC header and check if ok 3945 */ 3946 nmrep = req->r_nmrep; 3947 nfsm_chain_get_32(error, &nmrep, reply_status); 3948 nfsmout_if(error); 3949 if (reply_status == RPC_MSGDENIED) { 3950 nfsm_chain_get_32(error, &nmrep, rejected_status); 3951 nfsmout_if(error); 3952 if (rejected_status == RPC_MISMATCH) { 3953 error = ENOTSUP; 3954 goto nfsmout; 3955 } 3956 nfsm_chain_get_32(error, &nmrep, auth_status); 3957 nfsmout_if(error); 3958 switch (auth_status) { 3959 case RPCSEC_GSS_CREDPROBLEM: 3960 case RPCSEC_GSS_CTXPROBLEM: 3961 /* 3962 * An RPCSEC_GSS cred or context problem. 3963 * We can't use it anymore. 3964 * Restore the args, renew the context 3965 * and set up for a resend. 
3966 */ 3967 error = nfs_gss_clnt_args_restore(req); 3968 if (error && error != ENEEDAUTH) 3969 break; 3970 3971 if (!error) { 3972 error = nfs_gss_clnt_ctx_renew(req); 3973 if (error) 3974 break; 3975 } 3976 mbuf_freem(mrep); 3977 req->r_nmrep.nmc_mhead = NULL; 3978 req->r_xid = 0; // get a new XID 3979 req->r_flags |= R_RESTART; 3980 goto nfsmout; 3981 default: 3982 error = EACCES; 3983 break; 3984 } 3985 goto nfsmout; 3986 } 3987 3988 /* Now check the verifier */ 3989 nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor 3990 nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length 3991 nfsmout_if(error); 3992 3993 switch (req->r_auth) { 3994 case RPCAUTH_NONE: 3995 case RPCAUTH_SYS: 3996 /* Any AUTH_SYS verifier is ignored */ 3997 if (verf_len > 0) 3998 nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len)); 3999 nfsm_chain_get_32(error, &nmrep, accepted_status); 4000 break; 4001 case RPCAUTH_KRB5: 4002 case RPCAUTH_KRB5I: 4003 case RPCAUTH_KRB5P: 4004 error = nfs_gss_clnt_verf_get(req, &nmrep, 4005 verf_type, verf_len, &accepted_status); 4006 break; 4007 } 4008 nfsmout_if(error); 4009 4010 switch (accepted_status) { 4011 case RPC_SUCCESS: 4012 if (req->r_procnum == NFSPROC_NULL) { 4013 /* 4014 * The NFS null procedure is unique, 4015 * in not returning an NFS status. 4016 */ 4017 *status = NFS_OK; 4018 } else { 4019 nfsm_chain_get_32(error, &nmrep, *status); 4020 nfsmout_if(error); 4021 } 4022 4023 if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) { 4024 /* 4025 * It's a JUKEBOX error - delay and try again 4026 */ 4027 int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? 
PCATCH : 0; 4028 4029 mbuf_freem(mrep); 4030 req->r_nmrep.nmc_mhead = NULL; 4031 if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) { 4032 /* we're not yet completely mounted and */ 4033 /* we can't complete an RPC, so we fail */ 4034 OSAddAtomic64(1, &nfsstats.rpctimeouts); 4035 nfs_softterm(req); 4036 error = req->r_error; 4037 goto nfsmout; 4038 } 4039 req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2); 4040 if (req->r_delay > 30) 4041 req->r_delay = 30; 4042 if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) { 4043 if (!(req->r_flags & R_JBTPRINTFMSG)) { 4044 req->r_flags |= R_JBTPRINTFMSG; 4045 lck_mtx_lock(&nmp->nm_lock); 4046 nmp->nm_jbreqs++; 4047 lck_mtx_unlock(&nmp->nm_lock); 4048 } 4049 nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO, 4050 "resource temporarily unavailable (jukebox)"); 4051 } 4052 if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) { 4053 /* for soft mounts, just give up after a short while */ 4054 OSAddAtomic64(1, &nfsstats.rpctimeouts); 4055 nfs_softterm(req); 4056 error = req->r_error; 4057 goto nfsmout; 4058 } 4059 delay = req->r_delay; 4060 if (req->r_callback.rcb_func) { 4061 struct timeval now; 4062 microuptime(&now); 4063 req->r_resendtime = now.tv_sec + delay; 4064 } else { 4065 do { 4066 if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) 4067 goto nfsmout; 4068 tsleep(nfs_request_finish, PSOCK|slpflag, "nfs_jukebox_trylater", hz); 4069 slpflag = 0; 4070 } while (--delay > 0); 4071 } 4072 req->r_xid = 0; // get a new XID 4073 req->r_flags |= R_RESTART; 4074 req->r_start = 0; 4075 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER); 4076 return (0); 4077 } 4078 4079 if (req->r_flags & R_JBTPRINTFMSG) { 4080 req->r_flags &= ~R_JBTPRINTFMSG; 4081 lck_mtx_lock(&nmp->nm_lock); 4082 nmp->nm_jbreqs--; 4083 clearjbtimeo = (nmp->nm_jbreqs == 0) ? 
NFSSTA_JUKEBOXTIMEO : 0; 4084 lck_mtx_unlock(&nmp->nm_lock); 4085 nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again"); 4086 } 4087 4088 if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) { 4089 /* 4090 * Hmmm... we need to try a different security flavor. 4091 * The first time a request hits this, we will allocate an array 4092 * to track flavors to try. We fill the array with the mount's 4093 * preferred flavors or the server's preferred flavors or just the 4094 * flavors we support. 4095 */ 4096 uint32_t srvflavors[NX_MAX_SEC_FLAVORS]; 4097 int srvcount, i, j; 4098 4099 /* Call SECINFO to try to get list of flavors from server. */ 4100 srvcount = NX_MAX_SEC_FLAVORS; 4101 nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount); 4102 4103 if (!req->r_wrongsec) { 4104 /* first time... set up flavor array */ 4105 MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK); 4106 if (!req->r_wrongsec) { 4107 error = EACCES; 4108 goto nfsmout; 4109 } 4110 i=0; 4111 if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */ 4112 for(; i < nmp->nm_sec.count; i++) 4113 req->r_wrongsec[i] = nmp->nm_sec.flavors[i]; 4114 } else if (srvcount) { /* otherwise use the server's list of flavors */ 4115 for(; i < srvcount; i++) 4116 req->r_wrongsec[i] = srvflavors[i]; 4117 } else { /* otherwise, just try the flavors we support. 
*/ 4118 req->r_wrongsec[i++] = RPCAUTH_KRB5P; 4119 req->r_wrongsec[i++] = RPCAUTH_KRB5I; 4120 req->r_wrongsec[i++] = RPCAUTH_KRB5; 4121 req->r_wrongsec[i++] = RPCAUTH_SYS; 4122 req->r_wrongsec[i++] = RPCAUTH_NONE; 4123 } 4124 for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */ 4125 req->r_wrongsec[i] = RPCAUTH_INVALID; 4126 } 4127 4128 /* clear the current flavor from the list */ 4129 for(i=0; i < NX_MAX_SEC_FLAVORS; i++) 4130 if (req->r_wrongsec[i] == req->r_auth) 4131 req->r_wrongsec[i] = RPCAUTH_INVALID; 4132 4133 /* find the next flavor to try */ 4134 for(i=0; i < NX_MAX_SEC_FLAVORS; i++) 4135 if (req->r_wrongsec[i] != RPCAUTH_INVALID) { 4136 if (((req->r_wrongsec[i] == RPCAUTH_KRB5P) || 4137 (req->r_wrongsec[i] == RPCAUTH_KRB5I) || 4138 (req->r_wrongsec[i] == RPCAUTH_KRB5)) && (req->r_gss_ctx && 4139 (req->r_gss_ctx->gss_clnt_service == RPCSEC_GSS_SVC_SYS))) { 4140 /* don't bother trying Kerberos if we've already got a fallback context */ 4141 req->r_wrongsec[i] = RPCAUTH_INVALID; 4142 continue; 4143 } 4144 if (!srvcount) /* no server list, just try it */ 4145 break; 4146 /* check that it's in the server's list */ 4147 for(j=0; j < srvcount; j++) 4148 if (req->r_wrongsec[i] == srvflavors[j]) 4149 break; 4150 if (j < srvcount) /* found */ 4151 break; 4152 /* not found in server list */ 4153 req->r_wrongsec[i] = RPCAUTH_INVALID; 4154 } 4155 if (i == NX_MAX_SEC_FLAVORS) { 4156 /* nothing left to try! */ 4157 error = EACCES; 4158 goto nfsmout; 4159 } 4160 4161 /* retry with the next auth flavor */ 4162 req->r_auth = req->r_wrongsec[i]; 4163 req->r_xid = 0; // get a new XID 4164 req->r_flags |= R_RESTART; 4165 req->r_start = 0; 4166 FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC); 4167 return (0); 4168 } 4169 if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) { 4170 /* 4171 * We renegotiated security for this request; so update the 4172 * default security flavor for the associated node. 
4173 */ 4174 if (req->r_np) 4175 req->r_np->n_auth = req->r_auth; 4176 } 4177 4178 if (*status == NFS_OK) { 4179 /* 4180 * Successful NFS request 4181 */ 4182 *nmrepp = nmrep; 4183 req->r_nmrep.nmc_mhead = NULL; 4184 break; 4185 } 4186 /* Got an NFS error of some kind */ 4187 4188 /* 4189 * If the File Handle was stale, invalidate the 4190 * lookup cache, just in case. 4191 */ 4192 if ((*status == ESTALE) && req->r_np) { 4193 cache_purge(NFSTOV(req->r_np)); 4194 /* if monitored, also send delete event */ 4195 if (vnode_ismonitored(NFSTOV(req->r_np))) 4196 nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE)); 4197 } 4198 if (nmp->nm_vers == NFS_VER2) 4199 mbuf_freem(mrep); 4200 else 4201 *nmrepp = nmrep; 4202 req->r_nmrep.nmc_mhead = NULL; 4203 error = 0; 4204 break; 4205 case RPC_PROGUNAVAIL: 4206 error = EPROGUNAVAIL; 4207 break; 4208 case RPC_PROGMISMATCH: 4209 error = ERPCMISMATCH; 4210 break; 4211 case RPC_PROCUNAVAIL: 4212 error = EPROCUNAVAIL; 4213 break; 4214 case RPC_GARBAGE: 4215 error = EBADRPC; 4216 break; 4217 case RPC_SYSTEM_ERR: 4218 default: 4219 error = EIO; 4220 break; 4221 } 4222nfsmout: 4223 if (req->r_flags & R_JBTPRINTFMSG) { 4224 req->r_flags &= ~R_JBTPRINTFMSG; 4225 lck_mtx_lock(&nmp->nm_lock); 4226 nmp->nm_jbreqs--; 4227 clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0; 4228 lck_mtx_unlock(&nmp->nm_lock); 4229 if (clearjbtimeo) 4230 nfs_up(nmp, req->r_thread, clearjbtimeo, NULL); 4231 } 4232 FSDBG(273, R_XID32(req->r_xid), nmp, req, 4233 (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error); 4234 return (error); 4235} 4236 4237/* 4238 * NFS request using a GSS/Kerberos security flavor? 4239 */ 4240int 4241nfs_request_using_gss(struct nfsreq *req) 4242{ 4243 if (!req->r_gss_ctx) 4244 return (0); 4245 switch (req->r_auth) { 4246 case RPCAUTH_KRB5: 4247 case RPCAUTH_KRB5I: 4248 case RPCAUTH_KRB5P: 4249 return (1); 4250 } 4251 return (0); 4252} 4253 4254/* 4255 * Perform an NFS request synchronously. 
 */
int
nfs_request(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	vfs_context_t ctx,
	struct nfsreq_secinfo_args *si,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	/* convenience wrapper: pull thread/cred out of the vfs context */
	return nfs_request2(np, mp, nmrest, procnum,
		vfs_context_thread(ctx), vfs_context_ucred(ctx),
		si, 0, nmrepp, xidp, status);
}

/*
 * Perform an NFS request synchronously with explicit thread/credential.
 *
 * Creates a stack-allocated request and loops through
 * add-header / send / wait / finish until the request completes
 * or no longer needs a restart (R_RESTART).  Returns the request
 * error; the NFS-level status comes back via *status and the
 * reply chain via *nmrepp.
 */
int
nfs_request2(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);
	if (si)
		req->r_secinfo = *si;

	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if (xidp)
			*xidp = req->r_xid;
		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error);
	nfs_request_rele(req);
	return (error);
}


/*
 * Set up a new null proc request to exchange GSS context tokens with the
 * server. Associate the context that we are setting up with the request that we
 * are sending.
 */
int
nfs_request_gss(
		mount_t mp,
		struct nfsm_chain *nmrest,
		thread_t thd,
		kauth_cred_t cred,
		int flags,
		struct nfs_gss_clnt_ctx *cp,	/* Set to gss context to renew or setup */
		struct nfsm_chain *nmrepp,
		int *status)
{
	struct nfsreq rq, *req = &rq;
	int error;

	if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
		return (error);
	req->r_flags |= (flags & R_OPTMASK);

	if (cp == NULL) {
		printf("nfs_request_gss request has no context\n");
		nfs_request_rele(req);
		return (NFSERR_EAUTH);
	}
	nfs_gss_clnt_ctx_ref(req, cp);

	FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
	/* same add-header/send/wait/finish loop as nfs_request2() */
	do {
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;

		if ((error = nfs_request_send(req, 1)))
			break;
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	} while (req->r_flags & R_RESTART);

	FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
	nfs_request_rele(req);
	return (error);
}

/*
 * Create and start an asynchronous NFS request.
 *
 * The request is heap-allocated and returned via *reqp.  If a callback
 * is given, an extra reference is taken for the callback's eventual
 * nfs_request_async_finish(), and this function waits until the request
 * has actually been sent (R_SENT) before returning.
 */
int
nfs_request_async(
	nfsnode_t np,
	mount_t mp,	/* used only if !np */
	struct nfsm_chain *nmrest,
	int procnum,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_secinfo_args *si,
	int flags,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int error, sent;

	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
	req = *reqp;
	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
	if (error)
		return (error);
	req->r_flags |= (flags & R_OPTMASK);
	req->r_flags |= R_ASYNC;
	if (si)
		req->r_secinfo = *si;
	if (cb)
		req->r_callback = *cb;
	error = nfs_request_add_header(req);
	if (!error) {
		req->r_flags |= R_WAITSENT;
		if (req->r_callback.rcb_func)
			nfs_request_ref(req, 0);	/* reference for the callback path */
		error = nfs_request_send(req, 1);
		lck_mtx_lock(&req->r_mtx);
		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
			/* make sure to wait until this async I/O request gets sent */
			int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
			struct timespec ts = { 2, 0 };
			while (!(req->r_flags & R_SENT)) {
				if ((req->r_flags & R_RESENDQ) && ((nmp = req->r_nmp))) {
					lck_mtx_lock(&nmp->nm_lock);
					if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
						/*
						 * It's not going to get off the resend queue if we're in recovery.
						 * So, just take it off ourselves.  We could be holding mount state
						 * busy and thus holding up the start of recovery.
						 */
						TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
						req->r_rchain.tqe_next = NFSREQNOLIST;
						if (req->r_flags & R_RESENDQ)
							req->r_flags &= ~R_RESENDQ;
						lck_mtx_unlock(&nmp->nm_lock);
						req->r_flags |= R_SENDING;
						/* drop r_mtx across nfs_send() to respect lock ordering */
						lck_mtx_unlock(&req->r_mtx);
						error = nfs_send(req, 1);
						lck_mtx_lock(&req->r_mtx);
						if (error)
							break;
						continue;
					}
					lck_mtx_unlock(&nmp->nm_lock);
				}
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
					break;
				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
				slpflag = 0;
			}
		}
		sent = req->r_flags & R_SENT;
		lck_mtx_unlock(&req->r_mtx);
		/* on error, drop the callback reference taken above (if never sent) */
		if (error && req->r_callback.rcb_func && !sent)
			nfs_request_rele(req);
	}
	FSDBG(274, R_XID32(req->r_xid), np, procnum, error);
	/* drop the caller's reference if the request failed or is callback-driven */
	if (error || req->r_callback.rcb_func)
		nfs_request_rele(req);
	return (error);
}

/*
 * Wait for and finish an asynchronous NFS request.
 *
 * If the request is callback-driven (asyncio) and needs a delayed resend,
 * EINPROGRESS is returned and the request stays alive; otherwise the
 * request is completed (possibly looping on R_RESTART) and released.
 */
int
nfs_request_async_finish(
	struct nfsreq *req,
	struct nfsm_chain *nmrepp,
	u_int64_t *xidp,
	int *status)
{
	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
	struct nfsmount *nmp;

	lck_mtx_lock(&req->r_mtx);
	if (!asyncio)
		req->r_flags |= R_ASYNCWAIT;
	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
		struct timespec ts = { 2, 0 };
		if ((nmp = req->r_nmp)) {
			lck_mtx_lock(&nmp->nm_lock);
			if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
				/*
				 * It's not going to get off the resend queue if we're in recovery.
				 * So, just take it off ourselves.  We could be holding mount state
				 * busy and thus holding up the start of recovery.
				 */
				TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
				req->r_rchain.tqe_next = NFSREQNOLIST;
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}
		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
			break;
		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
	}
	lck_mtx_unlock(&req->r_mtx);

	if (!error) {
		nfs_request_wait(req);
		error = nfs_request_finish(req, nmrepp, status);
	}

	while (!error && (req->r_flags & R_RESTART)) {
		if (asyncio && req->r_resendtime) {  /* send later */
			lck_mtx_lock(&req->r_mtx);
			nfs_asyncio_resend(req);
			lck_mtx_unlock(&req->r_mtx);
			return (EINPROGRESS);	/* caller will be called back when done */
		}
		req->r_error = 0;
		req->r_flags &= ~R_RESTART;
		if ((error = nfs_request_add_header(req)))
			break;
		if ((error = nfs_request_send(req, !asyncio)))
			break;
		if (asyncio)
			return (EINPROGRESS);	/* callback will finish the restart */
		nfs_request_wait(req);
		if ((error = nfs_request_finish(req, nmrepp, status)))
			break;
	}
	if (xidp)
		*xidp = req->r_xid;

	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error);
	nfs_request_rele(req);
	return (error);
}

/*
 * Cancel a pending asynchronous NFS request.
 */
void
nfs_request_async_cancel(struct nfsreq *req)
{
	nfs_reqdequeue(req);
	FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E);
	nfs_request_rele(req);
}

/*
 * Flag a request as being terminated.
 */
void
nfs_softterm(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	req->r_flags |= R_SOFTTERM;
	req->r_error = ETIMEDOUT;
	if (!(req->r_flags & R_CWND) || !nmp)
		return;
	/* update congestion window */
	req->r_flags &= ~R_CWND;
	lck_mtx_lock(&nmp->nm_lock);
	FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
	nmp->nm_sent -= NFS_CWNDSCALE;
	if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
		/* congestion window is open, poke the cwnd queue */
		struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
		TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
		req2->r_cchain.tqe_next = NFSREQNOLIST;
		wakeup(req2);
	}
	lck_mtx_unlock(&nmp->nm_lock);
}

/*
 * Ensure req isn't in use by the timer, then dequeue it.
 */
void
nfs_reqdequeue(struct nfsreq *req)
{
	lck_mtx_lock(nfs_request_mutex);
	/* wait out the timer's busy mark (see nfs_reqbusy/nfs_reqnext) */
	while (req->r_lflags & RL_BUSY) {
		req->r_lflags |= RL_WAITING;
		msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
	}
	if (req->r_lflags & RL_QUEUED) {
		TAILQ_REMOVE(&nfs_reqq, req, r_chain);
		req->r_lflags &= ~RL_QUEUED;
	}
	lck_mtx_unlock(nfs_request_mutex);
}

/*
 * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not
 * free()'d out from under it.
 * Caller must hold nfs_request_mutex.
 */
void
nfs_reqbusy(struct nfsreq *req)
{
	if (req->r_lflags & RL_BUSY)
		panic("req locked");
	req->r_lflags |= RL_BUSY;
}

/*
 * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
 */
struct nfsreq *
nfs_reqnext(struct nfsreq *req)
{
	struct nfsreq * nextreq;

	if (req == NULL)
		return (NULL);
	/*
	 * We need to get and busy the next req before signalling the
	 * current one, otherwise wakeup() may block us and we'll race to
	 * grab the next req.
	 */
	nextreq = TAILQ_NEXT(req, r_chain);
	if (nextreq != NULL)
		nfs_reqbusy(nextreq);
	/* unbusy and signal. */
	req->r_lflags &= ~RL_BUSY;
	if (req->r_lflags & RL_WAITING) {
		req->r_lflags &= ~RL_WAITING;
		wakeup(&req->r_lflags);
	}
	return (nextreq);
}

/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;

	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);
	TAILQ_INIT(&nfs_mount_poke_queue);

	microuptime(&now);
	/* walk the queue using the busy/next hand-over protocol (nfs_reqnext) */
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (!nmp) /* unmounted */
			continue;
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;	/* already failed or already has a reply */
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		/* post a "not responding" message if it's been quiet too long */
		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding");
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic64(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 */
		if (!NMFLAG(nmp, SOFT) && !nfs_can_squish(nmp))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
			 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (NMFLAG(nmp, DUMBTIMER))
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n",
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime);
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			nfs_mount_check_dead_timeout(nmp);
			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
			    (nmp->nm_sockflags & NMSOCK_READY)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
		if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic64(1, &nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding");
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}
			if (req->r_flags & R_NOINTR) {
				/* don't terminate nointr requests on timeout */
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			NFS_SOCK_DBG("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start);
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n",
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_sockflags &= ~NMSOCK_POKE;
		wakeup(&nmp->nm_sockflags);
		lck_mtx_unlock(&nmp->nm_lock);
	}

	/* re-arm the timer for the next scan */
	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}

/*
 * check a thread's proc for the "noremotehang" flag.
 */
int
nfs_noremotehang(thread_t thd)
{
	proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL;
	return (p && proc_noremotehang(p));
}

/*
 * Test for a termination condition pending on the process.
 * This is used to determine if we need to bail on a mount.
 * ETIMEDOUT is returned if there has been a soft timeout.
 * EINTR is returned if there is a signal pending that is not being ignored
 * and the mount is interruptable, or if we are a thread that is in the process
 * of cancellation (also SIGKILL posted).
 */
extern int sigprop[NSIG+1];
int
nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
{
	proc_t p;
	int error = 0;

	if (nmp == NULL)
		return (ENXIO);

	if (req && (req->r_flags & R_SOFTTERM))
		return (ETIMEDOUT);	/* request has been terminated. */
	if (req && (req->r_flags & R_NOINTR))
		thd = NULL;		/* don't check for signal on R_NOINTR */

	if (!nmplocked)
		lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_state & NFSSTA_FORCE) {
		/* If a force unmount is in progress then fail. */
		error = EIO;
	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
		/* Someone is unmounting us, go soft and mark it. */
		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
		nmp->nm_state |= NFSSTA_FORCE;
	}

	/* Check if the mount is marked dead. */
	if (!error && (nmp->nm_state & NFSSTA_DEAD))
		error = ENXIO;

	/*
	 * If the mount is hung and we've requested not to hang
	 * on remote filesystems, then bail now.
	 */
	if (current_proc() != kernproc &&
	    !error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
		error = EIO;

	if (!nmplocked)
		lck_mtx_unlock(&nmp->nm_lock);
	if (error)
		return (error);

	/* may not have a thread for async I/O */
	if (thd == NULL || current_proc() == kernproc)
		return (0);

	/*
	 * Check if the process is aborted, but don't interrupt if we
	 * were killed by a signal and this is the exiting thread which
	 * is attempting to dump core.
	 */
	if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
	     (p->p_sigacts == NULL) ||
	     (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
	     !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
		return (EINTR);

	/* mask off thread and process blocked signals. */
	if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
	    proc_pendingsignals(p, NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep;
	int error = 0, slpflag = 0;
	struct timespec ts = { 0, 0 };

	if (nmp == NULL)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;

	if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
		slpflag = PCATCH;
	while (*statep & NFSSTA_SNDLOCK) {
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
			break;
		*statep |= NFSSTA_WANTSND;
		/* use a short sleep so "noremotehang" threads re-check often */
		if (nfs_noremotehang(req->r_thread))
			ts.tv_sec = 1;
		msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck", &ts);
		if (slpflag == PCATCH) {
			slpflag = 0;
			ts.tv_sec = 2;
		}
	}
	if (!error)
		*statep |= NFSSTA_SNDLOCK;
	lck_mtx_unlock(&nmp->nm_lock);
	return (error);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(struct nfsreq *req)
{
	struct nfsmount *nmp = req->r_nmp;
	int *statep, wake = 0;

	if (nmp == NULL)
		return;
	lck_mtx_lock(&nmp->nm_lock);
	statep = &nmp->nm_state;
	if ((*statep & NFSSTA_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING);
	if (*statep & NFSSTA_WANTSND) {
		*statep &= ~NFSSTA_WANTSND;
		wake = 1;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	/* wake waiters after dropping the mount lock */
	if (wake)
		wakeup(statep);
}

int
nfs_aux_request(
	struct nfsmount *nmp,
	thread_t thd,
	struct sockaddr *saddr,
	socket_t so,
	int sotype,
	mbuf_t mreq,
	uint32_t xid,
	int bindresv,
	int timeo,
	struct nfsm_chain *nmrep)
{
	int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
	socket_t newso = NULL;
	struct sockaddr_storage ss;
	struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 };
	mbuf_t m, mrep = NULL;
	struct msghdr msg;
	uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
	uint32_t verf_type, verf_len, accepted_status;
	size_t readlen, sentlen;
	struct nfs_rpc_record_state nrrs;

	if (!so) {
		/* create socket and set options */
		soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
		if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso)))
			goto nfsmout;

		if (bindresv) {
			int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
			int optname = (saddr->sa_family == AF_INET) ?
IP_PORTRANGE : IPV6_PORTRANGE; 5015 int portrange = IP_PORTRANGE_LOW; 5016 error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange)); 5017 nfsmout_if(error); 5018 ss.ss_len = saddr->sa_len; 5019 ss.ss_family = saddr->sa_family; 5020 if (ss.ss_family == AF_INET) { 5021 ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY; 5022 ((struct sockaddr_in*)&ss)->sin_port = htons(0); 5023 } else if (ss.ss_family == AF_INET6) { 5024 ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any; 5025 ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); 5026 } else { 5027 error = EINVAL; 5028 } 5029 if (!error) 5030 error = sock_bind(newso, (struct sockaddr *)&ss); 5031 nfsmout_if(error); 5032 } 5033 5034 if (sotype == SOCK_STREAM) { 5035 on = 4; /* don't wait too long for the socket to connect */ 5036 sock_setsockopt(newso, IPPROTO_TCP, TCP_CONNECTIONTIMEOUT, &on, sizeof(on)); 5037 error = sock_connect(newso, saddr, 0); 5038 nfsmout_if(error); 5039 } 5040 if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) || 5041 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) || 5042 ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on))))) 5043 goto nfsmout; 5044 so = newso; 5045 } else { 5046 /* make sure socket is using a one second timeout in this function */ 5047 optlen = sizeof(orig_rcvto); 5048 error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen); 5049 if (!error) { 5050 optlen = sizeof(orig_sndto); 5051 error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen); 5052 } 5053 if (!error) { 5054 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); 5055 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); 5056 restoreto = 1; 5057 } 5058 } 5059 5060 if (sotype == SOCK_STREAM) { 5061 sendat = 0; /* we only resend the request for UDP */ 5062 nfs_rpc_record_state_init(&nrrs); 5063 } 5064 5065 for (try=0; try < timeo; try++) { 5066 if ((error = 
nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0))) 5067 break; 5068 if (!try || (try == sendat)) { 5069 /* send the request (resending periodically for UDP) */ 5070 if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) 5071 goto nfsmout; 5072 bzero(&msg, sizeof(msg)); 5073 if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) { 5074 msg.msg_name = saddr; 5075 msg.msg_namelen = saddr->sa_len; 5076 } 5077 if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen))) 5078 goto nfsmout; 5079 sendat *= 2; 5080 if (sendat > 30) 5081 sendat = 30; 5082 } 5083 /* wait for the response */ 5084 if (sotype == SOCK_STREAM) { 5085 /* try to read (more of) record */ 5086 error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep); 5087 /* if we don't have the whole record yet, we'll keep trying */ 5088 } else { 5089 readlen = 1<<18; 5090 bzero(&msg, sizeof(msg)); 5091 error = sock_receivembuf(so, &msg, &mrep, 0, &readlen); 5092 } 5093 if (error == EWOULDBLOCK) 5094 continue; 5095 nfsmout_if(error); 5096 /* parse the response */ 5097 nfsm_chain_dissect_init(error, nmrep, mrep); 5098 nfsm_chain_get_32(error, nmrep, rxid); 5099 nfsm_chain_get_32(error, nmrep, reply); 5100 nfsmout_if(error); 5101 if ((rxid != xid) || (reply != RPC_REPLY)) 5102 error = EBADRPC; 5103 nfsm_chain_get_32(error, nmrep, reply_status); 5104 nfsmout_if(error); 5105 if (reply_status == RPC_MSGDENIED) { 5106 nfsm_chain_get_32(error, nmrep, rejected_status); 5107 nfsmout_if(error); 5108 error = (rejected_status == RPC_MISMATCH) ? 
ERPCMISMATCH : EACCES; 5109 goto nfsmout; 5110 } 5111 nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */ 5112 nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */ 5113 nfsmout_if(error); 5114 if (verf_len) 5115 nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len)); 5116 nfsm_chain_get_32(error, nmrep, accepted_status); 5117 nfsmout_if(error); 5118 switch (accepted_status) { 5119 case RPC_SUCCESS: 5120 error = 0; 5121 break; 5122 case RPC_PROGUNAVAIL: 5123 error = EPROGUNAVAIL; 5124 break; 5125 case RPC_PROGMISMATCH: 5126 error = EPROGMISMATCH; 5127 break; 5128 case RPC_PROCUNAVAIL: 5129 error = EPROCUNAVAIL; 5130 break; 5131 case RPC_GARBAGE: 5132 error = EBADRPC; 5133 break; 5134 case RPC_SYSTEM_ERR: 5135 default: 5136 error = EIO; 5137 break; 5138 } 5139 break; 5140 } 5141nfsmout: 5142 if (restoreto) { 5143 sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv)); 5144 sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv)); 5145 } 5146 if (newso) { 5147 sock_shutdown(newso, SHUT_RDWR); 5148 sock_close(newso); 5149 } 5150 mbuf_freem(mreq); 5151 return (error); 5152} 5153 5154int 5155nfs_portmap_lookup( 5156 struct nfsmount *nmp, 5157 vfs_context_t ctx, 5158 struct sockaddr *sa, 5159 socket_t so, 5160 uint32_t protocol, 5161 uint32_t vers, 5162 uint32_t ipproto, 5163 int timeo) 5164{ 5165 thread_t thd = vfs_context_thread(ctx); 5166 kauth_cred_t cred = vfs_context_ucred(ctx); 5167 struct sockaddr_storage ss; 5168 struct sockaddr *saddr = (struct sockaddr*)&ss; 5169 struct nfsm_chain nmreq, nmrep; 5170 mbuf_t mreq; 5171 int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0; 5172 uint32_t port; 5173 uint64_t xid = 0; 5174 char uaddr[MAX_IPv6_STR_LEN+16]; 5175 5176 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5177 if (saddr->sa_family == AF_INET) { 5178 ip = 4; 5179 pmprog = PMAPPROG; 5180 pmvers = PMAPVERS; 5181 pmproc = PMAPPROC_GETPORT; 5182 } else if (saddr->sa_family == AF_INET6) { 5183 ip = 6; 5184 pmprog = 
RPCBPROG; 5185 pmvers = RPCBVERS4; 5186 pmproc = RPCBPROC_GETVERSADDR; 5187 } else { 5188 return (EINVAL); 5189 } 5190 nfsm_chain_null(&nmreq); 5191 nfsm_chain_null(&nmrep); 5192 5193tryagain: 5194 /* send portmapper request to get port/uaddr */ 5195 if (ip == 4) 5196 ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT); 5197 else 5198 ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT); 5199 nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED); 5200 nfsm_chain_add_32(error, &nmreq, protocol); 5201 nfsm_chain_add_32(error, &nmreq, vers); 5202 if (ip == 4) { 5203 nfsm_chain_add_32(error, &nmreq, ipproto); 5204 nfsm_chain_add_32(error, &nmreq, 0); 5205 } else { 5206 if (ipproto == IPPROTO_TCP) 5207 nfsm_chain_add_string(error, &nmreq, "tcp6", 4); 5208 else 5209 nfsm_chain_add_string(error, &nmreq, "udp6", 4); 5210 nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */ 5211 nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */ 5212 } 5213 nfsm_chain_build_done(error, &nmreq); 5214 nfsmout_if(error); 5215 error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM, 5216 pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, 5217 &xid, &mreq); 5218 nfsmout_if(error); 5219 nmreq.nmc_mhead = NULL; 5220 error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? 
SOCK_DGRAM : SOCK_STREAM, 5221 mreq, R_XID32(xid), 0, timeo, &nmrep); 5222 5223 /* grab port from portmap response */ 5224 if (ip == 4) { 5225 nfsm_chain_get_32(error, &nmrep, port); 5226 if (!error) 5227 ((struct sockaddr_in*)sa)->sin_port = htons(port); 5228 } else { 5229 /* get uaddr string and convert to sockaddr */ 5230 nfsm_chain_get_32(error, &nmrep, ualen); 5231 if (!error) { 5232 if (ualen > ((int)sizeof(uaddr)-1)) 5233 error = EIO; 5234 if (ualen < 1) { 5235 /* program is not available, just return a zero port */ 5236 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5237 ((struct sockaddr_in6*)saddr)->sin6_port = htons(0); 5238 } else { 5239 nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr); 5240 if (!error) { 5241 uaddr[ualen] = '\0'; 5242 if (!nfs_uaddr2sockaddr(uaddr, saddr)) 5243 error = EIO; 5244 } 5245 } 5246 } 5247 if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) { 5248 /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */ 5249 if (pmvers == RPCBVERS4) { 5250 /* fall back to v3 and GETADDR */ 5251 pmvers = RPCBVERS3; 5252 pmproc = RPCBPROC_GETADDR; 5253 nfsm_chain_cleanup(&nmreq); 5254 nfsm_chain_cleanup(&nmrep); 5255 bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); 5256 xid = 0; 5257 error = 0; 5258 goto tryagain; 5259 } 5260 } 5261 if (!error) 5262 bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len)); 5263 } 5264nfsmout: 5265 nfsm_chain_cleanup(&nmreq); 5266 nfsm_chain_cleanup(&nmrep); 5267 return (error); 5268} 5269 5270int 5271nfs_msg(thread_t thd, 5272 const char *server, 5273 const char *msg, 5274 int error) 5275{ 5276 proc_t p = thd ? 
get_bsdthreadtask_info(thd) : NULL; 5277 tpr_t tpr; 5278 5279 if (p) 5280 tpr = tprintf_open(p); 5281 else 5282 tpr = NULL; 5283 if (error) 5284 tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error); 5285 else 5286 tprintf(tpr, "nfs server %s: %s\n", server, msg); 5287 tprintf_close(tpr); 5288 return (0); 5289} 5290 5291#define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */ 5292#define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if the are automounted */ 5293#define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */ 5294#define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */ 5295#define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */ 5296 5297uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK; 5298int32_t nfs_is_mobile; 5299 5300#define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead time out for squishy mounts */ 5301#define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead time out when nfs_squish_flags NFS_SQUISH_QUICK bit is set*/ 5302 5303/* 5304 * Could this mount be squished? 5305 */ 5306int 5307nfs_can_squish(struct nfsmount *nmp) 5308{ 5309 uint64_t flags = vfs_flags(nmp->nm_mountp); 5310 int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) & NMFLAG(nmp, SOFT)); 5311 5312 if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0) 5313 return (0); 5314 5315 if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0) 5316 return (0); 5317 5318 return (1); 5319} 5320 5321/* 5322 * NFS mounts default to "rw,hard" - but frequently on mobile clients 5323 * the mount may become "not responding". It's desirable to be able 5324 * to unmount these dead mounts, but only if there is no risk of 5325 * losing data or crashing applications. A "squishy" NFS mount is one 5326 * that can be force unmounted with little risk of harm. 
5327 * 5328 * nfs_is_squishy checks if a mount is in a squishy state. A mount is 5329 * in a squishy state iff it is allowed to be squishy and there are no 5330 * dirty pages and there are no mmapped files and there are no files 5331 * open for write. Mounts are allowed to be squishy is controlled by 5332 * the settings of the nfs_squishy_flags and its mobility state. These 5333 * flags can be set by sysctls. 5334 * 5335 * If nfs_is_squishy determines that we are in a squishy state we will 5336 * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT 5337 * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see 5338 * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just 5339 * update the current dead timeout with the mount's nm_deadtimeout 5340 * value set at mount time. 5341 * 5342 * Assumes that nm_lock is held. 5343 * 5344 * Note this routine is racey, but its effects on setting the 5345 * dead timeout only have effects when we're in trouble and are likely 5346 * to stay that way. Since by default its only for automounted 5347 * volumes on mobile machines; this is a reasonable trade off between 5348 * data integrity and user experience. It can be disabled or set via 5349 * nfs.conf file. 5350 */ 5351 5352int 5353nfs_is_squishy(struct nfsmount *nmp) 5354{ 5355 mount_t mp = nmp->nm_mountp; 5356 int squishy = 0; 5357 int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT; 5358 5359 NFS_SOCK_DBG("%s: nm_curdeadtiemout = %d, nfs_is_mobile = %d\n", 5360 vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile); 5361 5362 if (!nfs_can_squish(nmp)) 5363 goto out; 5364 5365 timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo; 5366 NFS_SOCK_DBG("nm_writers = %d nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo); 5367 5368 if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) { 5369 uint64_t flags = mp ? 
vfs_flags(mp) : 0; 5370 squishy = 1; 5371 5372 /* 5373 * Walk the nfs nodes and check for dirty buffers it we're not 5374 * RDONLY and we've not already been declared as squishy since 5375 * this can be a bit expensive. 5376 */ 5377 if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY)) 5378 squishy = !nfs_mount_is_dirty(mp); 5379 } 5380 5381out: 5382 if (squishy) 5383 nmp->nm_state |= NFSSTA_SQUISHY; 5384 else 5385 nmp->nm_state &= ~NFSSTA_SQUISHY; 5386 5387 nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout; 5388 5389 NFS_SOCK_DBG("nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout); 5390 5391 return (squishy); 5392} 5393 5394/* 5395 * On a send operation, if we can't reach the server and we've got only one server to talk to 5396 * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead 5397 * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise. 5398 */ 5399static int 5400nfs_is_dead_lock(int error, struct nfsmount *nmp) 5401{ 5402 if (nmp->nm_state & NFSSTA_DEAD) 5403 return (1); 5404 5405 if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) || 5406 !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) 5407 return (0); 5408 5409 if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) { 5410 printf("nfs_is_dead: nfs server %s: unreachable. 
Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); 5411 nmp->nm_state |= NFSSTA_DEAD; 5412 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); 5413 return (1); 5414 } 5415 return (0); 5416} 5417 5418int 5419nfs_is_dead(int error, struct nfsmount *nmp) 5420{ 5421 int is_dead; 5422 5423 lck_mtx_lock(&nmp->nm_lock); 5424 is_dead = nfs_is_dead_lock(error, nmp); 5425 lck_mtx_unlock(&nmp->nm_lock); 5426 5427 return (is_dead); 5428} 5429 5430void 5431nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg) 5432{ 5433 int timeoutmask, wasunresponsive, unresponsive, softnobrowse; 5434 uint32_t do_vfs_signal; 5435 struct timeval now; 5436 5437 if (nmp == NULL) 5438 return; 5439 5440 lck_mtx_lock(&nmp->nm_lock); 5441 5442 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; 5443 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ 5444 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; 5445 wasunresponsive = (nmp->nm_state & timeoutmask); 5446 5447 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ 5448 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); 5449 5450 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) 5451 nmp->nm_state |= NFSSTA_TIMEO; 5452 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) 5453 nmp->nm_state |= NFSSTA_LOCKTIMEO; 5454 if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) 5455 nmp->nm_state |= NFSSTA_JUKEBOXTIMEO; 5456 5457 unresponsive = (nmp->nm_state & timeoutmask); 5458 5459 nfs_is_squishy(nmp); 5460 5461 if (unresponsive && (nmp->nm_curdeadtimeout > 0)) { 5462 microuptime(&now); 5463 if (!wasunresponsive) { 5464 nmp->nm_deadto_start = now.tv_sec; 5465 nfs_mount_sock_thread_wake(nmp); 5466 } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout) { 5467 if (!(nmp->nm_state & NFSSTA_DEAD)) 5468 printf("nfs server 
%s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, 5469 (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : ""); 5470 nmp->nm_state |= NFSSTA_DEAD; 5471 } 5472 } 5473 lck_mtx_unlock(&nmp->nm_lock); 5474 5475 if (nmp->nm_state & NFSSTA_DEAD) 5476 do_vfs_signal = VQ_DEAD; 5477 else if (softnobrowse || wasunresponsive || !unresponsive) 5478 do_vfs_signal = 0; 5479 else 5480 do_vfs_signal = VQ_NOTRESP; 5481 if (do_vfs_signal) 5482 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0); 5483 5484 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error); 5485} 5486 5487void 5488nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg) 5489{ 5490 int timeoutmask, wasunresponsive, unresponsive, softnobrowse; 5491 int do_vfs_signal; 5492 5493 if (nmp == NULL) 5494 return; 5495 5496 if (msg) 5497 nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0); 5498 5499 lck_mtx_lock(&nmp->nm_lock); 5500 5501 timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; 5502 if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ 5503 timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; 5504 wasunresponsive = (nmp->nm_state & timeoutmask); 5505 5506 /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ 5507 softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); 5508 5509 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) 5510 nmp->nm_state &= ~NFSSTA_TIMEO; 5511 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) 5512 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 5513 if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) 5514 nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO; 5515 5516 unresponsive = (nmp->nm_state & timeoutmask); 5517 5518 nmp->nm_deadto_start = 0; 5519 nmp->nm_curdeadtimeout = nmp->nm_deadtimeout; 5520 nmp->nm_state &= ~NFSSTA_SQUISHY; 5521 lck_mtx_unlock(&nmp->nm_lock); 5522 5523 
if (softnobrowse) 5524 do_vfs_signal = 0; 5525 else 5526 do_vfs_signal = (wasunresponsive && !unresponsive); 5527 if (do_vfs_signal) 5528 vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1); 5529} 5530 5531 5532#endif /* NFSCLIENT */ 5533 5534#if NFSSERVER 5535 5536/* 5537 * Generate the rpc reply header 5538 * siz arg. is used to decide if adding a cluster is worthwhile 5539 */ 5540int 5541nfsrv_rephead( 5542 struct nfsrv_descript *nd, 5543 __unused struct nfsrv_sock *slp, 5544 struct nfsm_chain *nmrepp, 5545 size_t siz) 5546{ 5547 mbuf_t mrep; 5548 u_int32_t *tl; 5549 struct nfsm_chain nmrep; 5550 int err, error; 5551 5552 err = nd->nd_repstat; 5553 if (err && (nd->nd_vers == NFS_VER2)) 5554 siz = 0; 5555 5556 /* 5557 * If this is a big reply, use a cluster else 5558 * try and leave leading space for the lower level headers. 5559 */ 5560 siz += RPC_REPLYSIZ; 5561 if (siz >= nfs_mbuf_minclsize) { 5562 error = mbuf_getpacket(MBUF_WAITOK, &mrep); 5563 } else { 5564 error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep); 5565 } 5566 if (error) { 5567 /* unable to allocate packet */ 5568 /* XXX should we keep statistics for these errors? */ 5569 return (error); 5570 } 5571 if (siz < nfs_mbuf_minclsize) { 5572 /* leave space for lower level headers */ 5573 tl = mbuf_data(mrep); 5574 tl += 80/sizeof(*tl); /* XXX max_hdr? 
XXX */ 5575 mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED); 5576 } 5577 nfsm_chain_init(&nmrep, mrep); 5578 nfsm_chain_add_32(error, &nmrep, nd->nd_retxid); 5579 nfsm_chain_add_32(error, &nmrep, RPC_REPLY); 5580 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { 5581 nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); 5582 if (err & NFSERR_AUTHERR) { 5583 nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); 5584 nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR)); 5585 } else { 5586 nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); 5587 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 5588 nfsm_chain_add_32(error, &nmrep, RPC_VER2); 5589 } 5590 } else { 5591 /* reply status */ 5592 nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED); 5593 if (nd->nd_gss_context != NULL) { 5594 /* RPCSEC_GSS verifier */ 5595 error = nfs_gss_svc_verf_put(nd, &nmrep); 5596 if (error) { 5597 nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR); 5598 goto done; 5599 } 5600 } else { 5601 /* RPCAUTH_NULL verifier */ 5602 nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL); 5603 nfsm_chain_add_32(error, &nmrep, 0); 5604 } 5605 /* accepted status */ 5606 switch (err) { 5607 case EPROGUNAVAIL: 5608 nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL); 5609 break; 5610 case EPROGMISMATCH: 5611 nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH); 5612 /* XXX hard coded versions? */ 5613 nfsm_chain_add_32(error, &nmrep, NFS_VER2); 5614 nfsm_chain_add_32(error, &nmrep, NFS_VER3); 5615 break; 5616 case EPROCUNAVAIL: 5617 nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL); 5618 break; 5619 case EBADRPC: 5620 nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE); 5621 break; 5622 default: 5623 nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS); 5624 if (nd->nd_gss_context != NULL) 5625 error = nfs_gss_svc_prepare_reply(nd, &nmrep); 5626 if (err != NFSERR_RETVOID) 5627 nfsm_chain_add_32(error, &nmrep, 5628 (err ? 
nfsrv_errmap(nd, err) : 0)); 5629 break; 5630 } 5631 } 5632 5633done: 5634 nfsm_chain_build_done(error, &nmrep); 5635 if (error) { 5636 /* error composing reply header */ 5637 /* XXX should we keep statistics for these errors? */ 5638 mbuf_freem(mrep); 5639 return (error); 5640 } 5641 5642 *nmrepp = nmrep; 5643 if ((err != 0) && (err != NFSERR_RETVOID)) 5644 OSAddAtomic64(1, &nfsstats.srvrpc_errs); 5645 return (0); 5646} 5647 5648/* 5649 * The nfs server send routine. 5650 * 5651 * - return EINTR or ERESTART if interrupted by a signal 5652 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 5653 * - do any cleanup required by recoverable socket errors (???) 5654 */ 5655int 5656nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top) 5657{ 5658 int error; 5659 socket_t so = slp->ns_so; 5660 struct sockaddr *sendnam; 5661 struct msghdr msg; 5662 5663 bzero(&msg, sizeof(msg)); 5664 if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) { 5665 if ((sendnam = mbuf_data(nam))) { 5666 msg.msg_name = (caddr_t)sendnam; 5667 msg.msg_namelen = sendnam->sa_len; 5668 } 5669 } 5670 error = sock_sendmbuf(so, &msg, top, 0, NULL); 5671 if (!error) 5672 return (0); 5673 log(LOG_INFO, "nfsd send error %d\n", error); 5674 5675 if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM)) 5676 error = EPIPE; /* zap TCP sockets if they time out on send */ 5677 5678 /* Handle any recoverable (soft) socket errors here. (???) */ 5679 if (error != EINTR && error != ERESTART && error != EIO && 5680 error != EWOULDBLOCK && error != EPIPE) 5681 error = 0; 5682 5683 return (error); 5684} 5685 5686/* 5687 * Socket upcall routine for the nfsd sockets. 5688 * The caddr_t arg is a pointer to the "struct nfsrv_sock". 5689 * Essentially do as much as possible non-blocking, else punt and it will 5690 * be called with MBUF_WAITOK from an nfsd. 
5691 */ 5692void 5693nfsrv_rcv(socket_t so, void *arg, int waitflag) 5694{ 5695 struct nfsrv_sock *slp = arg; 5696 5697 if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID)) 5698 return; 5699 5700 lck_rw_lock_exclusive(&slp->ns_rwlock); 5701 nfsrv_rcv_locked(so, slp, waitflag); 5702 /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */ 5703} 5704void 5705nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag) 5706{ 5707 mbuf_t m, mp, mhck, m2; 5708 int ns_flag=0, error; 5709 struct msghdr msg; 5710 size_t bytes_read; 5711 5712 if ((slp->ns_flag & SLP_VALID) == 0) { 5713 if (waitflag == MBUF_DONTWAIT) 5714 lck_rw_done(&slp->ns_rwlock); 5715 return; 5716 } 5717 5718#ifdef notdef 5719 /* 5720 * Define this to test for nfsds handling this under heavy load. 5721 */ 5722 if (waitflag == MBUF_DONTWAIT) { 5723 ns_flag = SLP_NEEDQ; 5724 goto dorecs; 5725 } 5726#endif 5727 if (slp->ns_sotype == SOCK_STREAM) { 5728 /* 5729 * If there are already records on the queue, defer soreceive() 5730 * to an(other) nfsd so that there is feedback to the TCP layer that 5731 * the nfs servers are heavily loaded. 5732 */ 5733 if (slp->ns_rec) { 5734 ns_flag = SLP_NEEDQ; 5735 goto dorecs; 5736 } 5737 5738 /* 5739 * Do soreceive(). 5740 */ 5741 bytes_read = 1000000000; 5742 error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read); 5743 if (error || mp == NULL) { 5744 if (error == EWOULDBLOCK) 5745 ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0; 5746 else 5747 ns_flag = SLP_DISCONN; 5748 goto dorecs; 5749 } 5750 m = mp; 5751 if (slp->ns_rawend) { 5752 if ((error = mbuf_setnext(slp->ns_rawend, m))) 5753 panic("nfsrv_rcv: mbuf_setnext failed %d\n", error); 5754 slp->ns_cc += bytes_read; 5755 } else { 5756 slp->ns_raw = m; 5757 slp->ns_cc = bytes_read; 5758 } 5759 while ((m2 = mbuf_next(m))) 5760 m = m2; 5761 slp->ns_rawend = m; 5762 5763 /* 5764 * Now try and parse record(s) out of the raw stream data. 
5765 */ 5766 error = nfsrv_getstream(slp, waitflag); 5767 if (error) { 5768 if (error == EPERM) 5769 ns_flag = SLP_DISCONN; 5770 else 5771 ns_flag = SLP_NEEDQ; 5772 } 5773 } else { 5774 struct sockaddr_storage nam; 5775 5776 if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) { 5777 /* already have max # RPC records queued on this socket */ 5778 ns_flag = SLP_NEEDQ; 5779 goto dorecs; 5780 } 5781 5782 bzero(&msg, sizeof(msg)); 5783 msg.msg_name = (caddr_t)&nam; 5784 msg.msg_namelen = sizeof(nam); 5785 5786 do { 5787 bytes_read = 1000000000; 5788 error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read); 5789 if (mp) { 5790 if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) { 5791 mbuf_setlen(mhck, nam.ss_len); 5792 bcopy(&nam, mbuf_data(mhck), nam.ss_len); 5793 m = mhck; 5794 if (mbuf_setnext(m, mp)) { 5795 /* trouble... just drop it */ 5796 printf("nfsrv_rcv: mbuf_setnext failed\n"); 5797 mbuf_free(mhck); 5798 m = mp; 5799 } 5800 } else { 5801 m = mp; 5802 } 5803 if (slp->ns_recend) 5804 mbuf_setnextpkt(slp->ns_recend, m); 5805 else { 5806 slp->ns_rec = m; 5807 slp->ns_flag |= SLP_DOREC; 5808 } 5809 slp->ns_recend = m; 5810 mbuf_setnextpkt(m, NULL); 5811 slp->ns_reccnt++; 5812 } 5813 } while (mp); 5814 } 5815 5816 /* 5817 * Now try and process the request records, non-blocking. 5818 */ 5819dorecs: 5820 if (ns_flag) 5821 slp->ns_flag |= ns_flag; 5822 if (waitflag == MBUF_DONTWAIT) { 5823 int wake = (slp->ns_flag & SLP_WORKTODO); 5824 lck_rw_done(&slp->ns_rwlock); 5825 if (wake && nfsd_thread_count) { 5826 lck_mtx_lock(nfsd_mutex); 5827 nfsrv_wakenfsd(slp); 5828 lck_mtx_unlock(nfsd_mutex); 5829 } 5830 } 5831} 5832 5833/* 5834 * Try and extract an RPC request from the mbuf data list received on a 5835 * stream socket. The "waitflag" argument indicates whether or not it 5836 * can sleep. 
 */
int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_int32_t recmark;

	/*
	 * SLP_GETSTREAM serializes stream parsing on this socket;
	 * concurrent entry indicates a bug, hence the panic.
	 */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		/*
		 * First, pick up the 4-byte record mark for the next record,
		 * if we don't already have one (ns_reclen == 0).
		 */
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				/* not enough buffered data for a record mark yet */
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				/* record mark is contiguous in the first mbuf */
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				/* record mark straddles mbufs; gather it a byte at a time */
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			/*
			 * The record mark is in network byte order; the high bit
			 * flags the last fragment of the record, the low 31 bits
			 * are the fragment length.
			 */
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			/*
			 * NOTE(review): the comment below says ns_reclen may be 0
			 * (Linux 0-length RPCs), but this check rejects a zero
			 * length — and if ns_reclen is unsigned, "<= 0" is just
			 * "== 0".  Confirm intended handling of 0-length fragments.
			 */
			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* exactly one fragment buffered: take the whole raw chain */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/*
			 * More than one fragment's worth buffered: walk the chain
			 * and split it at the fragment boundary.
			 */
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					/*
					 * Boundary falls inside this mbuf: copy the
					 * head of it into the record and advance the
					 * mbuf's data pointer past the copied bytes.
					 */
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n");
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						recm = m2;
					}
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					/* boundary falls exactly at the end of this mbuf */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n");
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					/* whole mbuf belongs to this fragment; keep walking */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			/* m is now the head of the leftover (next record's) data */
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* fragment not fully buffered yet; wait for more data */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			/* append this fragment to the tail of the partial record */
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* complete record: move it onto the ns_rec packet queue */
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}

/*
 * Parse an RPC header.
 *
 * Dequeue the next complete record from the socket, allocate an
 * nfsrv_descript for it, peel off the sender address (a leading
 * MBUF_TYPE_SONAME mbuf, if present), and parse the RPC header
 * via nfsrv_getreq().  On success the descriptor is returned in
 * *ndp and recorded in nfsd->nfsd_nd; on failure it is freed.
 */
int
nfsrv_dorec(
	struct nfsrv_sock *slp,
	struct nfsd *nfsd,
	struct nfsrv_descript **ndp)
{
	mbuf_t m;
	mbuf_t nam;
	struct nfsrv_descript *nd;
	int error = 0;

	*ndp = NULL;
	if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL))
		return (ENOBUFS);
	MALLOC_ZONE(nd, struct nfsrv_descript *,
			sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
	if (!nd)
		return (ENOMEM);
	/* unlink the first record from the socket's packet queue */
	m = slp->ns_rec;
	slp->ns_rec = mbuf_nextpkt(m);
	if (slp->ns_rec)
		mbuf_setnextpkt(m, NULL);
	else {
		/* queue drained: clear the work flag and tail pointer */
		slp->ns_flag &= ~SLP_DOREC;
		slp->ns_recend = NULL;
	}
	slp->ns_reccnt--;
	if (mbuf_type(m) == MBUF_TYPE_SONAME) {
		/* detach the sender-address mbuf from the front of the record */
		nam = m;
		m = mbuf_next(m);
		if ((error = mbuf_setnext(nam, NULL)))
			panic("nfsrv_dorec: mbuf_setnext failed %d\n", error);
	} else
		nam = NULL;
	nd->nd_nam2 = nam;
	nfsm_chain_dissect_init(error, &nd->nd_nmreq, m);
	if (!error)
		error = nfsrv_getreq(nd);
	if (error) {
		/* parse failed: release the address, GSS context, and descriptor */
		if (nam)
			mbuf_freem(nam);
		if (nd->nd_gss_context)
			nfs_gss_svc_ctx_deref(nd->nd_gss_context);
		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
		return (error);
	}
	nd->nd_mrep = NULL;
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred
 struct.
 *
 * Returns 0 either on success or when the request should be answered with
 * an RPC-level error (in which case nd->nd_repstat is set and nd_procnum
 * is forced to NFSPROC_NOOP).  Returns a non-zero errno (e.g. EBADRPC)
 * when the request is malformed and should be dropped.
 */
int
nfsrv_getreq(struct nfsrv_descript *nd)
{
	struct nfsm_chain *nmreq;
	int len, i;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	uid_t user_id;
	gid_t group_id;
	int ngroups;
	uint32_t val;

	nd->nd_cr = NULL;
	nd->nd_gss_context = NULL;
	nd->nd_gss_seqnum = 0;
	nd->nd_gss_mb = NULL;

	/* -2 is the traditional "nobody" default for uid/gid */
	user_id = group_id = -2;
	val = auth_type = len = 0;

	nmreq = &nd->nd_nmreq;
	nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID
	nfsm_chain_get_32(error, nmreq, val); // RPC Call
	if (!error && (val != RPC_CALL))
		error = EBADRPC;
	nfsmout_if(error);
	nd->nd_repstat = 0;
	nfsm_chain_get_32(error, nmreq, val); // RPC Version
	nfsmout_if(error);
	if (val != RPC_VER2) {
		/* reply with an RPC version mismatch rather than dropping */
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, val); // RPC Program Number
	nfsmout_if(error);
	if (val != NFS_PROG) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number
	nfsmout_if(error);
	if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) {
		/* only NFSv2 and NFSv3 are served here */
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_vers = nfsvers;
	nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number
	nfsmout_if(error);
	if ((nd->nd_procnum >= NFS_NPROCS) ||
		((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* map v2 procedure numbers onto the common (v3) procedure space */
	if (nfsvers != NFS_VER3)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor
	nfsm_chain_get_32(error, nmreq, len); // Auth Length
	if (!error && (len < 0 || len > RPCAUTH_MAXSIZ))
		error = EBADRPC;
	nfsmout_if(error);

	/* Handle authentication */
	if (auth_type == RPCAUTH_SYS) {
		struct posix_cred temp_pcred;
		/* NULL proc needs no credential */
		if (nd->nd_procnum == NFSPROC_NULL)
			return (0);
		nd->nd_sec = RPCAUTH_SYS;
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
		nfsm_chain_get_32(error, nmreq, len);		// hostname length
		if (len < 0 || len > NFS_MAXNAMLEN)
			error = EBADRPC;
		nfsm_chain_adv(error, nmreq, nfsm_rndup(len));	// skip hostname
		nfsmout_if(error);

		/* create a temporary credential using the bits from the wire */
		bzero(&temp_pcred, sizeof(temp_pcred));
		nfsm_chain_get_32(error, nmreq, user_id);
		nfsm_chain_get_32(error, nmreq, group_id);
		temp_pcred.cr_groups[0] = group_id;
		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
			error = EBADRPC;
		nfsmout_if(error);
		/*
		 * Copy at most NGROUPS-1 extra GIDs (slot 0 holds the primary
		 * group); any remainder on the wire is skipped, not stored.
		 */
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
			else
				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		nfsmout_if(error);
		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (ngroups > 1)
			nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
		nfsm_chain_get_32(error, nmreq, len);		// verifier length
		if (len < 0 || len > RPCAUTH_MAXSIZ)
			error = EBADRPC;
		if (len > 0)
			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));

		/* request creation of a real credential */
		temp_pcred.cr_uid = user_id;
		temp_pcred.cr_ngroups = ngroups;
		nd->nd_cr = posix_cred_create(&temp_pcred);
		if (nd->nd_cr == NULL) {
			nd->nd_repstat = ENOMEM;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else if (auth_type == RPCSEC_GSS) {
		error = nfs_gss_svc_cred_get(nd, nmreq);
		if (error) {
			if (error == EINVAL)
				goto nfsmout;	// drop the request
			nd->nd_repstat = error;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum == NFSPROC_NULL)	// assume it's AUTH_NONE
			return (0);
		/* unsupported flavor: reject the credential */
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	return (0);
nfsmout:
	/* error path: drop any credential reference and free the request chain */
	if (IS_VALID_CRED(nd->nd_cr))
		kauth_cred_unref(&nd->nd_cr);
	nfsm_chain_cleanup(nmreq);
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, make sure the socket is queued up so that one
 * of the running nfsds will go look for the work in the nfsrv_sockwait list.
 * Note: Must be called with nfsd_mutex held.
6184 */ 6185void 6186nfsrv_wakenfsd(struct nfsrv_sock *slp) 6187{ 6188 struct nfsd *nd; 6189 6190 if ((slp->ns_flag & SLP_VALID) == 0) 6191 return; 6192 6193 lck_rw_lock_exclusive(&slp->ns_rwlock); 6194 /* if there's work to do on this socket, make sure it's queued up */ 6195 if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) { 6196 TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq); 6197 slp->ns_flag |= SLP_WAITQ; 6198 } 6199 lck_rw_done(&slp->ns_rwlock); 6200 6201 /* wake up a waiting nfsd, if possible */ 6202 nd = TAILQ_FIRST(&nfsd_queue); 6203 if (!nd) 6204 return; 6205 6206 TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue); 6207 nd->nfsd_flag &= ~NFSD_WAITING; 6208 wakeup(nd); 6209} 6210 6211#endif /* NFSSERVER */ 6212