1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1988, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $ 62 */ 63 64 65#include <sys/param.h> 66#include <sys/systm.h> 67#include <sys/kernel.h> 68#include <sys/sysctl.h> 69#include <sys/mbuf.h> 70#if INET6 71#include <sys/domain.h> 72#endif /* INET6 */ 73#include <sys/kasl.h> 74#include <sys/socket.h> 75#include <sys/socketvar.h> 76#include <sys/protosw.h> 77#include <sys/syslog.h> 78 79#include <net/if.h> 80#include <net/route.h> 81#include <net/ntstat.h> 82 83#include <netinet/in.h> 84#include <netinet/in_systm.h> 85#if INET6 86#include <netinet/ip6.h> 87#endif 88#include <netinet/in_pcb.h> 89#if INET6 90#include <netinet6/in6_pcb.h> 91#endif 92#include <netinet/in_var.h> 93#include <netinet/ip_var.h> 94#if INET6 95#include <netinet6/ip6_var.h> 96#endif 97#include <netinet/tcp.h> 98#include <netinet/tcp_fsm.h> 99#include <netinet/tcp_seq.h> 100#include <netinet/tcp_timer.h> 101#include <netinet/tcp_var.h> 102#include <netinet/tcpip.h> 103#include <mach/sdt.h> 104#if TCPDEBUG 105#include <netinet/tcp_debug.h> 106#endif 107#if MPTCP 108#include <netinet/mptcp_var.h> 109#endif /* MPTCP */ 110 111#if IPSEC 112#include <netinet6/ipsec.h> 113#endif /*IPSEC*/ 114 115#if FLOW_DIVERT 116#include <netinet/flow_divert.h> 117#endif /* FLOW_DIVERT */ 118 119void tcp_fill_info(struct tcpcb *, struct tcp_info *); 120errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *); 121 122int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *); 123 124/* 125 * TCP protocol interface to socket abstraction. 126 */ 127extern char *tcpstates[]; /* XXX ??? */ 128 129static int tcp_attach(struct socket *, struct proc *); 130static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *); 131#if INET6 132static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *); 133static int tcp6_usr_connect(struct socket *, struct sockaddr *, 134 struct proc *); 135#endif /* INET6 */ 136static struct tcpcb * 137 tcp_disconnect(struct tcpcb *); 138static struct tcpcb * 139 tcp_usrclosed(struct tcpcb *); 140 141extern uint32_t tcp_autorcvbuf_max; 142 143extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb); 144 145#if TCPDEBUG 146#define TCPDEBUG0 int ostate = 0 147#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 148#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 149 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 150#else 151#define TCPDEBUG0 152#define TCPDEBUG1() 153#define TCPDEBUG2(req) 154#endif 155 156SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, 157 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN, 158 0 , 0, tcp_sysctl_info, "S", "TCP info per tuple"); 159 160/* 161 * TCP attaches to socket via pru_attach(), reserving space, 162 * and an internet control block. 163 * 164 * Returns: 0 Success 165 * EISCONN 166 * tcp_attach:ENOBUFS 167 * tcp_attach:ENOMEM 168 * tcp_attach:??? [IPSEC specific] 169 */ 170static int 171tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p) 172{ 173 int error; 174 struct inpcb *inp = sotoinpcb(so); 175 struct tcpcb *tp = 0; 176 TCPDEBUG0; 177 178 TCPDEBUG1(); 179 if (inp) { 180 error = EISCONN; 181 goto out; 182 } 183 184 error = tcp_attach(so, p); 185 if (error) 186 goto out; 187 188 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 189 so->so_linger = TCP_LINGERTIME * hz; 190 tp = sototcpcb(so); 191out: 192 TCPDEBUG2(PRU_ATTACH); 193 return error; 194} 195 196/* 197 * pru_detach() detaches the TCP protocol from the socket. 198 * If the protocol state is non-embryonic, then can't 199 * do this directly: have to initiate a pru_disconnect(), 200 * which may finish later; embryonic TCB's can just 201 * be discarded here. 202 */ 203static int 204tcp_usr_detach(struct socket *so) 205{ 206 int error = 0; 207 struct inpcb *inp = sotoinpcb(so); 208 struct tcpcb *tp; 209 TCPDEBUG0; 210 211 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { 212 return EINVAL; /* XXX */ 213 } 214 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); 215 tp = intotcpcb(inp); 216 /* In case we got disconnected from the peer */ 217 if (tp == NULL) 218 goto out; 219 TCPDEBUG1(); 220 221 calculate_tcp_clock(); 222 223 tp = tcp_disconnect(tp); 224out: 225 TCPDEBUG2(PRU_DETACH); 226 return error; 227} 228 229#define COMMON_START() TCPDEBUG0; \ 230do { \ 231 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \ 232 return (EINVAL); \ 233 if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) \ 234 return (EPROTOTYPE); \ 235 tp = intotcpcb(inp); \ 236 TCPDEBUG1(); \ 237 calculate_tcp_clock(); \ 238} while (0) 239 240#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out 241 242 243/* 244 * Give the socket an address. 245 * 246 * Returns: 0 Success 247 * EINVAL Invalid argument [COMMON_START] 248 * EAFNOSUPPORT Address family not supported 249 * in_pcbbind:EADDRNOTAVAIL Address not available. 250 * in_pcbbind:EINVAL Invalid argument 251 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 252 * in_pcbbind:EACCES Permission denied 253 * in_pcbbind:EADDRINUSE Address in use 254 * in_pcbbind:EAGAIN Resource unavailable, try again 255 * in_pcbbind:EPERM Operation not permitted 256 */ 257static int 258tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 259{ 260 int error = 0; 261 struct inpcb *inp = sotoinpcb(so); 262 struct tcpcb *tp; 263 struct sockaddr_in *sinp; 264 265 COMMON_START(); 266 267 if (nam->sa_family != 0 && nam->sa_family != AF_INET) { 268 error = EAFNOSUPPORT; 269 goto out; 270 } 271 272 /* 273 * Must check for multicast addresses and disallow binding 274 * to them. 275 */ 276 sinp = (struct sockaddr_in *)(void *)nam; 277 if (sinp->sin_family == AF_INET && 278 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 279 error = EAFNOSUPPORT; 280 goto out; 281 } 282 error = in_pcbbind(inp, nam, p); 283 if (error) 284 goto out; 285 COMMON_END(PRU_BIND); 286 287} 288 289#if INET6 290static int 291tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 292{ 293 int error = 0; 294 struct inpcb *inp = sotoinpcb(so); 295 struct tcpcb *tp; 296 struct sockaddr_in6 *sin6p; 297 298 COMMON_START(); 299 300 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { 301 error = EAFNOSUPPORT; 302 goto out; 303 } 304 305 /* 306 * Must check for multicast addresses and disallow binding 307 * to them. 308 */ 309 sin6p = (struct sockaddr_in6 *)(void *)nam; 310 if (sin6p->sin6_family == AF_INET6 && 311 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 312 error = EAFNOSUPPORT; 313 goto out; 314 } 315 inp->inp_vflag &= ~INP_IPV4; 316 inp->inp_vflag |= INP_IPV6; 317 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 318 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 319 inp->inp_vflag |= INP_IPV4; 320 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 321 struct sockaddr_in sin; 322 323 in6_sin6_2_sin(&sin, sin6p); 324 inp->inp_vflag |= INP_IPV4; 325 inp->inp_vflag &= ~INP_IPV6; 326 error = in_pcbbind(inp, (struct sockaddr *)&sin, p); 327 goto out; 328 } 329 } 330 error = in6_pcbbind(inp, nam, p); 331 if (error) 332 goto out; 333 COMMON_END(PRU_BIND); 334} 335#endif /* INET6 */ 336 337/* 338 * Prepare to accept connections. 339 * 340 * Returns: 0 Success 341 * EINVAL [COMMON_START] 342 * in_pcbbind:EADDRNOTAVAIL Address not available. 343 * in_pcbbind:EINVAL Invalid argument 344 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 345 * in_pcbbind:EACCES Permission denied 346 * in_pcbbind:EADDRINUSE Address in use 347 * in_pcbbind:EAGAIN Resource unavailable, try again 348 * in_pcbbind:EPERM Operation not permitted 349 */ 350static int 351tcp_usr_listen(struct socket *so, struct proc *p) 352{ 353 int error = 0; 354 struct inpcb *inp = sotoinpcb(so); 355 struct tcpcb *tp; 356 357 COMMON_START(); 358 if (inp->inp_lport == 0) 359 error = in_pcbbind(inp, NULL, p); 360 if (error == 0) 361 tp->t_state = TCPS_LISTEN; 362 COMMON_END(PRU_LISTEN); 363} 364 365#if INET6 366static int 367tcp6_usr_listen(struct socket *so, struct proc *p) 368{ 369 int error = 0; 370 struct inpcb *inp = sotoinpcb(so); 371 struct tcpcb *tp; 372 373 COMMON_START(); 374 if (inp->inp_lport == 0) { 375 inp->inp_vflag &= ~INP_IPV4; 376 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 377 inp->inp_vflag |= INP_IPV4; 378 error = in6_pcbbind(inp, NULL, p); 379 } 380 if (error == 0) 381 tp->t_state = TCPS_LISTEN; 382 COMMON_END(PRU_LISTEN); 383} 384#endif /* INET6 */ 385 386/* 387 * Initiate connection to peer. 388 * Create a template for use in transmissions on this connection. 389 * Enter SYN_SENT state, and mark socket as connecting. 390 * Start keep-alive timer, and seed output sequence space. 391 * Send initial segment on connection. 392 */ 393static int 394tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 395{ 396 int error = 0; 397 struct inpcb *inp = sotoinpcb(so); 398 struct tcpcb *tp; 399 struct sockaddr_in *sinp; 400 401 TCPDEBUG0; 402 if (inp == NULL) { 403 return EINVAL; 404 } else if (inp->inp_state == INPCB_STATE_DEAD) { 405 if (so->so_error) { 406 error = so->so_error; 407 so->so_error = 0; 408 return error; 409 } else 410 return EINVAL; 411 } 412#if FLOW_DIVERT 413 else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) { 414 uint32_t fd_ctl_unit = 0; 415 error = flow_divert_check_policy(so, p, FALSE, &fd_ctl_unit); 416 if (error == 0) { 417 if (fd_ctl_unit > 0) { 418 error = flow_divert_pcb_init(so, fd_ctl_unit); 419 if (error == 0) { 420 error = flow_divert_connect_out(so, nam, p); 421 } 422 } else { 423 error = ENETDOWN; 424 } 425 } 426 return error; 427 } 428#endif /* FLOW_DIVERT */ 429 tp = intotcpcb(inp); 430 TCPDEBUG1(); 431 432 calculate_tcp_clock(); 433 434 if (nam->sa_family != 0 && nam->sa_family != AF_INET) { 435 error = EAFNOSUPPORT; 436 goto out; 437 } 438 /* 439 * Must disallow TCP ``connections'' to multicast addresses. 440 */ 441 sinp = (struct sockaddr_in *)(void *)nam; 442 if (sinp->sin_family == AF_INET 443 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 444 error = EAFNOSUPPORT; 445 goto out; 446 } 447 448 if ((error = tcp_connect(tp, nam, p)) != 0) 449 goto out; 450 error = tcp_output(tp); 451 COMMON_END(PRU_CONNECT); 452} 453 454static int 455tcp_usr_connectx_common(struct socket *so, int af, 456 struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, 457 struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid, 458 uint32_t flags, void *arg, uint32_t arglen) 459{ 460#pragma unused(aid) 461#if !MPTCP 462#pragma unused(flags, arg, arglen) 463#endif /* !MPTCP */ 464 struct sockaddr_entry *src_se = NULL, *dst_se = NULL; 465 struct inpcb *inp = sotoinpcb(so); 466 int error; 467 468 if (inp == NULL) 469 return (EINVAL); 470 471 VERIFY(dst_sl != NULL); 472 473 /* select source (if specified) and destination addresses */ 474 error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se); 475 if (error != 0) 476 return (error); 477 478 VERIFY(*dst_sl != NULL && dst_se != NULL); 479 VERIFY(src_se == NULL || *src_sl != NULL); 480 VERIFY(dst_se->se_addr->sa_family == af); 481 VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); 482 483 /* 484 * We get here for 2 cases: 485 * 486 * a. From MPTCP, to connect a subflow. There is no need to 487 * bind the socket to the source address and/or interface, 488 * since everything has been taken care of by MPTCP. We 489 * simply check whether or not this is for the initial 490 * MPTCP connection attempt, or to join an existing one. 491 * 492 * b. From the socket layer, to connect a TCP. Perform the 493 * bind to source address and/or interface as necessary. 494 */ 495#if MPTCP 496 if (flags & TCP_CONNREQF_MPTCP) { 497 struct mptsub_connreq *mpcr = arg; 498 499 /* Check to make sure this came down from MPTCP */ 500 if (arg == NULL || arglen != sizeof (*mpcr)) 501 return (EOPNOTSUPP); 502 503 switch (mpcr->mpcr_type) { 504 case MPTSUB_CONNREQ_MP_ENABLE: 505 break; 506 case MPTSUB_CONNREQ_MP_ADD: 507 break; 508 default: 509 return (EOPNOTSUPP); 510 } 511 } else 512#endif /* MPTCP */ 513 { 514 /* bind socket to the specified interface, if requested */ 515 if (ifscope != IFSCOPE_NONE && 516 (error = inp_bindif(inp, ifscope, NULL)) != 0) 517 return (error); 518 519 /* if source address and/or port is specified, bind to it */ 520 if (src_se != NULL) { 521 struct sockaddr *sa = src_se->se_addr; 522 error = sobindlock(so, sa, 0); /* already locked */ 523 if (error != 0) 524 return (error); 525 } 526 } 527 528 switch (af) { 529 case AF_INET: 530 error = tcp_usr_connect(so, dst_se->se_addr, p); 531 break; 532#if INET6 533 case AF_INET6: 534 error = tcp6_usr_connect(so, dst_se->se_addr, p); 535 break; 536#endif /* INET6 */ 537 default: 538 VERIFY(0); 539 /* NOTREACHED */ 540 } 541 542 if (error == 0 && pcid != NULL) 543 *pcid = 1; /* there is only 1 connection for a TCP */ 544 545 return (error); 546} 547 548static int 549tcp_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, 550 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, 551 associd_t aid, connid_t *pcid, uint32_t flags, void *arg, 552 uint32_t arglen) 553{ 554 return (tcp_usr_connectx_common(so, AF_INET, src_sl, dst_sl, 555 p, ifscope, aid, pcid, flags, arg, arglen)); 556} 557 558#if INET6 559static int 560tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 561{ 562 int error = 0; 563 struct inpcb *inp = sotoinpcb(so); 564 struct tcpcb *tp; 565 struct sockaddr_in6 *sin6p; 566 567 TCPDEBUG0; 568 if (inp == NULL) { 569 return EINVAL; 570 } else if (inp->inp_state == INPCB_STATE_DEAD) { 571 if (so->so_error) { 572 error = so->so_error; 573 so->so_error = 0; 574 return error; 575 } else 576 return EINVAL; 577 } 578#if FLOW_DIVERT 579 else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) { 580 uint32_t fd_ctl_unit = 0; 581 error = flow_divert_check_policy(so, p, FALSE, &fd_ctl_unit); 582 if (error == 0) { 583 if (fd_ctl_unit > 0) { 584 error = flow_divert_pcb_init(so, fd_ctl_unit); 585 if (error == 0) { 586 error = flow_divert_connect_out(so, nam, p); 587 } 588 } else { 589 error = ENETDOWN; 590 } 591 } 592 return error; 593 } 594#endif /* FLOW_DIVERT */ 595 tp = intotcpcb(inp); 596 TCPDEBUG1(); 597 598 calculate_tcp_clock(); 599 600 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { 601 error = EAFNOSUPPORT; 602 goto out; 603 } 604 605 /* 606 * Must disallow TCP ``connections'' to multicast addresses. 607 */ 608 sin6p = (struct sockaddr_in6 *)(void *)nam; 609 if (sin6p->sin6_family == AF_INET6 610 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 611 error = EAFNOSUPPORT; 612 goto out; 613 } 614 615 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 616 struct sockaddr_in sin; 617 618 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 619 return (EINVAL); 620 621 in6_sin6_2_sin(&sin, sin6p); 622 inp->inp_vflag |= INP_IPV4; 623 inp->inp_vflag &= ~INP_IPV6; 624 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) 625 goto out; 626 error = tcp_output(tp); 627 goto out; 628 } 629 inp->inp_vflag &= ~INP_IPV4; 630 inp->inp_vflag |= INP_IPV6; 631 if ((error = tcp6_connect(tp, nam, p)) != 0) 632 goto out; 633 error = tcp_output(tp); 634 if (error) 635 goto out; 636 COMMON_END(PRU_CONNECT); 637} 638 639static int 640tcp6_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, 641 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, 642 associd_t aid, connid_t *pcid, uint32_t flags, void *arg, 643 uint32_t arglen) 644{ 645 return (tcp_usr_connectx_common(so, AF_INET6, src_sl, dst_sl, 646 p, ifscope, aid, pcid, flags, arg, arglen)); 647} 648#endif /* INET6 */ 649 650/* 651 * Initiate disconnect from peer. 652 * If connection never passed embryonic stage, just drop; 653 * else if don't need to let data drain, then can just drop anyways, 654 * else have to begin TCP shutdown process: mark socket disconnecting, 655 * drain unread data, state switch to reflect user close, and 656 * send segment (e.g. FIN) to peer. Socket will be really disconnected 657 * when peer sends FIN and acks ours. 658 * 659 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 660 */ 661static int 662tcp_usr_disconnect(struct socket *so) 663{ 664 int error = 0; 665 struct inpcb *inp = sotoinpcb(so); 666 struct tcpcb *tp; 667 668 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, 669 LCK_MTX_ASSERT_OWNED); 670 COMMON_START(); 671 /* In case we got disconnected from the peer */ 672 if (tp == NULL) 673 goto out; 674 tp = tcp_disconnect(tp); 675 COMMON_END(PRU_DISCONNECT); 676} 677 678/* 679 * User-protocol pru_disconnectx callback. 680 */ 681static int 682tcp_usr_disconnectx(struct socket *so, associd_t aid, connid_t cid) 683{ 684#pragma unused(cid) 685 if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) 686 return (EINVAL); 687 688 return (tcp_usr_disconnect(so)); 689} 690 691/* 692 * Accept a connection. Essentially all the work is 693 * done at higher levels; just return the address 694 * of the peer, storing through addr. 695 */ 696static int 697tcp_usr_accept(struct socket *so, struct sockaddr **nam) 698{ 699 int error = 0; 700 struct inpcb *inp = sotoinpcb(so); 701 struct tcpcb *tp = NULL; 702 TCPDEBUG0; 703 704 in_getpeeraddr(so, nam); 705 706 if (so->so_state & SS_ISDISCONNECTED) { 707 error = ECONNABORTED; 708 goto out; 709 } 710 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) 711 return (EINVAL); 712 else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) 713 return (EPROTOTYPE); 714 715 tp = intotcpcb(inp); 716 TCPDEBUG1(); 717 718 calculate_tcp_clock(); 719 720 COMMON_END(PRU_ACCEPT); 721} 722 723#if INET6 724static int 725tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 726{ 727 int error = 0; 728 struct inpcb *inp = sotoinpcb(so); 729 struct tcpcb *tp = NULL; 730 TCPDEBUG0; 731 732 if (so->so_state & SS_ISDISCONNECTED) { 733 error = ECONNABORTED; 734 goto out; 735 } 736 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) 737 return (EINVAL); 738 else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) 739 return (EPROTOTYPE); 740 741 tp = intotcpcb(inp); 742 TCPDEBUG1(); 743 744 calculate_tcp_clock(); 745 746 in6_mapped_peeraddr(so, nam); 747 COMMON_END(PRU_ACCEPT); 748} 749#endif /* INET6 */ 750 751/* 752 * Mark the connection as being incapable of further output. 753 * 754 * Returns: 0 Success 755 * EINVAL [COMMON_START] 756 * tcp_output:EADDRNOTAVAIL 757 * tcp_output:ENOBUFS 758 * tcp_output:EMSGSIZE 759 * tcp_output:EHOSTUNREACH 760 * tcp_output:ENETUNREACH 761 * tcp_output:ENETDOWN 762 * tcp_output:ENOMEM 763 * tcp_output:EACCES 764 * tcp_output:EMSGSIZE 765 * tcp_output:ENOBUFS 766 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL] 767 */ 768static int 769tcp_usr_shutdown(struct socket *so) 770{ 771 int error = 0; 772 struct inpcb *inp = sotoinpcb(so); 773 struct tcpcb *tp; 774 775 TCPDEBUG0; 776 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) 777 return (EINVAL); 778 779 socantsendmore(so); 780 781 /* 782 * In case we got disconnected from the peer, or if this is 783 * a socket that is to be flow-diverted (but not yet). 784 */ 785 tp = intotcpcb(inp); 786 TCPDEBUG1(); 787 if (tp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { 788 if (tp != NULL) 789 error = EPROTOTYPE; 790 goto out; 791 } 792 793 calculate_tcp_clock(); 794 795 tp = tcp_usrclosed(tp); 796#if MPTCP 797 /* A reset has been sent but socket exists, do not send FIN */ 798 if ((so->so_flags & SOF_MP_SUBFLOW) && 799 (tp) && (tp->t_mpflags & TMPF_RESET)) { 800 goto out; 801 } 802#endif 803 if (tp) 804 error = tcp_output(tp); 805 COMMON_END(PRU_SHUTDOWN); 806} 807 808/* 809 * After a receive, possibly send window update to peer. 810 */ 811static int 812tcp_usr_rcvd(struct socket *so, __unused int flags) 813{ 814 int error = 0; 815 struct inpcb *inp = sotoinpcb(so); 816 struct tcpcb *tp; 817 818 COMMON_START(); 819 /* In case we got disconnected from the peer */ 820 if (tp == NULL) 821 goto out; 822 tcp_sbrcv_trim(tp, &so->so_rcv); 823 824 tcp_output(tp); 825 COMMON_END(PRU_RCVD); 826} 827 828/* 829 * Do a send by putting data in output queue and updating urgent 830 * marker if URG set. Possibly send more data. Unlike the other 831 * pru_*() routines, the mbuf chains are our responsibility. We 832 * must either enqueue them or free them. The other pru_* routines 833 * generally are caller-frees. 834 * 835 * Returns: 0 Success 836 * ECONNRESET 837 * EINVAL 838 * ENOBUFS 839 * tcp_connect:EADDRINUSE Address in use 840 * tcp_connect:EADDRNOTAVAIL Address not available. 841 * tcp_connect:EINVAL Invalid argument 842 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef] 843 * tcp_connect:EACCES Permission denied 844 * tcp_connect:EAGAIN Resource unavailable, try again 845 * tcp_connect:EPERM Operation not permitted 846 * tcp_output:EADDRNOTAVAIL 847 * tcp_output:ENOBUFS 848 * tcp_output:EMSGSIZE 849 * tcp_output:EHOSTUNREACH 850 * tcp_output:ENETUNREACH 851 * tcp_output:ENETDOWN 852 * tcp_output:ENOMEM 853 * tcp_output:EACCES 854 * tcp_output:EMSGSIZE 855 * tcp_output:ENOBUFS 856 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL] 857 * tcp6_connect:??? [IPV6 only] 858 */ 859static int 860tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 861 struct sockaddr *nam, struct mbuf *control, struct proc *p) 862{ 863 int error = 0; 864 struct inpcb *inp = sotoinpcb(so); 865 struct tcpcb *tp; 866 uint32_t msgpri = MSG_PRI_DEFAULT; 867#if INET6 868 int isipv6; 869#endif 870 TCPDEBUG0; 871 872 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD || 873 (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { 874 /* 875 * OOPS! we lost a race, the TCP session got reset after 876 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a 877 * network interrupt in the non-splnet() section of sosend(). 878 */ 879 if (m != NULL) 880 m_freem(m); 881 if (control != NULL) { 882 m_freem(control); 883 control = NULL; 884 } 885 if (inp != NULL && (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) 886 error = EPROTOTYPE; 887 else 888 error = ECONNRESET; /* XXX EPIPE? */ 889 tp = NULL; 890 TCPDEBUG1(); 891 goto out; 892 } 893#if INET6 894 isipv6 = nam && nam->sa_family == AF_INET6; 895#endif /* INET6 */ 896 tp = intotcpcb(inp); 897 TCPDEBUG1(); 898 899 calculate_tcp_clock(); 900 901 if (control != NULL) { 902 if (so->so_flags & SOF_ENABLE_MSGS) { 903 /* Get the msg priority from control mbufs */ 904 error = tcp_get_msg_priority(control, &msgpri); 905 if (error) { 906 m_freem(control); 907 if (m != NULL) 908 m_freem(m); 909 control = NULL; 910 m = NULL; 911 goto out; 912 } 913 m_freem(control); 914 control = NULL; 915 } else if (control->m_len) { 916 /* 917 * if not unordered, TCP should not have 918 * control mbufs 919 */ 920 m_freem(control); 921 if (m != NULL) 922 m_freem(m); 923 control = NULL; 924 m = NULL; 925 error = EINVAL; 926 goto out; 927 } 928 } 929 930 if (so->so_flags & SOF_ENABLE_MSGS) { 931 VERIFY(m->m_flags & M_PKTHDR); 932 m->m_pkthdr.msg_pri = msgpri; 933 } 934 935 /* MPTCP sublow socket buffers must not be compressed */ 936 VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) || 937 (so->so_snd.sb_flags & SB_NOCOMPRESS)); 938 939 if(!(flags & PRUS_OOB)) { 940 /* Call msg send if message delivery is enabled */ 941 if (so->so_flags & SOF_ENABLE_MSGS) 942 sbappendmsg_snd(&so->so_snd, m); 943 else 944 sbappendstream(&so->so_snd, m); 945 946 if (nam && tp->t_state < TCPS_SYN_SENT) { 947 /* 948 * Do implied connect if not yet connected, 949 * initialize window to default value, and 950 * initialize maxseg/maxopd using peer's cached 951 * MSS. 952 */ 953#if INET6 954 if (isipv6) 955 error = tcp6_connect(tp, nam, p); 956 else 957#endif /* INET6 */ 958 error = tcp_connect(tp, nam, p); 959 if (error) 960 goto out; 961 tp->snd_wnd = TTCP_CLIENT_SND_WND; 962 tcp_mss(tp, -1, IFSCOPE_NONE); 963 } 964 965 if (flags & PRUS_EOF) { 966 /* 967 * Close the send side of the connection after 968 * the data is sent. 969 */ 970 socantsendmore(so); 971 tp = tcp_usrclosed(tp); 972 } 973 if (tp != NULL) { 974 if (flags & PRUS_MORETOCOME) 975 tp->t_flags |= TF_MORETOCOME; 976 error = tcp_output(tp); 977 if (flags & PRUS_MORETOCOME) 978 tp->t_flags &= ~TF_MORETOCOME; 979 } 980 } else { 981 if (sbspace(&so->so_snd) == 0) { 982 /* if no space is left in sockbuf, 983 * do not try to squeeze in OOB traffic */ 984 m_freem(m); 985 error = ENOBUFS; 986 goto out; 987 } 988 /* 989 * According to RFC961 (Assigned Protocols), 990 * the urgent pointer points to the last octet 991 * of urgent data. We continue, however, 992 * to consider it to indicate the first octet 993 * of data past the urgent section. 994 * Otherwise, snd_up should be one lower. 995 */ 996 sbappendstream(&so->so_snd, m); 997 if (nam && tp->t_state < TCPS_SYN_SENT) { 998 /* 999 * Do implied connect if not yet connected, 1000 * initialize window to default value, and 1001 * initialize maxseg/maxopd using peer's cached 1002 * MSS. 1003 */ 1004#if INET6 1005 if (isipv6) 1006 error = tcp6_connect(tp, nam, p); 1007 else 1008#endif /* INET6 */ 1009 error = tcp_connect(tp, nam, p); 1010 if (error) 1011 goto out; 1012 tp->snd_wnd = TTCP_CLIENT_SND_WND; 1013 tcp_mss(tp, -1, IFSCOPE_NONE); 1014 } 1015 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 1016 tp->t_force = 1; 1017 error = tcp_output(tp); 1018 tp->t_force = 0; 1019 } 1020 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 1021 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 1022} 1023 1024/* 1025 * Abort the TCP. 1026 */ 1027static int 1028tcp_usr_abort(struct socket *so) 1029{ 1030 int error = 0; 1031 struct inpcb *inp = sotoinpcb(so); 1032 struct tcpcb *tp; 1033 1034 COMMON_START(); 1035 /* In case we got disconnected from the peer */ 1036 if (tp == NULL) 1037 goto out; 1038 tp = tcp_drop(tp, ECONNABORTED); 1039 so->so_usecount--; 1040 COMMON_END(PRU_ABORT); 1041} 1042 1043/* 1044 * Receive out-of-band data. 1045 * 1046 * Returns: 0 Success 1047 * EINVAL [COMMON_START] 1048 * EINVAL 1049 * EWOULDBLOCK 1050 */ 1051static int 1052tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 1053{ 1054 int error = 0; 1055 struct inpcb *inp = sotoinpcb(so); 1056 struct tcpcb *tp; 1057 1058 COMMON_START(); 1059 if ((so->so_oobmark == 0 && 1060 (so->so_state & SS_RCVATMARK) == 0) || 1061 so->so_options & SO_OOBINLINE || 1062 tp->t_oobflags & TCPOOB_HADDATA) { 1063 error = EINVAL; 1064 goto out; 1065 } 1066 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 1067 error = EWOULDBLOCK; 1068 goto out; 1069 } 1070 m->m_len = 1; 1071 *mtod(m, caddr_t) = tp->t_iobc; 1072 if ((flags & MSG_PEEK) == 0) 1073 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1074 COMMON_END(PRU_RCVOOB); 1075} 1076 1077/* xxx - should be const */ 1078struct pr_usrreqs tcp_usrreqs = { 1079 .pru_abort = tcp_usr_abort, 1080 .pru_accept = tcp_usr_accept, 1081 .pru_attach = tcp_usr_attach, 1082 .pru_bind = tcp_usr_bind, 1083 .pru_connect = tcp_usr_connect, 1084 .pru_connectx = tcp_usr_connectx, 1085 .pru_control = in_control, 1086 .pru_detach = tcp_usr_detach, 1087 .pru_disconnect = tcp_usr_disconnect, 1088 .pru_disconnectx = tcp_usr_disconnectx, 1089 .pru_listen = tcp_usr_listen, 1090 .pru_peeraddr = in_getpeeraddr, 1091 .pru_rcvd = tcp_usr_rcvd, 1092 .pru_rcvoob = tcp_usr_rcvoob, 1093 .pru_send = tcp_usr_send, 1094 .pru_shutdown = tcp_usr_shutdown, 1095 .pru_sockaddr = in_getsockaddr, 1096 .pru_sosend = sosend, 1097 .pru_soreceive = soreceive, 1098}; 1099 1100#if INET6 1101struct pr_usrreqs tcp6_usrreqs = { 1102 .pru_abort = tcp_usr_abort, 1103 .pru_accept = tcp6_usr_accept, 1104 .pru_attach = tcp_usr_attach, 1105 .pru_bind = tcp6_usr_bind, 1106 .pru_connect = tcp6_usr_connect, 1107 .pru_connectx = tcp6_usr_connectx, 1108 .pru_control = in6_control, 1109 .pru_detach = tcp_usr_detach, 1110 .pru_disconnect = tcp_usr_disconnect, 1111 .pru_disconnectx = tcp_usr_disconnectx, 1112 .pru_listen = tcp6_usr_listen, 1113 .pru_peeraddr = in6_mapped_peeraddr, 1114 .pru_rcvd = tcp_usr_rcvd, 1115 .pru_rcvoob = tcp_usr_rcvoob, 1116 .pru_send = tcp_usr_send, 1117 .pru_shutdown = tcp_usr_shutdown, 1118 .pru_sockaddr = in6_mapped_sockaddr, 1119 .pru_sosend = sosend, 1120 .pru_soreceive = soreceive, 1121}; 1122#endif /* INET6 */ 1123 1124/* 1125 * Common subroutine to open a TCP connection to remote host specified 1126 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1127 * port number if needed. Call in_pcbladdr to do the routing and to choose 1128 * a local host address (interface). If there is an existing incarnation 1129 * of the same connection in TIME-WAIT state and if the remote host was 1130 * sending CC options and if the connection duration was < MSL, then 1131 * truncate the previous TIME-WAIT state and proceed. 1132 * Initialize connection parameters and enter SYN-SENT state. 1133 * 1134 * Returns: 0 Success 1135 * EADDRINUSE 1136 * EINVAL 1137 * in_pcbbind:EADDRNOTAVAIL Address not available. 1138 * in_pcbbind:EINVAL Invalid argument 1139 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 1140 * in_pcbbind:EACCES Permission denied 1141 * in_pcbbind:EADDRINUSE Address in use 1142 * in_pcbbind:EAGAIN Resource unavailable, try again 1143 * in_pcbbind:EPERM Operation not permitted 1144 * in_pcbladdr:EINVAL Invalid argument 1145 * in_pcbladdr:EAFNOSUPPORT Address family not supported 1146 * in_pcbladdr:EADDRNOTAVAIL Address not available 1147 */ 1148static int 1149tcp_connect(tp, nam, p) 1150 register struct tcpcb *tp; 1151 struct sockaddr *nam; 1152 struct proc *p; 1153{ 1154 struct inpcb *inp = tp->t_inpcb, *oinp; 1155 struct socket *so = inp->inp_socket; 1156 struct tcpcb *otp; 1157 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; 1158 struct in_addr laddr; 1159 struct rmxp_tao *taop; 1160 struct rmxp_tao tao_noncached; 1161 int error = 0; 1162 struct ifnet *outif = NULL; 1163 1164 if (inp->inp_lport == 0) { 1165 error = in_pcbbind(inp, NULL, p); 1166 if (error) 1167 goto done; 1168 } 1169 1170 /* 1171 * Cannot simply call in_pcbconnect, because there might be an 1172 * earlier incarnation of this same connection still in 1173 * TIME_WAIT state, creating an ADDRINUSE error. 1174 */ 1175 error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif); 1176 if (error) 1177 goto done; 1178 1179 tcp_unlock(inp->inp_socket, 0, 0); 1180 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1181 sin->sin_addr, sin->sin_port, 1182 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr, 1183 inp->inp_lport, 0, NULL); 1184 1185 tcp_lock(inp->inp_socket, 0, 0); 1186 if (oinp) { 1187 if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */ 1188 tcp_lock(oinp->inp_socket, 1, 0); 1189 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) { 1190 if (oinp != inp) 1191 tcp_unlock(oinp->inp_socket, 1, 0); 1192 goto skip_oinp; 1193 } 1194 1195 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 1196 otp->t_state == TCPS_TIME_WAIT && 1197 ((int)(tcp_now - otp->t_starttime)) < tcp_msl && 1198 (otp->t_flags & TF_RCVD_CC)) { 1199 otp = tcp_close(otp); 1200 } else { 1201 printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n", 1202 (uint64_t)VM_KERNEL_ADDRPERM(inp)); 1203 if (oinp != inp) 1204 tcp_unlock(oinp->inp_socket, 1, 0); 1205 error = EADDRINUSE; 1206 goto done; 1207 } 1208 if (oinp != inp) 1209 tcp_unlock(oinp->inp_socket, 1, 0); 1210 } 1211skip_oinp: 1212 if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr : 1213 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr && 1214 inp->inp_lport == sin->sin_port) { 1215 error = EINVAL; 1216 goto done; 1217 } 1218 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { 1219 /*lock inversion issue, mostly with udp multicast packets */ 1220 socket_unlock(inp->inp_socket, 0); 1221 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); 1222 socket_lock(inp->inp_socket, 0); 1223 } 1224 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1225 inp->inp_laddr = laddr; 1226 /* no reference needed */ 1227 inp->inp_last_outifp = outif; 1228 inp->inp_flags |= INP_INADDR_ANY; 1229 } 1230 inp->inp_faddr = sin->sin_addr; 1231 inp->inp_fport = sin->sin_port; 1232 in_pcbrehash(inp); 1233 lck_rw_done(inp->inp_pcbinfo->ipi_lock); 1234 1235 if (inp->inp_flowhash == 0) 1236 inp->inp_flowhash = inp_calc_flowhash(inp); 1237 1238 tcp_set_max_rwinscale(tp, so); 1239 1240 soisconnecting(so); 1241 tcpstat.tcps_connattempt++; 1242 tp->t_state = TCPS_SYN_SENT; 1243 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp)); 1244 tp->iss = tcp_new_isn(tp); 1245 tcp_sendseqinit(tp); 1246 if (nstat_collect) 1247 nstat_route_connect_attempt(inp->inp_route.ro_rt); 1248 1249 /* 1250 * Generate a CC value for this connection and 1251 * check whether CC or CCnew should be used. 1252 */ 1253 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 1254 taop = &tao_noncached; 1255 bzero(taop, sizeof(*taop)); 1256 } 1257 1258 tp->cc_send = CC_INC(tcp_ccgen); 1259 if (taop->tao_ccsent != 0 && 1260 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 1261 taop->tao_ccsent = tp->cc_send; 1262 } else { 1263 taop->tao_ccsent = 0; 1264 tp->t_flags |= TF_SENDCCNEW; 1265 } 1266 1267done: 1268 if (outif != NULL) 1269 ifnet_release(outif); 1270 1271 return (error); 1272} 1273 1274#if INET6 1275static int 1276tcp6_connect(tp, nam, p) 1277 register struct tcpcb *tp; 1278 struct sockaddr *nam; 1279 struct proc *p; 1280{ 1281 struct inpcb *inp = tp->t_inpcb, *oinp; 1282 struct socket *so = inp->inp_socket; 1283 struct tcpcb *otp; 1284 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam; 1285 struct in6_addr addr6; 1286 struct rmxp_tao *taop; 1287 struct rmxp_tao tao_noncached; 1288 int error = 0; 1289 struct ifnet *outif = NULL; 1290 1291 if (inp->inp_lport == 0) { 1292 error = in6_pcbbind(inp, NULL, p); 1293 if (error) 1294 goto done; 1295 } 1296 1297 /* 1298 * Cannot simply call in_pcbconnect, because there might be an 1299 * earlier incarnation of this same connection still in 1300 * TIME_WAIT state, creating an ADDRINUSE error. 1301 * 1302 * in6_pcbladdr() might return an ifp with its reference held 1303 * even in the error case, so make sure that it's released 1304 * whenever it's non-NULL. 1305 */ 1306 error = in6_pcbladdr(inp, nam, &addr6, &outif); 1307 if (error) 1308 goto done; 1309 tcp_unlock(inp->inp_socket, 0, 0); 1310 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1311 &sin6->sin6_addr, sin6->sin6_port, 1312 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1313 ? &addr6 1314 : &inp->in6p_laddr, 1315 inp->inp_lport, 0, NULL); 1316 tcp_lock(inp->inp_socket, 0, 0); 1317 if (oinp) { 1318 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 1319 otp->t_state == TCPS_TIME_WAIT && 1320 ((int)(tcp_now - otp->t_starttime)) < tcp_msl && 1321 (otp->t_flags & TF_RCVD_CC)) { 1322 otp = tcp_close(otp); 1323 } else { 1324 error = EADDRINUSE; 1325 goto done; 1326 } 1327 } 1328 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { 1329 /*lock inversion issue, mostly with udp multicast packets */ 1330 socket_unlock(inp->inp_socket, 0); 1331 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); 1332 socket_lock(inp->inp_socket, 0); 1333 } 1334 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 1335 inp->in6p_laddr = addr6; 1336 inp->in6p_last_outifp = outif; /* no reference needed */ 1337 inp->in6p_flags |= INP_IN6ADDR_ANY; 1338 } 1339 inp->in6p_faddr = sin6->sin6_addr; 1340 inp->inp_fport = sin6->sin6_port; 1341 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) 1342 inp->inp_flow = sin6->sin6_flowinfo; 1343 in_pcbrehash(inp); 1344 lck_rw_done(inp->inp_pcbinfo->ipi_lock); 1345 1346 if (inp->inp_flowhash == 0) 1347 inp->inp_flowhash = inp_calc_flowhash(inp); 1348 /* update flowinfo - RFC 6437 */ 1349 if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) { 1350 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 1351 inp->inp_flow |= 1352 (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK); 1353 } 1354 1355 tcp_set_max_rwinscale(tp, so); 1356 1357 soisconnecting(so); 1358 tcpstat.tcps_connattempt++; 1359 tp->t_state = TCPS_SYN_SENT; 1360 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1361 TCP_CONN_KEEPINIT(tp)); 1362 tp->iss = tcp_new_isn(tp); 1363 tcp_sendseqinit(tp); 1364 if (nstat_collect) 1365 nstat_route_connect_attempt(inp->inp_route.ro_rt); 1366 1367 /* 1368 * Generate a CC value for this connection and 1369 * check whether CC or CCnew should be used. 1370 */ 1371 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 1372 taop = &tao_noncached; 1373 bzero(taop, sizeof(*taop)); 1374 } 1375 1376 tp->cc_send = CC_INC(tcp_ccgen); 1377 if (taop->tao_ccsent != 0 && 1378 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 1379 taop->tao_ccsent = tp->cc_send; 1380 } else { 1381 taop->tao_ccsent = 0; 1382 tp->t_flags |= TF_SENDCCNEW; 1383 } 1384 1385done: 1386 if (outif != NULL) 1387 ifnet_release(outif); 1388 1389 return (error); 1390} 1391#endif /* INET6 */ 1392 1393/* 1394 * Export TCP internal state information via a struct tcp_info 1395 */ 1396__private_extern__ void 1397tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 1398{ 1399 struct inpcb *inp = tp->t_inpcb; 1400 1401 bzero(ti, sizeof(*ti)); 1402 1403 ti->tcpi_state = tp->t_state; 1404 1405 if (tp->t_state > TCPS_LISTEN) { 1406 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1407 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1408 if (tp->t_flags & TF_SACK_PERMIT) 1409 ti->tcpi_options |= TCPI_OPT_SACK; 1410 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1411 ti->tcpi_options |= TCPI_OPT_WSCALE; 1412 ti->tcpi_snd_wscale = tp->snd_scale; 1413 ti->tcpi_rcv_wscale = tp->rcv_scale; 1414 } 1415 1416 /* Are we in retranmission episode */ 1417 if (tp->snd_max != tp->snd_nxt) 1418 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY; 1419 else 1420 ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY; 1421 1422 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0; 1423 ti->tcpi_snd_mss = tp->t_maxseg; 1424 ti->tcpi_rcv_mss = tp->t_maxseg; 1425 1426 ti->tcpi_rttcur = tp->t_rttcur; 1427 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT; 1428 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT; 1429 ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT; 1430 1431 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1432 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1433 ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc; 1434 1435 ti->tcpi_rcv_space = tp->rcv_wnd; 1436 1437 ti->tcpi_snd_wnd = tp->snd_wnd; 1438 ti->tcpi_snd_nxt = tp->snd_nxt; 1439 ti->tcpi_rcv_nxt = tp->rcv_nxt; 1440 1441 /* convert bytes/msec to bits/sec */ 1442 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 && 1443 tp->t_bwmeas != NULL) { 1444 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000); 1445 } 1446 1447 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 : 1448 tp->t_inpcb->inp_last_outifp->if_index; 1449 1450 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes); 1451 ti->tcpi_txpackets = inp->inp_stat->txpackets; 1452 ti->tcpi_txbytes = inp->inp_stat->txbytes; 1453 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes; 1454 ti->tcpi_txunacked = tp->snd_max - tp->snd_una; 1455 1456 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes); 1457 ti->tcpi_rxpackets = inp->inp_stat->rxpackets; 1458 ti->tcpi_rxbytes = inp->inp_stat->rxbytes; 1459 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes; 1460 ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes; 1461 1462 if (tp->t_state > TCPS_LISTEN) { 1463 ti->tcpi_synrexmits = tp->t_stat.synrxtshift; 1464 } 1465 ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets; 1466 ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes; 1467 ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets; 1468 ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes; 1469 1470 ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets; 1471 ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes; 1472 ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets; 1473 ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes; 1474 } 1475} 1476 1477__private_extern__ errno_t 1478tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti) 1479{ 1480 struct inpcbinfo *pcbinfo = NULL; 1481 struct inpcb *inp = NULL; 1482 struct socket *so; 1483 struct tcpcb *tp; 1484 1485 if (itpl->itpl_proto == IPPROTO_TCP) 1486 pcbinfo = &tcbinfo; 1487 else 1488 return EINVAL; 1489 1490 if (itpl->itpl_local_sa.sa_family == AF_INET && 1491 itpl->itpl_remote_sa.sa_family == AF_INET) { 1492 inp = in_pcblookup_hash(pcbinfo, 1493 itpl->itpl_remote_sin.sin_addr, 1494 itpl->itpl_remote_sin.sin_port, 1495 itpl->itpl_local_sin.sin_addr, 1496 itpl->itpl_local_sin.sin_port, 1497 0, NULL); 1498 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 && 1499 itpl->itpl_remote_sa.sa_family == AF_INET6) { 1500 struct in6_addr ina6_local; 1501 struct in6_addr ina6_remote; 1502 1503 ina6_local = itpl->itpl_local_sin6.sin6_addr; 1504 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && 1505 itpl->itpl_local_sin6.sin6_scope_id) 1506 ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id); 1507 1508 ina6_remote = itpl->itpl_remote_sin6.sin6_addr; 1509 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && 1510 itpl->itpl_remote_sin6.sin6_scope_id) 1511 ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id); 1512 1513 inp = in6_pcblookup_hash(pcbinfo, 1514 &ina6_remote, 1515 itpl->itpl_remote_sin6.sin6_port, 1516 &ina6_local, 1517 itpl->itpl_local_sin6.sin6_port, 1518 0, NULL); 1519 } else { 1520 return EINVAL; 1521 } 1522 if (inp == NULL || (so = inp->inp_socket) == NULL) 1523 return ENOENT; 1524 1525 socket_lock(so, 0); 1526 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { 1527 socket_unlock(so, 0); 1528 return ENOENT; 1529 } 1530 tp = intotcpcb(inp); 1531 1532 tcp_fill_info(tp, ti); 1533 socket_unlock(so, 0); 1534 1535 return 0; 1536} 1537 1538 1539__private_extern__ int 1540tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) 1541{ 1542 int error; 1543 struct tcp_info ti; 1544 struct info_tuple itpl; 1545 proc_t caller = PROC_NULL; 1546 proc_t caller_parent = PROC_NULL; 1547 char command_name[MAXCOMLEN + 1] = ""; 1548 char parent_name[MAXCOMLEN + 1] = ""; 1549 1550 if ((caller = proc_self()) != PROC_NULL) { 1551 /* get process name */ 1552 strlcpy(command_name, caller->p_comm, sizeof(command_name)); 1553 1554 /* get parent process name if possible */ 1555 if ((caller_parent = proc_find(caller->p_ppid)) != PROC_NULL) { 1556 strlcpy(parent_name, caller_parent->p_comm, 1557 sizeof(parent_name)); 1558 proc_rele(caller_parent); 1559 } 1560 1561 if ((escape_str(command_name, strlen(command_name), 1562 sizeof(command_name)) == 0) && 1563 (escape_str(parent_name, strlen(parent_name), 1564 sizeof(parent_name)) == 0)) { 1565 kern_asl_msg(LOG_DEBUG, "messagetracer", 1566 5, 1567 "com.apple.message.domain", 1568 "com.apple.kernel.tcpstat", /* 1 */ 1569 "com.apple.message.signature", 1570 "tcpinfo", /* 2 */ 1571 "com.apple.message.signature2", command_name, /* 3 */ 1572 "com.apple.message.signature3", parent_name, /* 4 */ 1573 "com.apple.message.summarize", "YES", /* 5 */ 1574 NULL); 1575 } 1576 } 1577 1578 if (caller != PROC_NULL) 1579 proc_rele(caller); 1580 1581 if (req->newptr == USER_ADDR_NULL) { 1582 return EINVAL; 1583 } 1584 if (req->newlen < sizeof(struct info_tuple)) { 1585 return EINVAL; 1586 } 1587 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple)); 1588 if (error != 0) { 1589 return error; 1590 } 1591 error = tcp_fill_info_for_info_tuple(&itpl, &ti); 1592 if (error != 0) { 1593 return error; 1594 } 1595 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info)); 1596 if (error != 0) { 1597 return error; 1598 } 1599 1600 return 0; 1601} 1602 1603static int 1604tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid) 1605{ 1606 int error = EHOSTUNREACH; 1607 *out_pid = -1; 1608 if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; 1609 1610 struct inpcb *inp = (struct inpcb*)so->so_pcb; 1611 uint16_t lport = inp->inp_lport; 1612 uint16_t fport = inp->inp_fport; 1613 struct inpcb *finp = NULL; 1614 1615 if (inp->inp_vflag & INP_IPV6) { 1616 struct in6_addr laddr6 = inp->in6p_laddr; 1617 struct in6_addr faddr6 = inp->in6p_faddr; 1618 socket_unlock(so, 0); 1619 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL); 1620 socket_lock(so, 0); 1621 } else if (inp->inp_vflag & INP_IPV4) { 1622 struct in_addr laddr4 = inp->inp_laddr; 1623 struct in_addr faddr4 = inp->inp_faddr; 1624 socket_unlock(so, 0); 1625 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL); 1626 socket_lock(so, 0); 1627 } 1628 1629 if (finp) { 1630 *out_pid = finp->inp_socket->last_pid; 1631 error = 0; 1632 in_pcb_checkstate(finp, WNT_RELEASE, 0); 1633 } 1634 1635 return error; 1636} 1637 1638void 1639tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci) 1640{ 1641 (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid); 1642 tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info); 1643} 1644 1645/* 1646 * The new sockopt interface makes it possible for us to block in the 1647 * copyin/out step (if we take a page fault). Taking a page fault at 1648 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1649 * use TSM, there probably isn't any need for this function to run at 1650 * splnet() any more. This needs more examination.) 1651 */ 1652int 1653tcp_ctloutput(so, sopt) 1654 struct socket *so; 1655 struct sockopt *sopt; 1656{ 1657 int error, opt, optval; 1658 struct inpcb *inp; 1659 struct tcpcb *tp; 1660 1661 error = 0; 1662 inp = sotoinpcb(so); 1663 if (inp == NULL) { 1664 return (ECONNRESET); 1665 } 1666 /* Allow <SOL_SOCKET,SO_FLUSH/SO_TRAFFIC_MGT_BACKGROUND> at this level */ 1667 if (sopt->sopt_level != IPPROTO_TCP && 1668 !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH || 1669 sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) { 1670#if INET6 1671 if (SOCK_CHECK_DOM(so, PF_INET6)) 1672 error = ip6_ctloutput(so, sopt); 1673 else 1674#endif /* INET6 */ 1675 error = ip_ctloutput(so, sopt); 1676 return (error); 1677 } 1678 tp = intotcpcb(inp); 1679 if (tp == NULL) { 1680 return (ECONNRESET); 1681 } 1682 1683 calculate_tcp_clock(); 1684 1685 switch (sopt->sopt_dir) { 1686 case SOPT_SET: 1687 switch (sopt->sopt_name) { 1688 case TCP_NODELAY: 1689 case TCP_NOOPT: 1690 case TCP_NOPUSH: 1691 error = sooptcopyin(sopt, &optval, sizeof optval, 1692 sizeof optval); 1693 if (error) 1694 break; 1695 1696 switch (sopt->sopt_name) { 1697 case TCP_NODELAY: 1698 opt = TF_NODELAY; 1699 break; 1700 case TCP_NOOPT: 1701 opt = TF_NOOPT; 1702 break; 1703 case TCP_NOPUSH: 1704 opt = TF_NOPUSH; 1705 break; 1706 default: 1707 opt = 0; /* dead code to fool gcc */ 1708 break; 1709 } 1710 1711 if (optval) 1712 tp->t_flags |= opt; 1713 else 1714 tp->t_flags &= ~opt; 1715 break; 1716 case TCP_RXT_FINDROP: 1717 error = sooptcopyin(sopt, &optval, sizeof optval, 1718 sizeof optval); 1719 if (error) 1720 break; 1721 opt = TF_RXTFINDROP; 1722 if (optval) 1723 tp->t_flagsext |= opt; 1724 else 1725 tp->t_flagsext &= ~opt; 1726 break; 1727 case TCP_MEASURE_SND_BW: 1728 error = sooptcopyin(sopt, &optval, sizeof optval, 1729 sizeof optval); 1730 if (error) 1731 break; 1732 opt = TF_MEASURESNDBW; 1733 if (optval) { 1734 if (tp->t_bwmeas == NULL) { 1735 tp->t_bwmeas = tcp_bwmeas_alloc(tp); 1736 if (tp->t_bwmeas == NULL) { 1737 error = ENOMEM; 1738 break; 1739 } 1740 } 1741 tp->t_flagsext |= opt; 1742 } else { 1743 tp->t_flagsext &= ~opt; 1744 /* Reset snd bw measurement state */ 1745 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS); 1746 if (tp->t_bwmeas != NULL) { 1747 tcp_bwmeas_free(tp); 1748 } 1749 } 1750 break; 1751 case TCP_MEASURE_BW_BURST: { 1752 struct tcp_measure_bw_burst in; 1753 uint32_t minpkts, maxpkts; 1754 bzero(&in, sizeof(in)); 1755 1756 error = sooptcopyin(sopt, &in, sizeof(in), 1757 sizeof(in)); 1758 if (error) 1759 break; 1760 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || 1761 tp->t_bwmeas == NULL) { 1762 error = EINVAL; 1763 break; 1764 } 1765 minpkts = (in.min_burst_size != 0) ? in.min_burst_size : 1766 tp->t_bwmeas->bw_minsizepkts; 1767 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size : 1768 tp->t_bwmeas->bw_maxsizepkts; 1769 if (minpkts > maxpkts) { 1770 error = EINVAL; 1771 break; 1772 } 1773 tp->t_bwmeas->bw_minsizepkts = minpkts; 1774 tp->t_bwmeas->bw_maxsizepkts = maxpkts; 1775 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg); 1776 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg); 1777 break; 1778 } 1779 case TCP_MAXSEG: 1780 error = sooptcopyin(sopt, &optval, sizeof optval, 1781 sizeof optval); 1782 if (error) 1783 break; 1784 1785 if (optval > 0 && optval <= tp->t_maxseg && 1786 optval + 40 >= tcp_minmss) 1787 tp->t_maxseg = optval; 1788 else 1789 error = EINVAL; 1790 break; 1791 1792 case TCP_KEEPALIVE: 1793 error = sooptcopyin(sopt, &optval, sizeof optval, 1794 sizeof optval); 1795 if (error) 1796 break; 1797 if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { 1798 error = EINVAL; 1799 } else { 1800 tp->t_keepidle = optval * TCP_RETRANSHZ; 1801 /* reset the timer to new value */ 1802 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1803 TCP_CONN_KEEPIDLE(tp)); 1804 tcp_check_timer_state(tp); 1805 } 1806 break; 1807 1808 case TCP_CONNECTIONTIMEOUT: 1809 error = sooptcopyin(sopt, &optval, sizeof optval, 1810 sizeof optval); 1811 if (error) 1812 break; 1813 if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { 1814 error = EINVAL; 1815 } else { 1816 tp->t_keepinit = optval * TCP_RETRANSHZ; 1817 if (tp->t_state == TCPS_SYN_RECEIVED || 1818 tp->t_state == TCPS_SYN_SENT) { 1819 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1820 TCP_CONN_KEEPINIT(tp)); 1821 tcp_check_timer_state(tp); 1822 } 1823 } 1824 break; 1825 1826 case TCP_KEEPINTVL: 1827 error = sooptcopyin(sopt, &optval, sizeof(optval), 1828 sizeof(optval)); 1829 if (error) 1830 break; 1831 if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { 1832 error = EINVAL; 1833 } else { 1834 tp->t_keepintvl = optval * TCP_RETRANSHZ; 1835 if (tp->t_state == TCPS_FIN_WAIT_2 && 1836 TCP_CONN_MAXIDLE(tp) > 0) { 1837 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, 1838 TCP_CONN_MAXIDLE(tp)); 1839 tcp_check_timer_state(tp); 1840 } 1841 } 1842 break; 1843 1844 case TCP_KEEPCNT: 1845 error = sooptcopyin(sopt, &optval, sizeof(optval), 1846 sizeof(optval)); 1847 if (error) 1848 break; 1849 if (optval < 0 || optval > INT32_MAX) { 1850 error = EINVAL; 1851 } else { 1852 tp->t_keepcnt = optval; 1853 if (tp->t_state == TCPS_FIN_WAIT_2 && 1854 TCP_CONN_MAXIDLE(tp) > 0) { 1855 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, 1856 TCP_CONN_MAXIDLE(tp)); 1857 tcp_check_timer_state(tp); 1858 } 1859 } 1860 break; 1861 1862 case PERSIST_TIMEOUT: 1863 error = sooptcopyin(sopt, &optval, sizeof optval, 1864 sizeof optval); 1865 if (error) 1866 break; 1867 if (optval < 0) 1868 error = EINVAL; 1869 else 1870 tp->t_persist_timeout = optval * TCP_RETRANSHZ; 1871 break; 1872 case TCP_RXT_CONNDROPTIME: 1873 error = sooptcopyin(sopt, &optval, sizeof(optval), 1874 sizeof(optval)); 1875 if (error) 1876 break; 1877 if (optval < 0) 1878 error = EINVAL; 1879 else 1880 tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ; 1881 break; 1882 case TCP_NOTSENT_LOWAT: 1883 error = sooptcopyin(sopt, &optval, sizeof(optval), 1884 sizeof(optval)); 1885 if (error) 1886 break; 1887 if (optval < 0) { 1888 error = EINVAL; 1889 break; 1890 } else { 1891 if (optval == 0) { 1892 so->so_flags &= ~(SOF_NOTSENT_LOWAT); 1893 tp->t_notsent_lowat = 0; 1894 } else { 1895 so->so_flags |= SOF_NOTSENT_LOWAT; 1896 tp->t_notsent_lowat = optval; 1897 } 1898 } 1899 break; 1900 case TCP_ADAPTIVE_READ_TIMEOUT: 1901 error = sooptcopyin(sopt, &optval, sizeof (optval), 1902 sizeof(optval)); 1903 if (error) 1904 break; 1905 if (optval < 0 || 1906 optval > TCP_ADAPTIVE_TIMEOUT_MAX) { 1907 error = EINVAL; 1908 break; 1909 } else if (optval == 0) { 1910 tp->t_adaptive_rtimo = 0; 1911 tcp_keepalive_reset(tp); 1912 } else { 1913 tp->t_adaptive_rtimo = optval; 1914 } 1915 break; 1916 case TCP_ADAPTIVE_WRITE_TIMEOUT: 1917 error = sooptcopyin(sopt, &optval, sizeof (optval), 1918 sizeof (optval)); 1919 if (error) 1920 break; 1921 if (optval < 0 || 1922 optval > TCP_ADAPTIVE_TIMEOUT_MAX) { 1923 error = EINVAL; 1924 break; 1925 } else { 1926 tp->t_adaptive_wtimo = optval; 1927 } 1928 break; 1929 case TCP_ENABLE_MSGS: 1930 error = sooptcopyin(sopt, &optval, sizeof(optval), 1931 sizeof(optval)); 1932 if (error) 1933 break; 1934 if (optval < 0 || optval > 1) { 1935 error = EINVAL; 1936 } else if (optval == 1) { 1937 /* 1938 * Check if messages option is already 1939 * enabled, if so return. 1940 */ 1941 if (so->so_flags & SOF_ENABLE_MSGS) { 1942 VERIFY(so->so_msg_state != NULL); 1943 break; 1944 } 1945 1946 /* 1947 * allocate memory for storing message 1948 * related state 1949 */ 1950 VERIFY(so->so_msg_state == NULL); 1951 MALLOC(so->so_msg_state, 1952 struct msg_state *, 1953 sizeof(struct msg_state), 1954 M_TEMP, M_WAITOK | M_ZERO); 1955 if (so->so_msg_state == NULL) { 1956 error = ENOMEM; 1957 break; 1958 } 1959 1960 /* Enable message delivery */ 1961 so->so_flags |= SOF_ENABLE_MSGS; 1962 } else { 1963 /* 1964 * Can't disable message delivery on socket 1965 * because of restrictions imposed by 1966 * encoding/decoding 1967 */ 1968 error = EINVAL; 1969 } 1970 break; 1971 case TCP_SENDMOREACKS: 1972 error = sooptcopyin(sopt, &optval, sizeof(optval), 1973 sizeof(optval)); 1974 if (error) 1975 break; 1976 if (optval < 0 || optval > 1) { 1977 error = EINVAL; 1978 } else if (optval == 0) { 1979 tp->t_flagsext &= ~(TF_NOSTRETCHACK); 1980 } else { 1981 tp->t_flagsext |= TF_NOSTRETCHACK; 1982 } 1983 break; 1984 case SO_FLUSH: 1985 if ((error = sooptcopyin(sopt, &optval, sizeof (optval), 1986 sizeof (optval))) != 0) 1987 break; 1988 1989 error = inp_flush(inp, optval); 1990 break; 1991 1992 case SO_TRAFFIC_MGT_BACKGROUND: 1993 if ((error = sooptcopyin(sopt, &optval, sizeof (optval), 1994 sizeof (optval))) != 0) 1995 break; 1996 1997 if (optval) { 1998 socket_set_traffic_mgt_flags_locked(so, 1999 TRAFFIC_MGT_SO_BACKGROUND); 2000 } else { 2001 socket_clear_traffic_mgt_flags_locked(so, 2002 TRAFFIC_MGT_SO_BACKGROUND); 2003 } 2004 break; 2005 2006 default: 2007 error = ENOPROTOOPT; 2008 break; 2009 } 2010 break; 2011 2012 case SOPT_GET: 2013 switch (sopt->sopt_name) { 2014 case TCP_NODELAY: 2015 optval = tp->t_flags & TF_NODELAY; 2016 break; 2017 case TCP_MAXSEG: 2018 optval = tp->t_maxseg; 2019 break; 2020 case TCP_KEEPALIVE: 2021 optval = tp->t_keepidle / TCP_RETRANSHZ; 2022 break; 2023 case TCP_KEEPINTVL: 2024 optval = tp->t_keepintvl / TCP_RETRANSHZ; 2025 break; 2026 case TCP_KEEPCNT: 2027 optval = tp->t_keepcnt; 2028 break; 2029 case TCP_NOOPT: 2030 optval = tp->t_flags & TF_NOOPT; 2031 break; 2032 case TCP_NOPUSH: 2033 optval = tp->t_flags & TF_NOPUSH; 2034 break; 2035 case TCP_CONNECTIONTIMEOUT: 2036 optval = tp->t_keepinit / TCP_RETRANSHZ; 2037 break; 2038 case PERSIST_TIMEOUT: 2039 optval = tp->t_persist_timeout / TCP_RETRANSHZ; 2040 break; 2041 case TCP_RXT_CONNDROPTIME: 2042 optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ; 2043 break; 2044 case TCP_RXT_FINDROP: 2045 optval = tp->t_flagsext & TF_RXTFINDROP; 2046 break; 2047 case TCP_MEASURE_SND_BW: 2048 optval = tp->t_flagsext & TF_MEASURESNDBW; 2049 break; 2050 case TCP_INFO: { 2051 struct tcp_info ti; 2052 2053 tcp_fill_info(tp, &ti); 2054 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info)); 2055 goto done; 2056 /* NOT REACHED */ 2057 } 2058 case TCP_MEASURE_BW_BURST: { 2059 struct tcp_measure_bw_burst out; 2060 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || 2061 tp->t_bwmeas == NULL) { 2062 error = EINVAL; 2063 break; 2064 } 2065 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts; 2066 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts; 2067 error = sooptcopyout(sopt, &out, sizeof(out)); 2068 goto done; 2069 } 2070 case TCP_NOTSENT_LOWAT: 2071 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) { 2072 optval = tp->t_notsent_lowat; 2073 } else { 2074 optval = 0; 2075 } 2076 break; 2077 2078 case TCP_ENABLE_MSGS: 2079 if (so->so_flags & SOF_ENABLE_MSGS) { 2080 optval = 1; 2081 } else { 2082 optval = 0; 2083 } 2084 break; 2085 case TCP_SENDMOREACKS: 2086 if (tp->t_flagsext & TF_NOSTRETCHACK) 2087 optval = 1; 2088 else 2089 optval = 0; 2090 break; 2091 case TCP_PEER_PID: { 2092 pid_t pid; 2093 error = tcp_lookup_peer_pid_locked(so, &pid); 2094 if (error == 0) 2095 error = sooptcopyout(sopt, &pid, sizeof(pid)); 2096 goto done; 2097 } 2098 case TCP_ADAPTIVE_READ_TIMEOUT: 2099 optval = tp->t_adaptive_rtimo; 2100 break; 2101 case TCP_ADAPTIVE_WRITE_TIMEOUT: 2102 optval = tp->t_adaptive_wtimo; 2103 break; 2104 case SO_TRAFFIC_MGT_BACKGROUND: 2105 optval = (so->so_traffic_mgt_flags & 2106 TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0; 2107 break; 2108 default: 2109 error = ENOPROTOOPT; 2110 break; 2111 } 2112 if (error == 0) 2113 error = sooptcopyout(sopt, &optval, sizeof optval); 2114 break; 2115 } 2116done: 2117 return (error); 2118} 2119 2120/* 2121 * tcp_sendspace and tcp_recvspace are the default send and receive window 2122 * sizes, respectively. These are obsolescent (this information should 2123 * be set by the route). 2124 */ 2125u_int32_t tcp_sendspace = 1448*256; 2126u_int32_t tcp_recvspace = 1448*384; 2127 2128/* During attach, the size of socket buffer allocated is limited to 2129 * sb_max in sbreserve. Disallow setting the tcp send and recv space 2130 * to be more than sb_max because that will cause tcp_attach to fail 2131 * (see radar 5713060) 2132 */ 2133static int 2134sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1, 2135 __unused int arg2, struct sysctl_req *req) { 2136 u_int32_t new_value = 0, *space_p = NULL; 2137 int changed = 0, error = 0; 2138 u_quad_t sb_effective_max = (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES; 2139 2140 switch (oidp->oid_number) { 2141 case TCPCTL_SENDSPACE: 2142 space_p = &tcp_sendspace; 2143 break; 2144 case TCPCTL_RECVSPACE: 2145 space_p = &tcp_recvspace; 2146 break; 2147 default: 2148 return EINVAL; 2149 } 2150 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t), 2151 &new_value, &changed); 2152 if (changed) { 2153 if (new_value > 0 && new_value <= sb_effective_max) { 2154 *space_p = new_value; 2155 } else { 2156 error = ERANGE; 2157 } 2158 } 2159 return error; 2160} 2161 2162SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 2163 &tcp_sendspace , 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size"); 2164SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 2165 &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size"); 2166 2167 2168/* 2169 * Attach TCP protocol to socket, allocating 2170 * internet protocol control block, tcp control block, 2171 * bufer space, and entering LISTEN state if to accept connections. 2172 * 2173 * Returns: 0 Success 2174 * in_pcballoc:ENOBUFS 2175 * in_pcballoc:ENOMEM 2176 * in_pcballoc:??? [IPSEC specific] 2177 * soreserve:ENOBUFS 2178 */ 2179static int 2180tcp_attach(so, p) 2181 struct socket *so; 2182 struct proc *p; 2183{ 2184 register struct tcpcb *tp; 2185 struct inpcb *inp; 2186 int error; 2187#if INET6 2188 int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0; 2189#endif 2190 2191 error = in_pcballoc(so, &tcbinfo, p); 2192 if (error) 2193 return (error); 2194 2195 inp = sotoinpcb(so); 2196 2197 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 2198 error = soreserve(so, tcp_sendspace, tcp_recvspace); 2199 if (error) 2200 return (error); 2201 } 2202 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) 2203 so->so_rcv.sb_flags |= SB_AUTOSIZE; 2204 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) 2205 so->so_snd.sb_flags |= SB_AUTOSIZE; 2206 2207#if INET6 2208 if (isipv6) { 2209 inp->inp_vflag |= INP_IPV6; 2210 inp->in6p_hops = -1; /* use kernel default */ 2211 } 2212 else 2213#endif /* INET6 */ 2214 inp->inp_vflag |= INP_IPV4; 2215 tp = tcp_newtcpcb(inp); 2216 if (tp == NULL) { 2217 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 2218 2219 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 2220#if INET6 2221 if (isipv6) 2222 in6_pcbdetach(inp); 2223 else 2224#endif /* INET6 */ 2225 in_pcbdetach(inp); 2226 so->so_state |= nofd; 2227 return (ENOBUFS); 2228 } 2229 if (nstat_collect) { 2230 nstat_tcp_new_pcb(inp); 2231 } 2232 tp->t_state = TCPS_CLOSED; 2233 return (0); 2234} 2235 2236/* 2237 * Initiate (or continue) disconnect. 2238 * If embryonic state, just send reset (once). 2239 * If in ``let data drain'' option and linger null, just drop. 2240 * Otherwise (hard), mark socket disconnecting and drop 2241 * current input data; switch states based on user close, and 2242 * send segment to peer (with FIN). 2243 */ 2244static struct tcpcb * 2245tcp_disconnect(tp) 2246 register struct tcpcb *tp; 2247{ 2248 struct socket *so = tp->t_inpcb->inp_socket; 2249 2250 if (tp->t_state < TCPS_ESTABLISHED) 2251 tp = tcp_close(tp); 2252 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 2253 tp = tcp_drop(tp, 0); 2254 else { 2255 soisdisconnecting(so); 2256 sbflush(&so->so_rcv); 2257 tp = tcp_usrclosed(tp); 2258#if MPTCP 2259 /* A reset has been sent but socket exists, do not send FIN */ 2260 if ((so->so_flags & SOF_MP_SUBFLOW) && 2261 (tp) && (tp->t_mpflags & TMPF_RESET)) 2262 return (tp); 2263#endif 2264 if (tp) 2265 (void) tcp_output(tp); 2266 } 2267 return (tp); 2268} 2269 2270/* 2271 * User issued close, and wish to trail through shutdown states: 2272 * if never received SYN, just forget it. If got a SYN from peer, 2273 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 2274 * If already got a FIN from peer, then almost done; go to LAST_ACK 2275 * state. In all other cases, have already sent FIN to peer (e.g. 2276 * after PRU_SHUTDOWN), and just have to play tedious game waiting 2277 * for peer to send FIN or not respond to keep-alives, etc. 2278 * We can let the user exit from the close as soon as the FIN is acked. 2279 */ 2280static struct tcpcb * 2281tcp_usrclosed(tp) 2282 register struct tcpcb *tp; 2283{ 2284 2285 switch (tp->t_state) { 2286 2287 case TCPS_CLOSED: 2288 case TCPS_LISTEN: 2289 tp = tcp_close(tp); 2290 break; 2291 2292 case TCPS_SYN_SENT: 2293 case TCPS_SYN_RECEIVED: 2294 tp->t_flags |= TF_NEEDFIN; 2295 break; 2296 2297 case TCPS_ESTABLISHED: 2298 DTRACE_TCP4(state__change, void, NULL, 2299 struct inpcb *, tp->t_inpcb, 2300 struct tcpcb *, tp, 2301 int32_t, TCPS_FIN_WAIT_1); 2302 tp->t_state = TCPS_FIN_WAIT_1; 2303 break; 2304 2305 case TCPS_CLOSE_WAIT: 2306 DTRACE_TCP4(state__change, void, NULL, 2307 struct inpcb *, tp->t_inpcb, 2308 struct tcpcb *, tp, 2309 int32_t, TCPS_LAST_ACK); 2310 tp->t_state = TCPS_LAST_ACK; 2311 break; 2312 } 2313 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 2314 soisdisconnected(tp->t_inpcb->inp_socket); 2315 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 2316 if (tp->t_state == TCPS_FIN_WAIT_2) 2317 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, 2318 TCP_CONN_MAXIDLE(tp)); 2319 } 2320 return (tp); 2321} 2322 2323void 2324tcp_in_cksum_stats(u_int32_t len) 2325{ 2326 tcpstat.tcps_rcv_swcsum++; 2327 tcpstat.tcps_rcv_swcsum_bytes += len; 2328} 2329 2330void 2331tcp_out_cksum_stats(u_int32_t len) 2332{ 2333 tcpstat.tcps_snd_swcsum++; 2334 tcpstat.tcps_snd_swcsum_bytes += len; 2335} 2336 2337#if INET6 2338void 2339tcp_in6_cksum_stats(u_int32_t len) 2340{ 2341 tcpstat.tcps_rcv6_swcsum++; 2342 tcpstat.tcps_rcv6_swcsum_bytes += len; 2343} 2344 2345void 2346tcp_out6_cksum_stats(u_int32_t len) 2347{ 2348 tcpstat.tcps_snd6_swcsum++; 2349 tcpstat.tcps_snd6_swcsum_bytes += len; 2350} 2351 2352/* 2353 * When messages are enabled on a TCP socket, the message priority 2354 * is sent as a control message. This function will extract it. 2355 */ 2356int 2357tcp_get_msg_priority(struct mbuf *control, uint32_t *msgpri) 2358{ 2359 struct cmsghdr *cm; 2360 if (control == NULL) 2361 return(EINVAL); 2362 2363 for (cm = M_FIRST_CMSGHDR(control); cm; 2364 cm = M_NXT_CMSGHDR(control, cm)) { 2365 if (cm->cmsg_len < sizeof(struct cmsghdr) || 2366 cm->cmsg_len > control->m_len) { 2367 return (EINVAL); 2368 } 2369 if (cm->cmsg_level == SOL_SOCKET && 2370 cm->cmsg_type == SCM_MSG_PRIORITY) { 2371 *msgpri = *(unsigned int *)(void *)CMSG_DATA(cm); 2372 break; 2373 } 2374 } 2375 2376 VERIFY(*msgpri >= MSG_PRI_MIN && *msgpri <= MSG_PRI_MAX); 2377 return (0); 2378} 2379#endif /* INET6 */ 2380