1/* 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1988, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $ 62 */ 63 64 65#include <sys/param.h> 66#include <sys/systm.h> 67#include <sys/kernel.h> 68#include <sys/sysctl.h> 69#include <sys/mbuf.h> 70#if INET6 71#include <sys/domain.h> 72#endif /* INET6 */ 73#include <sys/kasl.h> 74#include <sys/socket.h> 75#include <sys/socketvar.h> 76#include <sys/protosw.h> 77#include <sys/syslog.h> 78 79#include <net/if.h> 80#include <net/route.h> 81#include <net/ntstat.h> 82#include <net/content_filter.h> 83 84#include <netinet/in.h> 85#include <netinet/in_systm.h> 86#if INET6 87#include <netinet/ip6.h> 88#endif 89#include <netinet/in_pcb.h> 90#if INET6 91#include <netinet6/in6_pcb.h> 92#endif 93#include <netinet/in_var.h> 94#include <netinet/ip_var.h> 95#if INET6 96#include <netinet6/ip6_var.h> 97#endif 98#include <netinet/tcp.h> 99#include <netinet/tcp_fsm.h> 100#include <netinet/tcp_seq.h> 101#include <netinet/tcp_timer.h> 102#include <netinet/tcp_var.h> 103#include <netinet/tcpip.h> 104#include <mach/sdt.h> 105#if TCPDEBUG 106#include <netinet/tcp_debug.h> 107#endif 108#if MPTCP 109#include <netinet/mptcp_var.h> 110#endif /* MPTCP */ 111 112#if IPSEC 113#include <netinet6/ipsec.h> 114#endif /*IPSEC*/ 115 116#if FLOW_DIVERT 117#include <netinet/flow_divert.h> 118#endif /* FLOW_DIVERT */ 119 120void tcp_fill_info(struct tcpcb *, struct tcp_info *); 121errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *); 122 123int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *); 124 125/* 126 * TCP protocol interface to socket abstraction. 127 */ 128extern char *tcpstates[]; /* XXX ??? */ 129 130static int tcp_attach(struct socket *, struct proc *); 131static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *); 132#if INET6 133static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *); 134static int tcp6_usr_connect(struct socket *, struct sockaddr *, 135 struct proc *); 136#endif /* INET6 */ 137static struct tcpcb * 138 tcp_disconnect(struct tcpcb *); 139static struct tcpcb * 140 tcp_usrclosed(struct tcpcb *); 141 142extern uint32_t tcp_autorcvbuf_max; 143 144extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb); 145 146#if TCPDEBUG 147#define TCPDEBUG0 int ostate = 0 148#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 149#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 150 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 151#else 152#define TCPDEBUG0 153#define TCPDEBUG1() 154#define TCPDEBUG2(req) 155#endif 156 157SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, 158 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN, 159 0 , 0, tcp_sysctl_info, "S", "TCP info per tuple"); 160 161/* 162 * TCP attaches to socket via pru_attach(), reserving space, 163 * and an internet control block. 164 * 165 * Returns: 0 Success 166 * EISCONN 167 * tcp_attach:ENOBUFS 168 * tcp_attach:ENOMEM 169 * tcp_attach:??? [IPSEC specific] 170 */ 171static int 172tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p) 173{ 174 int error; 175 struct inpcb *inp = sotoinpcb(so); 176 struct tcpcb *tp = 0; 177 TCPDEBUG0; 178 179 TCPDEBUG1(); 180 if (inp) { 181 error = EISCONN; 182 goto out; 183 } 184 185 error = tcp_attach(so, p); 186 if (error) 187 goto out; 188 189 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 190 so->so_linger = TCP_LINGERTIME * hz; 191 tp = sototcpcb(so); 192out: 193 TCPDEBUG2(PRU_ATTACH); 194 return error; 195} 196 197/* 198 * pru_detach() detaches the TCP protocol from the socket. 199 * If the protocol state is non-embryonic, then can't 200 * do this directly: have to initiate a pru_disconnect(), 201 * which may finish later; embryonic TCB's can just 202 * be discarded here. 203 */ 204static int 205tcp_usr_detach(struct socket *so) 206{ 207 int error = 0; 208 struct inpcb *inp = sotoinpcb(so); 209 struct tcpcb *tp; 210 TCPDEBUG0; 211 212 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { 213 return EINVAL; /* XXX */ 214 } 215 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); 216 tp = intotcpcb(inp); 217 /* In case we got disconnected from the peer */ 218 if (tp == NULL) 219 goto out; 220 TCPDEBUG1(); 221 222 calculate_tcp_clock(); 223 224 tp = tcp_disconnect(tp); 225out: 226 TCPDEBUG2(PRU_DETACH); 227 return error; 228} 229 230#if NECP 231#define COMMON_START() TCPDEBUG0; \ 232do { \ 233 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \ 234 return (EINVAL); \ 235 if (necp_socket_should_use_flow_divert(inp)) \ 236 return (EPROTOTYPE); \ 237 tp = intotcpcb(inp); \ 238 TCPDEBUG1(); \ 239 calculate_tcp_clock(); \ 240} while (0) 241#else /* NECP */ 242#define COMMON_START() TCPDEBUG0; \ 243do { \ 244 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \ 245 return (EINVAL); \ 246 tp = intotcpcb(inp); \ 247 TCPDEBUG1(); \ 248 calculate_tcp_clock(); \ 249} while (0) 250#endif /* !NECP */ 251 252#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out 253 254 255/* 256 * Give the socket an address. 257 * 258 * Returns: 0 Success 259 * EINVAL Invalid argument [COMMON_START] 260 * EAFNOSUPPORT Address family not supported 261 * in_pcbbind:EADDRNOTAVAIL Address not available. 262 * in_pcbbind:EINVAL Invalid argument 263 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 264 * in_pcbbind:EACCES Permission denied 265 * in_pcbbind:EADDRINUSE Address in use 266 * in_pcbbind:EAGAIN Resource unavailable, try again 267 * in_pcbbind:EPERM Operation not permitted 268 */ 269static int 270tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 271{ 272 int error = 0; 273 struct inpcb *inp = sotoinpcb(so); 274 struct tcpcb *tp; 275 struct sockaddr_in *sinp; 276 277 COMMON_START(); 278 279 if (nam->sa_family != 0 && nam->sa_family != AF_INET) { 280 error = EAFNOSUPPORT; 281 goto out; 282 } 283 284 /* 285 * Must check for multicast addresses and disallow binding 286 * to them. 287 */ 288 sinp = (struct sockaddr_in *)(void *)nam; 289 if (sinp->sin_family == AF_INET && 290 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 291 error = EAFNOSUPPORT; 292 goto out; 293 } 294 error = in_pcbbind(inp, nam, p); 295 if (error) 296 goto out; 297 COMMON_END(PRU_BIND); 298 299} 300 301#if INET6 302static int 303tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 304{ 305 int error = 0; 306 struct inpcb *inp = sotoinpcb(so); 307 struct tcpcb *tp; 308 struct sockaddr_in6 *sin6p; 309 310 COMMON_START(); 311 312 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { 313 error = EAFNOSUPPORT; 314 goto out; 315 } 316 317 /* 318 * Must check for multicast addresses and disallow binding 319 * to them. 320 */ 321 sin6p = (struct sockaddr_in6 *)(void *)nam; 322 if (sin6p->sin6_family == AF_INET6 && 323 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 324 error = EAFNOSUPPORT; 325 goto out; 326 } 327 inp->inp_vflag &= ~INP_IPV4; 328 inp->inp_vflag |= INP_IPV6; 329 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 330 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 331 inp->inp_vflag |= INP_IPV4; 332 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 333 struct sockaddr_in sin; 334 335 in6_sin6_2_sin(&sin, sin6p); 336 inp->inp_vflag |= INP_IPV4; 337 inp->inp_vflag &= ~INP_IPV6; 338 error = in_pcbbind(inp, (struct sockaddr *)&sin, p); 339 goto out; 340 } 341 } 342 error = in6_pcbbind(inp, nam, p); 343 if (error) 344 goto out; 345 COMMON_END(PRU_BIND); 346} 347#endif /* INET6 */ 348 349/* 350 * Prepare to accept connections. 351 * 352 * Returns: 0 Success 353 * EINVAL [COMMON_START] 354 * in_pcbbind:EADDRNOTAVAIL Address not available. 355 * in_pcbbind:EINVAL Invalid argument 356 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 357 * in_pcbbind:EACCES Permission denied 358 * in_pcbbind:EADDRINUSE Address in use 359 * in_pcbbind:EAGAIN Resource unavailable, try again 360 * in_pcbbind:EPERM Operation not permitted 361 */ 362static int 363tcp_usr_listen(struct socket *so, struct proc *p) 364{ 365 int error = 0; 366 struct inpcb *inp = sotoinpcb(so); 367 struct tcpcb *tp; 368 369 COMMON_START(); 370 if (inp->inp_lport == 0) 371 error = in_pcbbind(inp, NULL, p); 372 if (error == 0) 373 tp->t_state = TCPS_LISTEN; 374 COMMON_END(PRU_LISTEN); 375} 376 377#if INET6 378static int 379tcp6_usr_listen(struct socket *so, struct proc *p) 380{ 381 int error = 0; 382 struct inpcb *inp = sotoinpcb(so); 383 struct tcpcb *tp; 384 385 COMMON_START(); 386 if (inp->inp_lport == 0) { 387 inp->inp_vflag &= ~INP_IPV4; 388 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 389 inp->inp_vflag |= INP_IPV4; 390 error = in6_pcbbind(inp, NULL, p); 391 } 392 if (error == 0) 393 tp->t_state = TCPS_LISTEN; 394 COMMON_END(PRU_LISTEN); 395} 396#endif /* INET6 */ 397 398/* 399 * Initiate connection to peer. 400 * Create a template for use in transmissions on this connection. 401 * Enter SYN_SENT state, and mark socket as connecting. 402 * Start keep-alive timer, and seed output sequence space. 403 * Send initial segment on connection. 404 */ 405static int 406tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 407{ 408 int error = 0; 409 struct inpcb *inp = sotoinpcb(so); 410 struct tcpcb *tp; 411 struct sockaddr_in *sinp; 412 413 TCPDEBUG0; 414 if (inp == NULL) { 415 return EINVAL; 416 } else if (inp->inp_state == INPCB_STATE_DEAD) { 417 if (so->so_error) { 418 error = so->so_error; 419 so->so_error = 0; 420 return error; 421 } else 422 return EINVAL; 423 } 424#if NECP 425#if FLOW_DIVERT 426 else if (necp_socket_should_use_flow_divert(inp)) { 427 uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp); 428 if (fd_ctl_unit > 0) { 429 error = flow_divert_pcb_init(so, fd_ctl_unit); 430 if (error == 0) { 431 error = flow_divert_connect_out(so, nam, p); 432 } 433 } else { 434 error = ENETDOWN; 435 } 436 return error; 437 } 438#endif /* FLOW_DIVERT */ 439#if CONTENT_FILTER 440 error = cfil_sock_attach(so); 441 if (error != 0) 442 return error; 443#endif /* CONTENT_FILTER */ 444#endif /* NECP */ 445 tp = intotcpcb(inp); 446 TCPDEBUG1(); 447 448 calculate_tcp_clock(); 449 450 if (nam->sa_family != 0 && nam->sa_family != AF_INET) { 451 error = EAFNOSUPPORT; 452 goto out; 453 } 454 /* 455 * Must disallow TCP ``connections'' to multicast addresses. 456 */ 457 sinp = (struct sockaddr_in *)(void *)nam; 458 if (sinp->sin_family == AF_INET 459 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 460 error = EAFNOSUPPORT; 461 goto out; 462 } 463 464 if ((error = tcp_connect(tp, nam, p)) != 0) 465 goto out; 466 error = tcp_output(tp); 467 COMMON_END(PRU_CONNECT); 468} 469 470static int 471tcp_usr_connectx_common(struct socket *so, int af, 472 struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, 473 struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid, 474 uint32_t flags, void *arg, uint32_t arglen) 475{ 476#pragma unused(aid) 477#if !MPTCP 478#pragma unused(flags, arg, arglen) 479#endif /* !MPTCP */ 480 struct sockaddr_entry *src_se = NULL, *dst_se = NULL; 481 struct inpcb *inp = sotoinpcb(so); 482 int error; 483 484 if (inp == NULL) 485 return (EINVAL); 486 487 VERIFY(dst_sl != NULL); 488 489 /* select source (if specified) and destination addresses */ 490 error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se); 491 if (error != 0) 492 return (error); 493 494 VERIFY(*dst_sl != NULL && dst_se != NULL); 495 VERIFY(src_se == NULL || *src_sl != NULL); 496 VERIFY(dst_se->se_addr->sa_family == af); 497 VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); 498 499#if NECP 500 inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope); 501#endif /* NECP */ 502 503 /* 504 * We get here for 2 cases: 505 * 506 * a. From MPTCP, to connect a subflow. There is no need to 507 * bind the socket to the source address and/or interface, 508 * since everything has been taken care of by MPTCP. We 509 * simply check whether or not this is for the initial 510 * MPTCP connection attempt, or to join an existing one. 511 * 512 * b. From the socket layer, to connect a TCP. Perform the 513 * bind to source address and/or interface as necessary. 514 */ 515#if MPTCP 516 if (flags & TCP_CONNREQF_MPTCP) { 517 struct mptsub_connreq *mpcr = arg; 518 519 /* Check to make sure this came down from MPTCP */ 520 if (arg == NULL || arglen != sizeof (*mpcr)) 521 return (EOPNOTSUPP); 522 523 switch (mpcr->mpcr_type) { 524 case MPTSUB_CONNREQ_MP_ENABLE: 525 break; 526 case MPTSUB_CONNREQ_MP_ADD: 527 break; 528 default: 529 return (EOPNOTSUPP); 530 } 531 } else 532#endif /* MPTCP */ 533 { 534 /* bind socket to the specified interface, if requested */ 535 if (ifscope != IFSCOPE_NONE && 536 (error = inp_bindif(inp, ifscope, NULL)) != 0) 537 return (error); 538 539 /* if source address and/or port is specified, bind to it */ 540 if (src_se != NULL) { 541 struct sockaddr *sa = src_se->se_addr; 542 error = sobindlock(so, sa, 0); /* already locked */ 543 if (error != 0) 544 return (error); 545 } 546 } 547 548 switch (af) { 549 case AF_INET: 550 error = tcp_usr_connect(so, dst_se->se_addr, p); 551 break; 552#if INET6 553 case AF_INET6: 554 error = tcp6_usr_connect(so, dst_se->se_addr, p); 555 break; 556#endif /* INET6 */ 557 default: 558 VERIFY(0); 559 /* NOTREACHED */ 560 } 561 562 if (error == 0 && pcid != NULL) 563 *pcid = 1; /* there is only 1 connection for a TCP */ 564 565 return (error); 566} 567 568static int 569tcp_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, 570 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, 571 associd_t aid, connid_t *pcid, uint32_t flags, void *arg, 572 uint32_t arglen) 573{ 574 return (tcp_usr_connectx_common(so, AF_INET, src_sl, dst_sl, 575 p, ifscope, aid, pcid, flags, arg, arglen)); 576} 577 578#if INET6 579static int 580tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 581{ 582 int error = 0; 583 struct inpcb *inp = sotoinpcb(so); 584 struct tcpcb *tp; 585 struct sockaddr_in6 *sin6p; 586 587 TCPDEBUG0; 588 if (inp == NULL) { 589 return EINVAL; 590 } else if (inp->inp_state == INPCB_STATE_DEAD) { 591 if (so->so_error) { 592 error = so->so_error; 593 so->so_error = 0; 594 return error; 595 } else 596 return EINVAL; 597 } 598#if NECP 599#if FLOW_DIVERT 600 else if (necp_socket_should_use_flow_divert(inp)) { 601 uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp); 602 if (fd_ctl_unit > 0) { 603 error = flow_divert_pcb_init(so, fd_ctl_unit); 604 if (error == 0) { 605 error = flow_divert_connect_out(so, nam, p); 606 } 607 } else { 608 error = ENETDOWN; 609 } 610 return error; 611 } 612#endif /* FLOW_DIVERT */ 613#if CONTENT_FILTER 614 error = cfil_sock_attach(so); 615 if (error != 0) 616 return error; 617#endif /* CONTENT_FILTER */ 618#endif /* NECP */ 619 620 tp = intotcpcb(inp); 621 TCPDEBUG1(); 622 623 calculate_tcp_clock(); 624 625 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { 626 error = EAFNOSUPPORT; 627 goto out; 628 } 629 630 /* 631 * Must disallow TCP ``connections'' to multicast addresses. 632 */ 633 sin6p = (struct sockaddr_in6 *)(void *)nam; 634 if (sin6p->sin6_family == AF_INET6 635 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 636 error = EAFNOSUPPORT; 637 goto out; 638 } 639 640 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 641 struct sockaddr_in sin; 642 643 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) 644 return (EINVAL); 645 646 in6_sin6_2_sin(&sin, sin6p); 647 inp->inp_vflag |= INP_IPV4; 648 inp->inp_vflag &= ~INP_IPV6; 649 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) 650 goto out; 651 error = tcp_output(tp); 652 goto out; 653 } 654 inp->inp_vflag &= ~INP_IPV4; 655 inp->inp_vflag |= INP_IPV6; 656 if ((error = tcp6_connect(tp, nam, p)) != 0) 657 goto out; 658 error = tcp_output(tp); 659 if (error) 660 goto out; 661 COMMON_END(PRU_CONNECT); 662} 663 664static int 665tcp6_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, 666 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, 667 associd_t aid, connid_t *pcid, uint32_t flags, void *arg, 668 uint32_t arglen) 669{ 670 return (tcp_usr_connectx_common(so, AF_INET6, src_sl, dst_sl, 671 p, ifscope, aid, pcid, flags, arg, arglen)); 672} 673#endif /* INET6 */ 674 675/* 676 * Initiate disconnect from peer. 677 * If connection never passed embryonic stage, just drop; 678 * else if don't need to let data drain, then can just drop anyways, 679 * else have to begin TCP shutdown process: mark socket disconnecting, 680 * drain unread data, state switch to reflect user close, and 681 * send segment (e.g. FIN) to peer. Socket will be really disconnected 682 * when peer sends FIN and acks ours. 683 * 684 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 685 */ 686static int 687tcp_usr_disconnect(struct socket *so) 688{ 689 int error = 0; 690 struct inpcb *inp = sotoinpcb(so); 691 struct tcpcb *tp; 692 693 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, 694 LCK_MTX_ASSERT_OWNED); 695 COMMON_START(); 696 /* In case we got disconnected from the peer */ 697 if (tp == NULL) 698 goto out; 699 tp = tcp_disconnect(tp); 700 COMMON_END(PRU_DISCONNECT); 701} 702 703/* 704 * User-protocol pru_disconnectx callback. 705 */ 706static int 707tcp_usr_disconnectx(struct socket *so, associd_t aid, connid_t cid) 708{ 709#pragma unused(cid) 710 if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) 711 return (EINVAL); 712 713 return (tcp_usr_disconnect(so)); 714} 715 716/* 717 * Accept a connection. Essentially all the work is 718 * done at higher levels; just return the address 719 * of the peer, storing through addr. 720 */ 721static int 722tcp_usr_accept(struct socket *so, struct sockaddr **nam) 723{ 724 int error = 0; 725 struct inpcb *inp = sotoinpcb(so); 726 struct tcpcb *tp = NULL; 727 TCPDEBUG0; 728 729 in_getpeeraddr(so, nam); 730 731 if (so->so_state & SS_ISDISCONNECTED) { 732 error = ECONNABORTED; 733 goto out; 734 } 735 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) 736 return (EINVAL); 737#if NECP 738 else if (necp_socket_should_use_flow_divert(inp)) 739 return (EPROTOTYPE); 740#if CONTENT_FILTER 741 error = cfil_sock_attach(so); 742 if (error != 0) 743 return (error); 744#endif /* CONTENT_FILTER */ 745#endif /* NECP */ 746 747 tp = intotcpcb(inp); 748 TCPDEBUG1(); 749 750 calculate_tcp_clock(); 751 752 COMMON_END(PRU_ACCEPT); 753} 754 755#if INET6 756static int 757tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 758{ 759 int error = 0; 760 struct inpcb *inp = sotoinpcb(so); 761 struct tcpcb *tp = NULL; 762 TCPDEBUG0; 763 764 if (so->so_state & SS_ISDISCONNECTED) { 765 error = ECONNABORTED; 766 goto out; 767 } 768 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) 769 return (EINVAL); 770#if NECP 771 else if (necp_socket_should_use_flow_divert(inp)) 772 return (EPROTOTYPE); 773#if CONTENT_FILTER 774 error = cfil_sock_attach(so); 775 if (error != 0) 776 return (error); 777#endif /* CONTENT_FILTER */ 778#endif /* NECP */ 779 780 tp = intotcpcb(inp); 781 TCPDEBUG1(); 782 783 calculate_tcp_clock(); 784 785 in6_mapped_peeraddr(so, nam); 786 COMMON_END(PRU_ACCEPT); 787} 788#endif /* INET6 */ 789 790/* 791 * Mark the connection as being incapable of further output. 792 * 793 * Returns: 0 Success 794 * EINVAL [COMMON_START] 795 * tcp_output:EADDRNOTAVAIL 796 * tcp_output:ENOBUFS 797 * tcp_output:EMSGSIZE 798 * tcp_output:EHOSTUNREACH 799 * tcp_output:ENETUNREACH 800 * tcp_output:ENETDOWN 801 * tcp_output:ENOMEM 802 * tcp_output:EACCES 803 * tcp_output:EMSGSIZE 804 * tcp_output:ENOBUFS 805 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL] 806 */ 807static int 808tcp_usr_shutdown(struct socket *so) 809{ 810 int error = 0; 811 struct inpcb *inp = sotoinpcb(so); 812 struct tcpcb *tp; 813 814 TCPDEBUG0; 815 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) 816 return (EINVAL); 817 818 socantsendmore(so); 819 820 /* 821 * In case we got disconnected from the peer, or if this is 822 * a socket that is to be flow-diverted (but not yet). 823 */ 824 tp = intotcpcb(inp); 825 TCPDEBUG1(); 826 827 if (tp == NULL 828#if NECP 829 || (necp_socket_should_use_flow_divert(inp)) 830#endif /* NECP */ 831 ) { 832 if (tp != NULL) 833 error = EPROTOTYPE; 834 goto out; 835 } 836 837 calculate_tcp_clock(); 838 839 tp = tcp_usrclosed(tp); 840#if MPTCP 841 /* A reset has been sent but socket exists, do not send FIN */ 842 if ((so->so_flags & SOF_MP_SUBFLOW) && 843 (tp) && (tp->t_mpflags & TMPF_RESET)) { 844 goto out; 845 } 846#endif 847#if CONTENT_FILTER 848 /* Don't send a FIN yet */ 849 if (tp && !(so->so_state & SS_ISDISCONNECTED) && 850 cfil_sock_data_pending(&so->so_snd)) 851 goto out; 852#endif /* CONTENT_FILTER */ 853 if (tp) 854 error = tcp_output(tp); 855 COMMON_END(PRU_SHUTDOWN); 856} 857 858/* 859 * After a receive, possibly send window update to peer. 860 */ 861static int 862tcp_usr_rcvd(struct socket *so, __unused int flags) 863{ 864 int error = 0; 865 struct inpcb *inp = sotoinpcb(so); 866 struct tcpcb *tp; 867 868 COMMON_START(); 869 /* In case we got disconnected from the peer */ 870 if (tp == NULL) 871 goto out; 872 tcp_sbrcv_trim(tp, &so->so_rcv); 873 874 tcp_output(tp); 875 876#if CONTENT_FILTER 877 cfil_sock_buf_update(&so->so_rcv); 878#endif /* CONTENT_FILTER */ 879 880 COMMON_END(PRU_RCVD); 881} 882 883/* 884 * Do a send by putting data in output queue and updating urgent 885 * marker if URG set. Possibly send more data. Unlike the other 886 * pru_*() routines, the mbuf chains are our responsibility. We 887 * must either enqueue them or free them. The other pru_* routines 888 * generally are caller-frees. 889 * 890 * Returns: 0 Success 891 * ECONNRESET 892 * EINVAL 893 * ENOBUFS 894 * tcp_connect:EADDRINUSE Address in use 895 * tcp_connect:EADDRNOTAVAIL Address not available. 896 * tcp_connect:EINVAL Invalid argument 897 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef] 898 * tcp_connect:EACCES Permission denied 899 * tcp_connect:EAGAIN Resource unavailable, try again 900 * tcp_connect:EPERM Operation not permitted 901 * tcp_output:EADDRNOTAVAIL 902 * tcp_output:ENOBUFS 903 * tcp_output:EMSGSIZE 904 * tcp_output:EHOSTUNREACH 905 * tcp_output:ENETUNREACH 906 * tcp_output:ENETDOWN 907 * tcp_output:ENOMEM 908 * tcp_output:EACCES 909 * tcp_output:EMSGSIZE 910 * tcp_output:ENOBUFS 911 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL] 912 * tcp6_connect:??? [IPV6 only] 913 */ 914static int 915tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 916 struct sockaddr *nam, struct mbuf *control, struct proc *p) 917{ 918 int error = 0; 919 struct inpcb *inp = sotoinpcb(so); 920 struct tcpcb *tp; 921 uint32_t msgpri = MSG_PRI_DEFAULT; 922#if INET6 923 int isipv6; 924#endif 925 TCPDEBUG0; 926 927 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD 928#if NECP 929 || (necp_socket_should_use_flow_divert(inp)) 930#endif /* NECP */ 931 ) { 932 /* 933 * OOPS! we lost a race, the TCP session got reset after 934 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a 935 * network interrupt in the non-splnet() section of sosend(). 936 */ 937 if (m != NULL) 938 m_freem(m); 939 if (control != NULL) { 940 m_freem(control); 941 control = NULL; 942 } 943 944 if (inp == NULL) 945 error = ECONNRESET; /* XXX EPIPE? */ 946 else 947 error = EPROTOTYPE; 948 tp = NULL; 949 TCPDEBUG1(); 950 goto out; 951 } 952#if INET6 953 isipv6 = nam && nam->sa_family == AF_INET6; 954#endif /* INET6 */ 955 tp = intotcpcb(inp); 956 TCPDEBUG1(); 957 958 calculate_tcp_clock(); 959 960 if (control != NULL) { 961 if (so->so_flags & SOF_ENABLE_MSGS) { 962 /* Get the msg priority from control mbufs */ 963 error = tcp_get_msg_priority(control, &msgpri); 964 if (error) { 965 m_freem(control); 966 if (m != NULL) 967 m_freem(m); 968 control = NULL; 969 m = NULL; 970 goto out; 971 } 972 m_freem(control); 973 control = NULL; 974 } else if (control->m_len) { 975 /* 976 * if not unordered, TCP should not have 977 * control mbufs 978 */ 979 m_freem(control); 980 if (m != NULL) 981 m_freem(m); 982 control = NULL; 983 m = NULL; 984 error = EINVAL; 985 goto out; 986 } 987 } 988 989 if (so->so_flags & SOF_ENABLE_MSGS) { 990 VERIFY(m->m_flags & M_PKTHDR); 991 m->m_pkthdr.msg_pri = msgpri; 992 } 993 994 /* MPTCP sublow socket buffers must not be compressed */ 995 VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) || 996 (so->so_snd.sb_flags & SB_NOCOMPRESS)); 997 998 if(!(flags & PRUS_OOB)) { 999 /* Call msg send if message delivery is enabled */ 1000 if (so->so_flags & SOF_ENABLE_MSGS) 1001 sbappendmsg_snd(&so->so_snd, m); 1002 else 1003 sbappendstream(&so->so_snd, m); 1004 1005 if (nam && tp->t_state < TCPS_SYN_SENT) { 1006 /* 1007 * Do implied connect if not yet connected, 1008 * initialize window to default value, and 1009 * initialize maxseg/maxopd using peer's cached 1010 * MSS. 1011 */ 1012#if INET6 1013 if (isipv6) 1014 error = tcp6_connect(tp, nam, p); 1015 else 1016#endif /* INET6 */ 1017 error = tcp_connect(tp, nam, p); 1018 if (error) 1019 goto out; 1020 tp->snd_wnd = TTCP_CLIENT_SND_WND; 1021 tcp_mss(tp, -1, IFSCOPE_NONE); 1022 } 1023 1024 if (flags & PRUS_EOF) { 1025 /* 1026 * Close the send side of the connection after 1027 * the data is sent. 1028 */ 1029 socantsendmore(so); 1030 tp = tcp_usrclosed(tp); 1031 } 1032 if (tp != NULL) { 1033 if (flags & PRUS_MORETOCOME) 1034 tp->t_flags |= TF_MORETOCOME; 1035 error = tcp_output(tp); 1036 if (flags & PRUS_MORETOCOME) 1037 tp->t_flags &= ~TF_MORETOCOME; 1038 } 1039 } else { 1040 if (sbspace(&so->so_snd) == 0) { 1041 /* if no space is left in sockbuf, 1042 * do not try to squeeze in OOB traffic */ 1043 m_freem(m); 1044 error = ENOBUFS; 1045 goto out; 1046 } 1047 /* 1048 * According to RFC961 (Assigned Protocols), 1049 * the urgent pointer points to the last octet 1050 * of urgent data. We continue, however, 1051 * to consider it to indicate the first octet 1052 * of data past the urgent section. 1053 * Otherwise, snd_up should be one lower. 1054 */ 1055 sbappendstream(&so->so_snd, m); 1056 if (nam && tp->t_state < TCPS_SYN_SENT) { 1057 /* 1058 * Do implied connect if not yet connected, 1059 * initialize window to default value, and 1060 * initialize maxseg/maxopd using peer's cached 1061 * MSS. 1062 */ 1063#if INET6 1064 if (isipv6) 1065 error = tcp6_connect(tp, nam, p); 1066 else 1067#endif /* INET6 */ 1068 error = tcp_connect(tp, nam, p); 1069 if (error) 1070 goto out; 1071 tp->snd_wnd = TTCP_CLIENT_SND_WND; 1072 tcp_mss(tp, -1, IFSCOPE_NONE); 1073 } 1074 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 1075 tp->t_flagsext |= TF_FORCE; 1076 error = tcp_output(tp); 1077 tp->t_flagsext &= ~TF_FORCE; 1078 } 1079 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 1080 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 1081} 1082 1083/* 1084 * Abort the TCP. 1085 */ 1086static int 1087tcp_usr_abort(struct socket *so) 1088{ 1089 int error = 0; 1090 struct inpcb *inp = sotoinpcb(so); 1091 struct tcpcb *tp; 1092 1093 COMMON_START(); 1094 /* In case we got disconnected from the peer */ 1095 if (tp == NULL) 1096 goto out; 1097 tp = tcp_drop(tp, ECONNABORTED); 1098 so->so_usecount--; 1099 COMMON_END(PRU_ABORT); 1100} 1101 1102/* 1103 * Receive out-of-band data. 1104 * 1105 * Returns: 0 Success 1106 * EINVAL [COMMON_START] 1107 * EINVAL 1108 * EWOULDBLOCK 1109 */ 1110static int 1111tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 1112{ 1113 int error = 0; 1114 struct inpcb *inp = sotoinpcb(so); 1115 struct tcpcb *tp; 1116 1117 COMMON_START(); 1118 if ((so->so_oobmark == 0 && 1119 (so->so_state & SS_RCVATMARK) == 0) || 1120 so->so_options & SO_OOBINLINE || 1121 tp->t_oobflags & TCPOOB_HADDATA) { 1122 error = EINVAL; 1123 goto out; 1124 } 1125 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 1126 error = EWOULDBLOCK; 1127 goto out; 1128 } 1129 m->m_len = 1; 1130 *mtod(m, caddr_t) = tp->t_iobc; 1131 if ((flags & MSG_PEEK) == 0) 1132 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1133 COMMON_END(PRU_RCVOOB); 1134} 1135 1136/* xxx - should be const */ 1137struct pr_usrreqs tcp_usrreqs = { 1138 .pru_abort = tcp_usr_abort, 1139 .pru_accept = tcp_usr_accept, 1140 .pru_attach = tcp_usr_attach, 1141 .pru_bind = tcp_usr_bind, 1142 .pru_connect = tcp_usr_connect, 1143 .pru_connectx = tcp_usr_connectx, 1144 .pru_control = in_control, 1145 .pru_detach = tcp_usr_detach, 1146 .pru_disconnect = tcp_usr_disconnect, 1147 .pru_disconnectx = tcp_usr_disconnectx, 1148 .pru_listen = tcp_usr_listen, 1149 .pru_peeraddr = in_getpeeraddr, 1150 .pru_rcvd = tcp_usr_rcvd, 1151 .pru_rcvoob = tcp_usr_rcvoob, 1152 .pru_send = tcp_usr_send, 1153 .pru_shutdown = tcp_usr_shutdown, 1154 .pru_sockaddr = in_getsockaddr, 1155 .pru_sosend = sosend, 1156 .pru_soreceive = soreceive, 1157}; 1158 1159#if INET6 1160struct pr_usrreqs tcp6_usrreqs = { 1161 .pru_abort = tcp_usr_abort, 1162 .pru_accept = tcp6_usr_accept, 1163 .pru_attach = tcp_usr_attach, 1164 .pru_bind = tcp6_usr_bind, 1165 .pru_connect = tcp6_usr_connect, 1166 .pru_connectx = tcp6_usr_connectx, 1167 .pru_control = in6_control, 1168 .pru_detach = tcp_usr_detach, 1169 .pru_disconnect = tcp_usr_disconnect, 1170 .pru_disconnectx = tcp_usr_disconnectx, 1171 .pru_listen = tcp6_usr_listen, 1172 .pru_peeraddr = in6_mapped_peeraddr, 1173 .pru_rcvd = tcp_usr_rcvd, 1174 .pru_rcvoob = tcp_usr_rcvoob, 1175 .pru_send = tcp_usr_send, 1176 .pru_shutdown = tcp_usr_shutdown, 1177 .pru_sockaddr = in6_mapped_sockaddr, 1178 .pru_sosend = sosend, 1179 .pru_soreceive = soreceive, 1180}; 1181#endif /* INET6 */ 1182 1183/* 1184 * Common subroutine to open a TCP connection to remote host specified 1185 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1186 * port number if needed. Call in_pcbladdr to do the routing and to choose 1187 * a local host address (interface). If there is an existing incarnation 1188 * of the same connection in TIME-WAIT state and if the remote host was 1189 * sending CC options and if the connection duration was < MSL, then 1190 * truncate the previous TIME-WAIT state and proceed. 1191 * Initialize connection parameters and enter SYN-SENT state. 1192 * 1193 * Returns: 0 Success 1194 * EADDRINUSE 1195 * EINVAL 1196 * in_pcbbind:EADDRNOTAVAIL Address not available. 1197 * in_pcbbind:EINVAL Invalid argument 1198 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef] 1199 * in_pcbbind:EACCES Permission denied 1200 * in_pcbbind:EADDRINUSE Address in use 1201 * in_pcbbind:EAGAIN Resource unavailable, try again 1202 * in_pcbbind:EPERM Operation not permitted 1203 * in_pcbladdr:EINVAL Invalid argument 1204 * in_pcbladdr:EAFNOSUPPORT Address family not supported 1205 * in_pcbladdr:EADDRNOTAVAIL Address not available 1206 */ 1207static int 1208tcp_connect(tp, nam, p) 1209 register struct tcpcb *tp; 1210 struct sockaddr *nam; 1211 struct proc *p; 1212{ 1213 struct inpcb *inp = tp->t_inpcb, *oinp; 1214 struct socket *so = inp->inp_socket; 1215 struct tcpcb *otp; 1216 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; 1217 struct in_addr laddr; 1218 struct rmxp_tao *taop; 1219 struct rmxp_tao tao_noncached; 1220 int error = 0; 1221 struct ifnet *outif = NULL; 1222 1223 if (inp->inp_lport == 0) { 1224 error = in_pcbbind(inp, NULL, p); 1225 if (error) 1226 goto done; 1227 } 1228 1229 /* 1230 * Cannot simply call in_pcbconnect, because there might be an 1231 * earlier incarnation of this same connection still in 1232 * TIME_WAIT state, creating an ADDRINUSE error. 1233 */ 1234 error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif); 1235 if (error) 1236 goto done; 1237 1238 tcp_unlock(inp->inp_socket, 0, 0); 1239 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1240 sin->sin_addr, sin->sin_port, 1241 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr, 1242 inp->inp_lport, 0, NULL); 1243 1244 tcp_lock(inp->inp_socket, 0, 0); 1245 if (oinp) { 1246 if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */ 1247 tcp_lock(oinp->inp_socket, 1, 0); 1248 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) { 1249 if (oinp != inp) 1250 tcp_unlock(oinp->inp_socket, 1, 0); 1251 goto skip_oinp; 1252 } 1253 1254 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 1255 otp->t_state == TCPS_TIME_WAIT && 1256 ((int)(tcp_now - otp->t_starttime)) < tcp_msl && 1257 (otp->t_flags & TF_RCVD_CC)) { 1258 otp = tcp_close(otp); 1259 } else { 1260 printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n", 1261 (uint64_t)VM_KERNEL_ADDRPERM(inp)); 1262 if (oinp != inp) 1263 tcp_unlock(oinp->inp_socket, 1, 0); 1264 error = EADDRINUSE; 1265 goto done; 1266 } 1267 if (oinp != inp) 1268 tcp_unlock(oinp->inp_socket, 1, 0); 1269 } 1270skip_oinp: 1271 if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr : 1272 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr && 1273 inp->inp_lport == sin->sin_port) { 1274 error = EINVAL; 1275 goto done; 1276 } 1277 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { 1278 /*lock inversion issue, mostly with udp multicast packets */ 1279 socket_unlock(inp->inp_socket, 0); 1280 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); 1281 socket_lock(inp->inp_socket, 0); 1282 } 1283 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1284 inp->inp_laddr = laddr; 1285 /* no reference needed */ 1286 inp->inp_last_outifp = outif; 1287 inp->inp_flags |= INP_INADDR_ANY; 1288 } 1289 inp->inp_faddr = sin->sin_addr; 1290 inp->inp_fport = sin->sin_port; 1291 in_pcbrehash(inp); 1292 lck_rw_done(inp->inp_pcbinfo->ipi_lock); 1293 1294 if (inp->inp_flowhash == 0) 1295 inp->inp_flowhash = inp_calc_flowhash(inp); 1296 1297 tcp_set_max_rwinscale(tp, so); 1298 1299 soisconnecting(so); 1300 tcpstat.tcps_connattempt++; 1301 tp->t_state = TCPS_SYN_SENT; 1302 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp)); 1303 tp->iss = tcp_new_isn(tp); 1304 tcp_sendseqinit(tp); 1305 if (nstat_collect) 1306 nstat_route_connect_attempt(inp->inp_route.ro_rt); 1307 1308 /* 1309 * Generate a CC value for this connection and 1310 * check whether CC or CCnew should be used. 1311 */ 1312 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 1313 taop = &tao_noncached; 1314 bzero(taop, sizeof(*taop)); 1315 } 1316 1317 tp->cc_send = CC_INC(tcp_ccgen); 1318 if (taop->tao_ccsent != 0 && 1319 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 1320 taop->tao_ccsent = tp->cc_send; 1321 } else { 1322 taop->tao_ccsent = 0; 1323 tp->t_flags |= TF_SENDCCNEW; 1324 } 1325 1326done: 1327 if (outif != NULL) 1328 ifnet_release(outif); 1329 1330 return (error); 1331} 1332 1333#if INET6 1334static int 1335tcp6_connect(tp, nam, p) 1336 register struct tcpcb *tp; 1337 struct sockaddr *nam; 1338 struct proc *p; 1339{ 1340 struct inpcb *inp = tp->t_inpcb, *oinp; 1341 struct socket *so = inp->inp_socket; 1342 struct tcpcb *otp; 1343 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam; 1344 struct in6_addr addr6; 1345 struct rmxp_tao *taop; 1346 struct rmxp_tao tao_noncached; 1347 int error = 0; 1348 struct ifnet *outif = NULL; 1349 1350 if (inp->inp_lport == 0) { 1351 error = in6_pcbbind(inp, NULL, p); 1352 if (error) 1353 goto done; 1354 } 1355 1356 /* 1357 * Cannot simply call in_pcbconnect, because there might be an 1358 * earlier incarnation of this same connection still in 1359 * TIME_WAIT state, creating an ADDRINUSE error. 1360 * 1361 * in6_pcbladdr() might return an ifp with its reference held 1362 * even in the error case, so make sure that it's released 1363 * whenever it's non-NULL. 1364 */ 1365 error = in6_pcbladdr(inp, nam, &addr6, &outif); 1366 if (error) 1367 goto done; 1368 tcp_unlock(inp->inp_socket, 0, 0); 1369 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1370 &sin6->sin6_addr, sin6->sin6_port, 1371 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1372 ? &addr6 1373 : &inp->in6p_laddr, 1374 inp->inp_lport, 0, NULL); 1375 tcp_lock(inp->inp_socket, 0, 0); 1376 if (oinp) { 1377 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 1378 otp->t_state == TCPS_TIME_WAIT && 1379 ((int)(tcp_now - otp->t_starttime)) < tcp_msl && 1380 (otp->t_flags & TF_RCVD_CC)) { 1381 otp = tcp_close(otp); 1382 } else { 1383 error = EADDRINUSE; 1384 goto done; 1385 } 1386 } 1387 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { 1388 /*lock inversion issue, mostly with udp multicast packets */ 1389 socket_unlock(inp->inp_socket, 0); 1390 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); 1391 socket_lock(inp->inp_socket, 0); 1392 } 1393 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 1394 inp->in6p_laddr = addr6; 1395 inp->in6p_last_outifp = outif; /* no reference needed */ 1396 inp->in6p_flags |= INP_IN6ADDR_ANY; 1397 } 1398 inp->in6p_faddr = sin6->sin6_addr; 1399 inp->inp_fport = sin6->sin6_port; 1400 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) 1401 inp->inp_flow = sin6->sin6_flowinfo; 1402 in_pcbrehash(inp); 1403 lck_rw_done(inp->inp_pcbinfo->ipi_lock); 1404 1405 if (inp->inp_flowhash == 0) 1406 inp->inp_flowhash = inp_calc_flowhash(inp); 1407 /* update flowinfo - RFC 6437 */ 1408 if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) { 1409 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 1410 inp->inp_flow |= 1411 (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK); 1412 } 1413 1414 tcp_set_max_rwinscale(tp, so); 1415 1416 soisconnecting(so); 1417 tcpstat.tcps_connattempt++; 1418 tp->t_state = TCPS_SYN_SENT; 1419 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1420 TCP_CONN_KEEPINIT(tp)); 1421 tp->iss = tcp_new_isn(tp); 1422 tcp_sendseqinit(tp); 1423 if (nstat_collect) 1424 nstat_route_connect_attempt(inp->inp_route.ro_rt); 1425 1426 /* 1427 * Generate a CC value for this connection and 1428 * check whether CC or CCnew should be used. 1429 */ 1430 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 1431 taop = &tao_noncached; 1432 bzero(taop, sizeof(*taop)); 1433 } 1434 1435 tp->cc_send = CC_INC(tcp_ccgen); 1436 if (taop->tao_ccsent != 0 && 1437 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 1438 taop->tao_ccsent = tp->cc_send; 1439 } else { 1440 taop->tao_ccsent = 0; 1441 tp->t_flags |= TF_SENDCCNEW; 1442 } 1443 1444done: 1445 if (outif != NULL) 1446 ifnet_release(outif); 1447 1448 return (error); 1449} 1450#endif /* INET6 */ 1451 1452/* 1453 * Export TCP internal state information via a struct tcp_info 1454 */ 1455__private_extern__ void 1456tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 1457{ 1458 struct inpcb *inp = tp->t_inpcb; 1459 1460 bzero(ti, sizeof(*ti)); 1461 1462 ti->tcpi_state = tp->t_state; 1463 1464 if (tp->t_state > TCPS_LISTEN) { 1465 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1466 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1467 if (tp->t_flags & TF_SACK_PERMIT) 1468 ti->tcpi_options |= TCPI_OPT_SACK; 1469 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1470 ti->tcpi_options |= TCPI_OPT_WSCALE; 1471 ti->tcpi_snd_wscale = tp->snd_scale; 1472 ti->tcpi_rcv_wscale = tp->rcv_scale; 1473 } 1474 1475 /* Are we in retranmission episode */ 1476 if (tp->snd_max != tp->snd_nxt) 1477 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY; 1478 else 1479 ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY; 1480 1481 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0; 1482 ti->tcpi_snd_mss = tp->t_maxseg; 1483 ti->tcpi_rcv_mss = tp->t_maxseg; 1484 1485 ti->tcpi_rttcur = tp->t_rttcur; 1486 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT; 1487 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT; 1488 ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT; 1489 1490 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1491 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1492 ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc; 1493 1494 ti->tcpi_rcv_space = tp->rcv_wnd; 1495 1496 ti->tcpi_snd_wnd = tp->snd_wnd; 1497 ti->tcpi_snd_nxt = tp->snd_nxt; 1498 ti->tcpi_rcv_nxt = tp->rcv_nxt; 1499 1500 /* convert bytes/msec to bits/sec */ 1501 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 && 1502 tp->t_bwmeas != NULL) { 1503 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000); 1504 } 1505 1506 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 : 1507 tp->t_inpcb->inp_last_outifp->if_index; 1508 1509 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes); 1510 ti->tcpi_txpackets = inp->inp_stat->txpackets; 1511 ti->tcpi_txbytes = inp->inp_stat->txbytes; 1512 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes; 1513 ti->tcpi_txunacked = tp->snd_max - tp->snd_una; 1514 1515 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes); 1516 ti->tcpi_rxpackets = inp->inp_stat->rxpackets; 1517 ti->tcpi_rxbytes = inp->inp_stat->rxbytes; 1518 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes; 1519 ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes; 1520 1521 if (tp->t_state > TCPS_LISTEN) { 1522 ti->tcpi_synrexmits = tp->t_stat.synrxtshift; 1523 } 1524 ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets; 1525 ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes; 1526 ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets; 1527 ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes; 1528 1529 ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets; 1530 ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes; 1531 ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets; 1532 ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes; 1533 1534 ti->tcpi_wired_rxpackets = inp->inp_Wstat->rxpackets; 1535 ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes; 1536 ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets; 1537 ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes; 1538 } 1539} 1540 1541__private_extern__ errno_t 1542tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti) 1543{ 1544 struct inpcbinfo *pcbinfo = NULL; 1545 struct inpcb *inp = NULL; 1546 struct socket *so; 1547 struct tcpcb *tp; 1548 1549 if (itpl->itpl_proto == IPPROTO_TCP) 1550 pcbinfo = &tcbinfo; 1551 else 1552 return EINVAL; 1553 1554 if (itpl->itpl_local_sa.sa_family == AF_INET && 1555 itpl->itpl_remote_sa.sa_family == AF_INET) { 1556 inp = in_pcblookup_hash(pcbinfo, 1557 itpl->itpl_remote_sin.sin_addr, 1558 itpl->itpl_remote_sin.sin_port, 1559 itpl->itpl_local_sin.sin_addr, 1560 itpl->itpl_local_sin.sin_port, 1561 0, NULL); 1562 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 && 1563 itpl->itpl_remote_sa.sa_family == AF_INET6) { 1564 struct in6_addr ina6_local; 1565 struct in6_addr ina6_remote; 1566 1567 ina6_local = itpl->itpl_local_sin6.sin6_addr; 1568 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && 1569 itpl->itpl_local_sin6.sin6_scope_id) 1570 ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id); 1571 1572 ina6_remote = itpl->itpl_remote_sin6.sin6_addr; 1573 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && 1574 itpl->itpl_remote_sin6.sin6_scope_id) 1575 ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id); 1576 1577 inp = in6_pcblookup_hash(pcbinfo, 1578 &ina6_remote, 1579 itpl->itpl_remote_sin6.sin6_port, 1580 &ina6_local, 1581 itpl->itpl_local_sin6.sin6_port, 1582 0, NULL); 1583 } else { 1584 return EINVAL; 1585 } 1586 if (inp == NULL || (so = inp->inp_socket) == NULL) 1587 return ENOENT; 1588 1589 socket_lock(so, 0); 1590 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { 1591 socket_unlock(so, 0); 1592 return ENOENT; 1593 } 1594 tp = intotcpcb(inp); 1595 1596 tcp_fill_info(tp, ti); 1597 socket_unlock(so, 0); 1598 1599 return 0; 1600} 1601 1602 1603__private_extern__ int 1604tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) 1605{ 1606 int error; 1607 struct tcp_info ti; 1608 struct info_tuple itpl; 1609 proc_t caller = PROC_NULL; 1610 proc_t caller_parent = PROC_NULL; 1611 char command_name[MAXCOMLEN + 1] = ""; 1612 char parent_name[MAXCOMLEN + 1] = ""; 1613 1614 if ((caller = proc_self()) != PROC_NULL) { 1615 /* get process name */ 1616 strlcpy(command_name, caller->p_comm, sizeof(command_name)); 1617 1618 /* get parent process name if possible */ 1619 if ((caller_parent = proc_find(caller->p_ppid)) != PROC_NULL) { 1620 strlcpy(parent_name, caller_parent->p_comm, 1621 sizeof(parent_name)); 1622 proc_rele(caller_parent); 1623 } 1624 1625 if ((escape_str(command_name, strlen(command_name), 1626 sizeof(command_name)) == 0) && 1627 (escape_str(parent_name, strlen(parent_name), 1628 sizeof(parent_name)) == 0)) { 1629 kern_asl_msg(LOG_DEBUG, "messagetracer", 1630 5, 1631 "com.apple.message.domain", 1632 "com.apple.kernel.tcpstat", /* 1 */ 1633 "com.apple.message.signature", 1634 "tcpinfo", /* 2 */ 1635 "com.apple.message.signature2", command_name, /* 3 */ 1636 "com.apple.message.signature3", parent_name, /* 4 */ 1637 "com.apple.message.summarize", "YES", /* 5 */ 1638 NULL); 1639 } 1640 } 1641 1642 if (caller != PROC_NULL) 1643 proc_rele(caller); 1644 1645 if (req->newptr == USER_ADDR_NULL) { 1646 return EINVAL; 1647 } 1648 if (req->newlen < sizeof(struct info_tuple)) { 1649 return EINVAL; 1650 } 1651 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple)); 1652 if (error != 0) { 1653 return error; 1654 } 1655 error = tcp_fill_info_for_info_tuple(&itpl, &ti); 1656 if (error != 0) { 1657 return error; 1658 } 1659 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info)); 1660 if (error != 0) { 1661 return error; 1662 } 1663 1664 return 0; 1665} 1666 1667static int 1668tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid) 1669{ 1670 int error = EHOSTUNREACH; 1671 *out_pid = -1; 1672 if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; 1673 1674 struct inpcb *inp = (struct inpcb*)so->so_pcb; 1675 uint16_t lport = inp->inp_lport; 1676 uint16_t fport = inp->inp_fport; 1677 struct inpcb *finp = NULL; 1678 1679 if (inp->inp_vflag & INP_IPV6) { 1680 struct in6_addr laddr6 = inp->in6p_laddr; 1681 struct in6_addr faddr6 = inp->in6p_faddr; 1682 socket_unlock(so, 0); 1683 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL); 1684 socket_lock(so, 0); 1685 } else if (inp->inp_vflag & INP_IPV4) { 1686 struct in_addr laddr4 = inp->inp_laddr; 1687 struct in_addr faddr4 = inp->inp_faddr; 1688 socket_unlock(so, 0); 1689 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL); 1690 socket_lock(so, 0); 1691 } 1692 1693 if (finp) { 1694 *out_pid = finp->inp_socket->last_pid; 1695 error = 0; 1696 in_pcb_checkstate(finp, WNT_RELEASE, 0); 1697 } 1698 1699 return error; 1700} 1701 1702void 1703tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci) 1704{ 1705 (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid); 1706 tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info); 1707} 1708 1709/* 1710 * The new sockopt interface makes it possible for us to block in the 1711 * copyin/out step (if we take a page fault). Taking a page fault at 1712 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1713 * use TSM, there probably isn't any need for this function to run at 1714 * splnet() any more. This needs more examination.) 1715 */ 1716int 1717tcp_ctloutput(so, sopt) 1718 struct socket *so; 1719 struct sockopt *sopt; 1720{ 1721 int error, opt, optval; 1722 struct inpcb *inp; 1723 struct tcpcb *tp; 1724 1725 error = 0; 1726 inp = sotoinpcb(so); 1727 if (inp == NULL) { 1728 return (ECONNRESET); 1729 } 1730 /* Allow <SOL_SOCKET,SO_FLUSH/SO_TRAFFIC_MGT_BACKGROUND> at this level */ 1731 if (sopt->sopt_level != IPPROTO_TCP && 1732 !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH || 1733 sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) { 1734#if INET6 1735 if (SOCK_CHECK_DOM(so, PF_INET6)) 1736 error = ip6_ctloutput(so, sopt); 1737 else 1738#endif /* INET6 */ 1739 error = ip_ctloutput(so, sopt); 1740 return (error); 1741 } 1742 tp = intotcpcb(inp); 1743 if (tp == NULL) { 1744 return (ECONNRESET); 1745 } 1746 1747 calculate_tcp_clock(); 1748 1749 switch (sopt->sopt_dir) { 1750 case SOPT_SET: 1751 switch (sopt->sopt_name) { 1752 case TCP_NODELAY: 1753 case TCP_NOOPT: 1754 case TCP_NOPUSH: 1755 case TCP_ENABLE_ECN: 1756 error = sooptcopyin(sopt, &optval, sizeof optval, 1757 sizeof optval); 1758 if (error) 1759 break; 1760 1761 switch (sopt->sopt_name) { 1762 case TCP_NODELAY: 1763 opt = TF_NODELAY; 1764 break; 1765 case TCP_NOOPT: 1766 opt = TF_NOOPT; 1767 break; 1768 case TCP_NOPUSH: 1769 opt = TF_NOPUSH; 1770 break; 1771 case TCP_ENABLE_ECN: 1772 opt = TF_ENABLE_ECN; 1773 break; 1774 default: 1775 opt = 0; /* dead code to fool gcc */ 1776 break; 1777 } 1778 1779 if (optval) 1780 tp->t_flags |= opt; 1781 else 1782 tp->t_flags &= ~opt; 1783 break; 1784 case TCP_RXT_FINDROP: 1785 case TCP_NOTIMEWAIT: 1786 error = sooptcopyin(sopt, &optval, sizeof optval, 1787 sizeof optval); 1788 if (error) 1789 break; 1790 switch (sopt->sopt_name) { 1791 case TCP_RXT_FINDROP: 1792 opt = TF_RXTFINDROP; 1793 break; 1794 case TCP_NOTIMEWAIT: 1795 opt = TF_NOTIMEWAIT; 1796 break; 1797 default: 1798 opt = 0; 1799 break; 1800 } 1801 if (optval) 1802 tp->t_flagsext |= opt; 1803 else 1804 tp->t_flagsext &= ~opt; 1805 break; 1806 case TCP_MEASURE_SND_BW: 1807 error = sooptcopyin(sopt, &optval, sizeof optval, 1808 sizeof optval); 1809 if (error) 1810 break; 1811 opt = TF_MEASURESNDBW; 1812 if (optval) { 1813 if (tp->t_bwmeas == NULL) { 1814 tp->t_bwmeas = tcp_bwmeas_alloc(tp); 1815 if (tp->t_bwmeas == NULL) { 1816 error = ENOMEM; 1817 break; 1818 } 1819 } 1820 tp->t_flagsext |= opt; 1821 } else { 1822 tp->t_flagsext &= ~opt; 1823 /* Reset snd bw measurement state */ 1824 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS); 1825 if (tp->t_bwmeas != NULL) { 1826 tcp_bwmeas_free(tp); 1827 } 1828 } 1829 break; 1830 case TCP_MEASURE_BW_BURST: { 1831 struct tcp_measure_bw_burst in; 1832 uint32_t minpkts, maxpkts; 1833 bzero(&in, sizeof(in)); 1834 1835 error = sooptcopyin(sopt, &in, sizeof(in), 1836 sizeof(in)); 1837 if (error) 1838 break; 1839 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || 1840 tp->t_bwmeas == NULL) { 1841 error = EINVAL; 1842 break; 1843 } 1844 minpkts = (in.min_burst_size != 0) ? in.min_burst_size : 1845 tp->t_bwmeas->bw_minsizepkts; 1846 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size : 1847 tp->t_bwmeas->bw_maxsizepkts; 1848 if (minpkts > maxpkts) { 1849 error = EINVAL; 1850 break; 1851 } 1852 tp->t_bwmeas->bw_minsizepkts = minpkts; 1853 tp->t_bwmeas->bw_maxsizepkts = maxpkts; 1854 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg); 1855 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg); 1856 break; 1857 } 1858 case TCP_MAXSEG: 1859 error = sooptcopyin(sopt, &optval, sizeof optval, 1860 sizeof optval); 1861 if (error) 1862 break; 1863 1864 if (optval > 0 && optval <= tp->t_maxseg && 1865 optval + 40 >= tcp_minmss) 1866 tp->t_maxseg = optval; 1867 else 1868 error = EINVAL; 1869 break; 1870 1871 case TCP_KEEPALIVE: 1872 error = sooptcopyin(sopt, &optval, sizeof optval, 1873 sizeof optval); 1874 if (error) 1875 break; 1876 if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { 1877 error = EINVAL; 1878 } else { 1879 tp->t_keepidle = optval * TCP_RETRANSHZ; 1880 /* reset the timer to new value */ 1881 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1882 TCP_CONN_KEEPIDLE(tp)); 1883 tcp_check_timer_state(tp); 1884 } 1885 break; 1886 1887 case TCP_CONNECTIONTIMEOUT: 1888 error = sooptcopyin(sopt, &optval, sizeof optval, 1889 sizeof optval); 1890 if (error) 1891 break; 1892 if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { 1893 error = EINVAL; 1894 } else { 1895 tp->t_keepinit = optval * TCP_RETRANSHZ; 1896 if (tp->t_state == TCPS_SYN_RECEIVED || 1897 tp->t_state == TCPS_SYN_SENT) { 1898 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 1899 TCP_CONN_KEEPINIT(tp)); 1900 tcp_check_timer_state(tp); 1901 } 1902 } 1903 break; 1904 1905 case TCP_KEEPINTVL: 1906 error = sooptcopyin(sopt, &optval, sizeof(optval), 1907 sizeof(optval)); 1908 if (error) 1909 break; 1910 if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { 1911 error = EINVAL; 1912 } else { 1913 tp->t_keepintvl = optval * TCP_RETRANSHZ; 1914 if (tp->t_state == TCPS_FIN_WAIT_2 && 1915 TCP_CONN_MAXIDLE(tp) > 0) { 1916 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, 1917 TCP_CONN_MAXIDLE(tp)); 1918 tcp_check_timer_state(tp); 1919 } 1920 } 1921 break; 1922 1923 case TCP_KEEPCNT: 1924 error = sooptcopyin(sopt, &optval, sizeof(optval), 1925 sizeof(optval)); 1926 if (error) 1927 break; 1928 if (optval < 0 || optval > INT32_MAX) { 1929 error = EINVAL; 1930 } else { 1931 tp->t_keepcnt = optval; 1932 if (tp->t_state == TCPS_FIN_WAIT_2 && 1933 TCP_CONN_MAXIDLE(tp) > 0) { 1934 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, 1935 TCP_CONN_MAXIDLE(tp)); 1936 tcp_check_timer_state(tp); 1937 } 1938 } 1939 break; 1940 1941 case PERSIST_TIMEOUT: 1942 error = sooptcopyin(sopt, &optval, sizeof optval, 1943 sizeof optval); 1944 if (error) 1945 break; 1946 if (optval < 0) 1947 error = EINVAL; 1948 else 1949 tp->t_persist_timeout = optval * TCP_RETRANSHZ; 1950 break; 1951 case TCP_RXT_CONNDROPTIME: 1952 error = sooptcopyin(sopt, &optval, sizeof(optval), 1953 sizeof(optval)); 1954 if (error) 1955 break; 1956 if (optval < 0) 1957 error = EINVAL; 1958 else 1959 tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ; 1960 break; 1961 case TCP_NOTSENT_LOWAT: 1962 error = sooptcopyin(sopt, &optval, sizeof(optval), 1963 sizeof(optval)); 1964 if (error) 1965 break; 1966 if (optval < 0) { 1967 error = EINVAL; 1968 break; 1969 } else { 1970 if (optval == 0) { 1971 so->so_flags &= ~(SOF_NOTSENT_LOWAT); 1972 tp->t_notsent_lowat = 0; 1973 } else { 1974 so->so_flags |= SOF_NOTSENT_LOWAT; 1975 tp->t_notsent_lowat = optval; 1976 } 1977 } 1978 break; 1979 case TCP_ADAPTIVE_READ_TIMEOUT: 1980 error = sooptcopyin(sopt, &optval, sizeof (optval), 1981 sizeof(optval)); 1982 if (error) 1983 break; 1984 if (optval < 0 || 1985 optval > TCP_ADAPTIVE_TIMEOUT_MAX) { 1986 error = EINVAL; 1987 break; 1988 } else if (optval == 0) { 1989 tp->t_adaptive_rtimo = 0; 1990 tcp_keepalive_reset(tp); 1991 } else { 1992 tp->t_adaptive_rtimo = optval; 1993 } 1994 break; 1995 case TCP_ADAPTIVE_WRITE_TIMEOUT: 1996 error = sooptcopyin(sopt, &optval, sizeof (optval), 1997 sizeof (optval)); 1998 if (error) 1999 break; 2000 if (optval < 0 || 2001 optval > TCP_ADAPTIVE_TIMEOUT_MAX) { 2002 error = EINVAL; 2003 break; 2004 } else { 2005 tp->t_adaptive_wtimo = optval; 2006 } 2007 break; 2008 case TCP_ENABLE_MSGS: 2009 error = sooptcopyin(sopt, &optval, sizeof(optval), 2010 sizeof(optval)); 2011 if (error) 2012 break; 2013 if (optval < 0 || optval > 1) { 2014 error = EINVAL; 2015 } else if (optval == 1) { 2016 /* 2017 * Check if messages option is already 2018 * enabled, if so return. 2019 */ 2020 if (so->so_flags & SOF_ENABLE_MSGS) { 2021 VERIFY(so->so_msg_state != NULL); 2022 break; 2023 } 2024 2025 /* 2026 * allocate memory for storing message 2027 * related state 2028 */ 2029 VERIFY(so->so_msg_state == NULL); 2030 MALLOC(so->so_msg_state, 2031 struct msg_state *, 2032 sizeof(struct msg_state), 2033 M_TEMP, M_WAITOK | M_ZERO); 2034 if (so->so_msg_state == NULL) { 2035 error = ENOMEM; 2036 break; 2037 } 2038 2039 /* Enable message delivery */ 2040 so->so_flags |= SOF_ENABLE_MSGS; 2041 } else { 2042 /* 2043 * Can't disable message delivery on socket 2044 * because of restrictions imposed by 2045 * encoding/decoding 2046 */ 2047 error = EINVAL; 2048 } 2049 break; 2050 case TCP_SENDMOREACKS: 2051 error = sooptcopyin(sopt, &optval, sizeof(optval), 2052 sizeof(optval)); 2053 if (error) 2054 break; 2055 if (optval < 0 || optval > 1) { 2056 error = EINVAL; 2057 } else if (optval == 0) { 2058 tp->t_flagsext &= ~(TF_NOSTRETCHACK); 2059 } else { 2060 tp->t_flagsext |= TF_NOSTRETCHACK; 2061 } 2062 break; 2063 case TCP_DISABLE_BLACKHOLE_DETECTION: 2064 error = sooptcopyin(sopt, &optval, sizeof(optval), 2065 sizeof(optval)); 2066 if (error) 2067 break; 2068 if (optval < 0 || optval > 1) { 2069 error = EINVAL; 2070 } else if (optval == 0) { 2071 tp->t_flagsext &= ~TF_NOBLACKHOLE_DETECTION; 2072 } else { 2073 tp->t_flagsext |= TF_NOBLACKHOLE_DETECTION; 2074 if ((tp->t_flags & TF_BLACKHOLE) && 2075 tp->t_pmtud_saved_maxopd > 0) 2076 tcp_pmtud_revert_segment_size(tp); 2077 } 2078 break; 2079 case SO_FLUSH: 2080 if ((error = sooptcopyin(sopt, &optval, sizeof (optval), 2081 sizeof (optval))) != 0) 2082 break; 2083 2084 error = inp_flush(inp, optval); 2085 break; 2086 2087 case SO_TRAFFIC_MGT_BACKGROUND: 2088 if ((error = sooptcopyin(sopt, &optval, sizeof (optval), 2089 sizeof (optval))) != 0) 2090 break; 2091 2092 if (optval) { 2093 socket_set_traffic_mgt_flags_locked(so, 2094 TRAFFIC_MGT_SO_BACKGROUND); 2095 } else { 2096 socket_clear_traffic_mgt_flags_locked(so, 2097 TRAFFIC_MGT_SO_BACKGROUND); 2098 } 2099 break; 2100 2101 default: 2102 error = ENOPROTOOPT; 2103 break; 2104 } 2105 break; 2106 2107 case SOPT_GET: 2108 switch (sopt->sopt_name) { 2109 case TCP_NODELAY: 2110 optval = tp->t_flags & TF_NODELAY; 2111 break; 2112 case TCP_MAXSEG: 2113 optval = tp->t_maxseg; 2114 break; 2115 case TCP_KEEPALIVE: 2116 optval = tp->t_keepidle / TCP_RETRANSHZ; 2117 break; 2118 case TCP_KEEPINTVL: 2119 optval = tp->t_keepintvl / TCP_RETRANSHZ; 2120 break; 2121 case TCP_KEEPCNT: 2122 optval = tp->t_keepcnt; 2123 break; 2124 case TCP_NOOPT: 2125 optval = tp->t_flags & TF_NOOPT; 2126 break; 2127 case TCP_NOPUSH: 2128 optval = tp->t_flags & TF_NOPUSH; 2129 break; 2130 case TCP_ENABLE_ECN: 2131 optval = (tp->t_flags & TF_ENABLE_ECN) ? 1 : 0; 2132 break; 2133 case TCP_CONNECTIONTIMEOUT: 2134 optval = tp->t_keepinit / TCP_RETRANSHZ; 2135 break; 2136 case PERSIST_TIMEOUT: 2137 optval = tp->t_persist_timeout / TCP_RETRANSHZ; 2138 break; 2139 case TCP_RXT_CONNDROPTIME: 2140 optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ; 2141 break; 2142 case TCP_RXT_FINDROP: 2143 optval = tp->t_flagsext & TF_RXTFINDROP; 2144 break; 2145 case TCP_NOTIMEWAIT: 2146 optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 1 : 0; 2147 break; 2148 case TCP_MEASURE_SND_BW: 2149 optval = tp->t_flagsext & TF_MEASURESNDBW; 2150 break; 2151 case TCP_INFO: { 2152 struct tcp_info ti; 2153 2154 tcp_fill_info(tp, &ti); 2155 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info)); 2156 goto done; 2157 /* NOT REACHED */ 2158 } 2159 case TCP_MEASURE_BW_BURST: { 2160 struct tcp_measure_bw_burst out; 2161 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || 2162 tp->t_bwmeas == NULL) { 2163 error = EINVAL; 2164 break; 2165 } 2166 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts; 2167 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts; 2168 error = sooptcopyout(sopt, &out, sizeof(out)); 2169 goto done; 2170 } 2171 case TCP_NOTSENT_LOWAT: 2172 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) { 2173 optval = tp->t_notsent_lowat; 2174 } else { 2175 optval = 0; 2176 } 2177 break; 2178 2179 case TCP_ENABLE_MSGS: 2180 if (so->so_flags & SOF_ENABLE_MSGS) { 2181 optval = 1; 2182 } else { 2183 optval = 0; 2184 } 2185 break; 2186 case TCP_SENDMOREACKS: 2187 if (tp->t_flagsext & TF_NOSTRETCHACK) 2188 optval = 1; 2189 else 2190 optval = 0; 2191 break; 2192 case TCP_DISABLE_BLACKHOLE_DETECTION: 2193 if (tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) 2194 optval = 1; 2195 else 2196 optval = 0; 2197 break; 2198 case TCP_PEER_PID: { 2199 pid_t pid; 2200 error = tcp_lookup_peer_pid_locked(so, &pid); 2201 if (error == 0) 2202 error = sooptcopyout(sopt, &pid, sizeof(pid)); 2203 goto done; 2204 } 2205 case TCP_ADAPTIVE_READ_TIMEOUT: 2206 optval = tp->t_adaptive_rtimo; 2207 break; 2208 case TCP_ADAPTIVE_WRITE_TIMEOUT: 2209 optval = tp->t_adaptive_wtimo; 2210 break; 2211 case SO_TRAFFIC_MGT_BACKGROUND: 2212 optval = (so->so_traffic_mgt_flags & 2213 TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0; 2214 break; 2215 default: 2216 error = ENOPROTOOPT; 2217 break; 2218 } 2219 if (error == 0) 2220 error = sooptcopyout(sopt, &optval, sizeof optval); 2221 break; 2222 } 2223done: 2224 return (error); 2225} 2226 2227/* 2228 * tcp_sendspace and tcp_recvspace are the default send and receive window 2229 * sizes, respectively. These are obsolescent (this information should 2230 * be set by the route). 2231 */ 2232u_int32_t tcp_sendspace = 1448*256; 2233u_int32_t tcp_recvspace = 1448*384; 2234 2235/* During attach, the size of socket buffer allocated is limited to 2236 * sb_max in sbreserve. Disallow setting the tcp send and recv space 2237 * to be more than sb_max because that will cause tcp_attach to fail 2238 * (see radar 5713060) 2239 */ 2240static int 2241sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1, 2242 __unused int arg2, struct sysctl_req *req) { 2243 u_int32_t new_value = 0, *space_p = NULL; 2244 int changed = 0, error = 0; 2245 u_quad_t sb_effective_max = (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES; 2246 2247 switch (oidp->oid_number) { 2248 case TCPCTL_SENDSPACE: 2249 space_p = &tcp_sendspace; 2250 break; 2251 case TCPCTL_RECVSPACE: 2252 space_p = &tcp_recvspace; 2253 break; 2254 default: 2255 return EINVAL; 2256 } 2257 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t), 2258 &new_value, &changed); 2259 if (changed) { 2260 if (new_value > 0 && new_value <= sb_effective_max) { 2261 *space_p = new_value; 2262 } else { 2263 error = ERANGE; 2264 } 2265 } 2266 return error; 2267} 2268 2269SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 2270 &tcp_sendspace , 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size"); 2271SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 2272 &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size"); 2273 2274 2275/* 2276 * Attach TCP protocol to socket, allocating 2277 * internet protocol control block, tcp control block, 2278 * bufer space, and entering LISTEN state if to accept connections. 2279 * 2280 * Returns: 0 Success 2281 * in_pcballoc:ENOBUFS 2282 * in_pcballoc:ENOMEM 2283 * in_pcballoc:??? [IPSEC specific] 2284 * soreserve:ENOBUFS 2285 */ 2286static int 2287tcp_attach(so, p) 2288 struct socket *so; 2289 struct proc *p; 2290{ 2291 register struct tcpcb *tp; 2292 struct inpcb *inp; 2293 int error; 2294#if INET6 2295 int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0; 2296#endif 2297 2298 error = in_pcballoc(so, &tcbinfo, p); 2299 if (error) 2300 return (error); 2301 2302 inp = sotoinpcb(so); 2303 2304 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 2305 error = soreserve(so, tcp_sendspace, tcp_recvspace); 2306 if (error) 2307 return (error); 2308 } 2309 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) 2310 so->so_rcv.sb_flags |= SB_AUTOSIZE; 2311 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) 2312 so->so_snd.sb_flags |= SB_AUTOSIZE; 2313 2314#if INET6 2315 if (isipv6) { 2316 inp->inp_vflag |= INP_IPV6; 2317 inp->in6p_hops = -1; /* use kernel default */ 2318 } 2319 else 2320#endif /* INET6 */ 2321 inp->inp_vflag |= INP_IPV4; 2322 tp = tcp_newtcpcb(inp); 2323 if (tp == NULL) { 2324 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 2325 2326 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 2327#if INET6 2328 if (isipv6) 2329 in6_pcbdetach(inp); 2330 else 2331#endif /* INET6 */ 2332 in_pcbdetach(inp); 2333 so->so_state |= nofd; 2334 return (ENOBUFS); 2335 } 2336 if (nstat_collect) 2337 nstat_tcp_new_pcb(inp); 2338 tp->t_state = TCPS_CLOSED; 2339 return (0); 2340} 2341 2342/* 2343 * Initiate (or continue) disconnect. 2344 * If embryonic state, just send reset (once). 2345 * If in ``let data drain'' option and linger null, just drop. 2346 * Otherwise (hard), mark socket disconnecting and drop 2347 * current input data; switch states based on user close, and 2348 * send segment to peer (with FIN). 2349 */ 2350static struct tcpcb * 2351tcp_disconnect(tp) 2352 register struct tcpcb *tp; 2353{ 2354 struct socket *so = tp->t_inpcb->inp_socket; 2355 2356 if (tp->t_state < TCPS_ESTABLISHED) 2357 tp = tcp_close(tp); 2358 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 2359 tp = tcp_drop(tp, 0); 2360 else { 2361 soisdisconnecting(so); 2362 sbflush(&so->so_rcv); 2363 tp = tcp_usrclosed(tp); 2364#if MPTCP 2365 /* A reset has been sent but socket exists, do not send FIN */ 2366 if ((so->so_flags & SOF_MP_SUBFLOW) && 2367 (tp) && (tp->t_mpflags & TMPF_RESET)) 2368 return (tp); 2369#endif 2370 if (tp) 2371 (void) tcp_output(tp); 2372 } 2373 return (tp); 2374} 2375 2376/* 2377 * User issued close, and wish to trail through shutdown states: 2378 * if never received SYN, just forget it. If got a SYN from peer, 2379 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 2380 * If already got a FIN from peer, then almost done; go to LAST_ACK 2381 * state. In all other cases, have already sent FIN to peer (e.g. 2382 * after PRU_SHUTDOWN), and just have to play tedious game waiting 2383 * for peer to send FIN or not respond to keep-alives, etc. 2384 * We can let the user exit from the close as soon as the FIN is acked. 2385 */ 2386static struct tcpcb * 2387tcp_usrclosed(tp) 2388 register struct tcpcb *tp; 2389{ 2390 2391 switch (tp->t_state) { 2392 2393 case TCPS_CLOSED: 2394 case TCPS_LISTEN: 2395 tp = tcp_close(tp); 2396 break; 2397 2398 case TCPS_SYN_SENT: 2399 case TCPS_SYN_RECEIVED: 2400 tp->t_flags |= TF_NEEDFIN; 2401 break; 2402 2403 case TCPS_ESTABLISHED: 2404 DTRACE_TCP4(state__change, void, NULL, 2405 struct inpcb *, tp->t_inpcb, 2406 struct tcpcb *, tp, 2407 int32_t, TCPS_FIN_WAIT_1); 2408 tp->t_state = TCPS_FIN_WAIT_1; 2409 break; 2410 2411 case TCPS_CLOSE_WAIT: 2412 DTRACE_TCP4(state__change, void, NULL, 2413 struct inpcb *, tp->t_inpcb, 2414 struct tcpcb *, tp, 2415 int32_t, TCPS_LAST_ACK); 2416 tp->t_state = TCPS_LAST_ACK; 2417 break; 2418 } 2419 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 2420 soisdisconnected(tp->t_inpcb->inp_socket); 2421 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 2422 if (tp->t_state == TCPS_FIN_WAIT_2) 2423 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, 2424 TCP_CONN_MAXIDLE(tp)); 2425 } 2426 return (tp); 2427} 2428 2429void 2430tcp_in_cksum_stats(u_int32_t len) 2431{ 2432 tcpstat.tcps_rcv_swcsum++; 2433 tcpstat.tcps_rcv_swcsum_bytes += len; 2434} 2435 2436void 2437tcp_out_cksum_stats(u_int32_t len) 2438{ 2439 tcpstat.tcps_snd_swcsum++; 2440 tcpstat.tcps_snd_swcsum_bytes += len; 2441} 2442 2443#if INET6 2444void 2445tcp_in6_cksum_stats(u_int32_t len) 2446{ 2447 tcpstat.tcps_rcv6_swcsum++; 2448 tcpstat.tcps_rcv6_swcsum_bytes += len; 2449} 2450 2451void 2452tcp_out6_cksum_stats(u_int32_t len) 2453{ 2454 tcpstat.tcps_snd6_swcsum++; 2455 tcpstat.tcps_snd6_swcsum_bytes += len; 2456} 2457 2458/* 2459 * When messages are enabled on a TCP socket, the message priority 2460 * is sent as a control message. This function will extract it. 2461 */ 2462int 2463tcp_get_msg_priority(struct mbuf *control, uint32_t *msgpri) 2464{ 2465 struct cmsghdr *cm; 2466 if (control == NULL) 2467 return(EINVAL); 2468 2469 for (cm = M_FIRST_CMSGHDR(control); cm; 2470 cm = M_NXT_CMSGHDR(control, cm)) { 2471 if (cm->cmsg_len < sizeof(struct cmsghdr) || 2472 cm->cmsg_len > control->m_len) { 2473 return (EINVAL); 2474 } 2475 if (cm->cmsg_level == SOL_SOCKET && 2476 cm->cmsg_type == SCM_MSG_PRIORITY) { 2477 *msgpri = *(unsigned int *)(void *)CMSG_DATA(cm); 2478 break; 2479 } 2480 } 2481 2482 VERIFY(*msgpri >= MSG_PRI_MIN && *msgpri <= MSG_PRI_MAX); 2483 return (0); 2484} 2485#endif /* INET6 */ 2486